From 3b3a015ad4ab1ad0cf707ccbaef1dbe965993a4a Mon Sep 17 00:00:00 2001 From: Wolfgang Wiedmeyer Date: Fri, 23 Oct 2015 04:06:23 +0200 Subject: merged more uncritical stuff from 3.2.72 --- arch/arm/boot/compressed/.gitignore | 9 + arch/arm/configs/at91rm9200_defconfig | 47 +- arch/arm/configs/cm_x300_defconfig | 18 +- arch/arm/configs/integrator_defconfig | 19 +- arch/arm/configs/mmp2_defconfig | 9 +- arch/arm/configs/mx3_defconfig | 46 +- arch/arm/configs/mxs_defconfig | 5 +- arch/arm/configs/omap1_defconfig | 6 - arch/arm/configs/tegra_defconfig | 39 +- arch/arm/configs/u300_defconfig | 13 +- arch/arm/configs/u8500_defconfig | 32 +- arch/m68k/include/asm/entry_mm.h | 128 -- arch/x86/kernel/amd_iommu.c | 2770 ---------------------------- arch/x86/kernel/amd_iommu_init.c | 1586 ---------------- drivers/xen/Kconfig | 86 +- drivers/xen/Makefile | 8 +- drivers/xen/balloon.c | 215 ++- drivers/xen/events.c | 131 +- drivers/xen/evtchn.c | 31 +- drivers/xen/gntdev.c | 118 +- drivers/xen/grant-table.c | 10 +- drivers/xen/manage.c | 21 +- drivers/xen/pci.c | 107 +- drivers/xen/swiotlb-xen.c | 62 +- drivers/xen/tmem.c | 170 +- drivers/xen/xen-balloon.c | 17 +- drivers/xen/xenbus/xenbus_client.c | 16 +- drivers/xen/xenbus/xenbus_comms.c | 4 +- drivers/xen/xenbus/xenbus_probe.c | 151 +- drivers/xen/xenbus/xenbus_probe.h | 5 +- drivers/xen/xenbus/xenbus_probe_backend.c | 12 +- drivers/xen/xenbus/xenbus_probe_frontend.c | 128 +- drivers/xen/xenbus/xenbus_xs.c | 4 +- drivers/zorro/.gitignore | 2 + drivers/zorro/proc.c | 1 + drivers/zorro/zorro-driver.c | 8 +- fs/btrfs/Makefile | 7 +- fs/btrfs/acl.c | 104 +- fs/btrfs/async-thread.c | 120 +- fs/btrfs/async-thread.h | 4 +- fs/btrfs/btrfs_inode.h | 43 +- fs/btrfs/compression.c | 17 +- fs/btrfs/ctree.c | 484 +++-- fs/btrfs/ctree.h | 272 ++- fs/btrfs/delayed-inode.c | 112 +- fs/btrfs/delayed-inode.h | 2 +- fs/btrfs/dir-item.c | 39 +- fs/btrfs/disk-io.c | 803 ++++++-- fs/btrfs/disk-io.h | 14 +- fs/btrfs/extent-tree.c | 1494 +++++++++------ fs/btrfs/extent_io.c | 1044 ++++++++--- fs/btrfs/extent_io.h | 80 +- fs/btrfs/extent_map.c | 155 +- fs/btrfs/file-item.c | 69 +- fs/btrfs/file.c | 293 ++- fs/btrfs/free-space-cache.c | 1083 ++++++----- fs/btrfs/inode-map.c | 69 +- fs/btrfs/inode.c | 1058 ++++++----- fs/btrfs/ioctl.c | 350 +++- fs/btrfs/ioctl.h | 29 + fs/btrfs/locking.c | 280 +-- fs/btrfs/locking.h | 36 +- fs/btrfs/print-tree.c | 8 +- fs/btrfs/ref-cache.c | 68 - fs/btrfs/ref-cache.h | 52 - fs/btrfs/relocation.c | 43 +- fs/btrfs/root-tree.c | 5 +- fs/btrfs/scrub.c | 668 +++++-- fs/btrfs/struct-funcs.c | 100 +- fs/btrfs/super.c | 347 ++-- fs/btrfs/transaction.c | 262 +-- fs/btrfs/tree-log.c | 39 +- fs/btrfs/volumes.c | 270 ++- fs/btrfs/volumes.h | 24 +- fs/btrfs/xattr.c | 184 +- fs/hfs/bnode.c | 9 +- fs/hfs/brec.c | 20 +- fs/hfs/dir.c | 4 +- fs/hfs/inode.c | 19 +- fs/hfs/super.c | 4 +- fs/hfsplus/bnode.c | 3 - fs/hfsplus/brec.c | 24 +- fs/hfsplus/catalog.c | 14 +- fs/hfsplus/dir.c | 12 +- fs/hfsplus/extents.c | 50 +- fs/hfsplus/hfsplus_fs.h | 5 +- fs/hfsplus/inode.c | 38 +- fs/hfsplus/options.c | 4 +- fs/hfsplus/super.c | 36 +- fs/hfsplus/unicode.c | 35 +- fs/hfsplus/wrapper.c | 9 +- fs/hostfs/hostfs_kern.c | 25 +- fs/hostfs/hostfs_user.c | 1 - fs/hpfs/alloc.c | 66 +- fs/hpfs/dir.c | 16 +- fs/hpfs/file.c | 12 +- fs/hpfs/hpfs_fn.h | 8 +- fs/hpfs/inode.c | 10 +- fs/hpfs/namei.c | 10 +- fs/hpfs/super.c | 32 +- fs/hppfs/hppfs.c | 7 +- fs/hugetlbfs/inode.c | 60 +- fs/isofs/dir.c | 3 - fs/isofs/inode.c | 30 +- fs/isofs/isofs.h | 24 +- fs/isofs/namei.c | 13 +- fs/isofs/rock.c | 55 +- fs/jbd/checkpoint.c | 64 +- fs/jbd/commit.c | 57 +- fs/jbd/journal.c | 99 +- fs/jbd/recovery.c | 4 + fs/jbd/transaction.c | 83 +- fs/jfs/acl.c | 73 +- fs/jfs/file.c | 13 +- fs/jfs/inode.c | 4 +- fs/jfs/jfs_acl.h | 2 +- fs/jfs/jfs_dmap.c | 5 +- fs/jfs/jfs_dtree.c | 10 +- fs/jfs/jfs_imap.c | 6 +- fs/jfs/jfs_inode.c | 5 +- fs/jfs/jfs_inode.h | 2 +- fs/jfs/jfs_logmgr.c | 1 + fs/jfs/jfs_txnmgr.c | 6 +- fs/jfs/jfs_umount.c | 4 +- fs/jfs/namei.c | 66 +- fs/jfs/super.c | 1 - fs/jfs/xattr.c | 61 +- fs/nfsd/Kconfig | 12 - fs/nfsd/cache.h | 2 +- fs/nfsd/export.c | 434 +---- fs/nfsd/lockd.c | 2 - fs/nfsd/nfs4acl.c | 18 +- fs/nfsd/nfs4callback.c | 32 +- fs/nfsd/nfs4proc.c | 438 ++++- fs/nfsd/nfs4recover.c | 103 +- fs/nfsd/nfs4state.c | 1810 +++++++++--------- fs/nfsd/nfs4xdr.c | 555 ++++-- fs/nfsd/nfscache.c | 3 +- fs/nfsd/nfsctl.c | 345 +--- fs/nfsd/nfsd.h | 33 + fs/nfsd/nfsfh.c | 39 +- fs/nfsd/nfssvc.c | 38 +- fs/nfsd/state.h | 167 +- fs/nfsd/vfs.c | 259 ++- fs/nfsd/vfs.h | 42 +- fs/nfsd/xdr4.h | 56 +- fs/nilfs2/btree.c | 47 +- fs/nilfs2/file.c | 12 +- fs/nilfs2/inode.c | 21 +- fs/nilfs2/ioctl.c | 3 + fs/nilfs2/namei.c | 9 +- fs/nilfs2/nilfs.h | 14 +- fs/nilfs2/page.c | 2 + fs/nilfs2/segment.c | 66 +- fs/nilfs2/segment.h | 5 + fs/ocfs2/acl.c | 81 +- fs/ocfs2/acl.h | 2 +- fs/ocfs2/alloc.c | 2 +- fs/ocfs2/aops.c | 93 +- fs/ocfs2/aops.h | 14 + fs/ocfs2/buffer_head_io.c | 2 - fs/ocfs2/cluster/heartbeat.c | 194 +- fs/ocfs2/cluster/netdebug.c | 102 +- fs/ocfs2/cluster/tcp.c | 139 +- fs/ocfs2/cluster/tcp.h | 2 + fs/ocfs2/dcache.c | 2 +- fs/ocfs2/dir.c | 7 +- fs/ocfs2/dlm/dlmcommon.h | 56 +- fs/ocfs2/dlm/dlmdebug.c | 1 + fs/ocfs2/dlm/dlmdomain.c | 44 +- fs/ocfs2/dlm/dlmlock.c | 54 +- fs/ocfs2/dlm/dlmmaster.c | 197 +- fs/ocfs2/dlm/dlmrecovery.c | 199 +- fs/ocfs2/dlm/dlmthread.c | 16 +- fs/ocfs2/dlmglue.c | 33 +- fs/ocfs2/extent_map.c | 96 + fs/ocfs2/extent_map.h | 2 + fs/ocfs2/file.c | 161 +- fs/ocfs2/file.h | 2 +- fs/ocfs2/inode.c | 6 +- fs/ocfs2/inode.h | 3 + fs/ocfs2/ioctl.c | 11 +- fs/ocfs2/journal.c | 23 +- fs/ocfs2/journal.h | 5 +- fs/ocfs2/mmap.c | 53 +- fs/ocfs2/move_extents.c | 3 +- fs/ocfs2/namei.c | 19 +- fs/ocfs2/ocfs2.h | 51 +- fs/ocfs2/quota_global.c | 27 +- fs/ocfs2/quota_local.c | 27 +- fs/ocfs2/refcounttree.c | 49 +- fs/ocfs2/slot_map.c | 4 +- fs/ocfs2/stack_o2cb.c | 71 +- fs/ocfs2/super.c | 29 +- fs/ocfs2/super.h | 14 +- fs/ocfs2/xattr.c | 50 +- fs/reiserfs/bitmap.c | 8 +- fs/reiserfs/dir.c | 19 +- fs/reiserfs/file.c | 13 +- fs/reiserfs/inode.c | 44 +- fs/reiserfs/journal.c | 43 +- fs/reiserfs/namei.c | 20 +- fs/reiserfs/resize.c | 10 +- fs/reiserfs/super.c | 1 + fs/reiserfs/xattr.c | 36 +- fs/reiserfs/xattr_acl.c | 77 +- fs/reiserfs/xattr_security.c | 4 +- fs/ubifs/budget.c | 2 +- fs/ubifs/commit.c | 18 +- fs/ubifs/debug.c | 778 +++++--- fs/ubifs/debug.h | 252 +-- fs/ubifs/dir.c | 16 +- fs/ubifs/file.c | 26 +- fs/ubifs/io.c | 168 +- fs/ubifs/log.c | 25 +- fs/ubifs/lprops.c | 8 +- fs/ubifs/lpt.c | 37 +- fs/ubifs/lpt_commit.c | 40 +- fs/ubifs/master.c | 7 +- fs/ubifs/misc.h | 103 +- fs/ubifs/orphan.c | 7 +- fs/ubifs/recovery.c | 45 +- fs/ubifs/replay.c | 3 +- fs/ubifs/sb.c | 8 +- fs/ubifs/scan.c | 4 +- fs/ubifs/shrinker.c | 1 - fs/ubifs/super.c | 27 +- fs/ubifs/tnc.c | 26 +- fs/ubifs/tnc_commit.c | 145 +- fs/ubifs/ubifs.h | 27 +- fs/ubifs/xattr.c | 4 +- fs/udf/balloc.c | 14 +- fs/udf/dir.c | 3 +- fs/udf/directory.c | 8 +- fs/udf/file.c | 2 +- fs/udf/inode.c | 137 +- fs/udf/lowlevel.c | 2 +- fs/udf/misc.c | 19 +- fs/udf/namei.c | 23 +- fs/udf/partition.c | 19 +- fs/udf/super.c | 288 ++- fs/udf/symlink.c | 67 +- fs/udf/truncate.c | 22 +- fs/udf/udf_sb.h | 5 +- fs/udf/udfdecl.h | 38 +- fs/udf/udftime.c | 3 +- fs/udf/unicode.c | 34 +- fs/ufs/ialloc.c | 2 +- fs/ufs/inode.c | 4 +- fs/ufs/namei.c | 2 - fs/ufs/ufs.h | 9 +- include/linux/dvb/audio.h | 2 +- include/linux/dvb/frontend.h | 1 + include/linux/dvb/version.h | 2 +- include/xen/Kbuild | 2 + include/xen/balloon.h | 19 +- include/xen/events.h | 7 +- include/xen/grant_table.h | 2 +- include/xen/hvc-console.h | 4 +- include/xen/interface/io/blkif.h | 36 + include/xen/interface/io/netif.h | 19 + include/xen/interface/io/ring.h | 5 + include/xen/interface/io/xs_wire.h | 5 +- include/xen/interface/physdev.h | 34 +- include/xen/interface/xen.h | 1 + include/xen/page.h | 12 +- include/xen/platform_pci.h | 6 +- include/xen/xenbus.h | 11 +- lib/raid6/algos.c | 1 + lib/raid6/int.uc | 2 +- lib/raid6/mktables.c | 1 + lib/raid6/recov.c | 1 + net/802/fc.c | 1 + net/802/garp.c | 5 +- net/802/stp.c | 5 +- net/8021q/vlan.c | 35 +- net/8021q/vlan.h | 31 + net/8021q/vlan_core.c | 57 +- net/8021q/vlan_dev.c | 42 +- net/8021q/vlan_netlink.c | 3 +- net/8021q/vlanproc.c | 6 +- net/9p/client.c | 574 ++++-- net/9p/mod.c | 4 +- net/9p/protocol.c | 109 +- net/9p/protocol.h | 4 +- net/9p/trans_common.c | 57 +- net/9p/trans_common.h | 21 +- net/9p/trans_virtio.c | 320 ++-- net/appletalk/aarp.c | 139 +- net/appletalk/atalk_proc.c | 1 + net/appletalk/ddp.c | 404 ++-- net/atm/atm_misc.c | 2 +- net/atm/clip.c | 8 +- net/atm/common.c | 4 +- net/atm/lec.c | 4 +- net/atm/mpc.c | 2 +- net/atm/pppoatm.c | 1 + net/atm/proc.c | 2 +- net/atm/pvc.c | 1 + net/atm/svc.c | 1 + net/ax25/af_ax25.c | 4 +- net/ax25/ax25_route.c | 1 + net/ax25/ax25_uid.c | 1 + net/dcb/dcbevent.c | 1 + net/dcb/dcbnl.c | 694 +++++-- net/dccp/ackvec.c | 1 + net/dccp/ccid.c | 4 +- net/dccp/ccids/ccid2.c | 193 +- net/dccp/ccids/ccid2.h | 31 +- net/dccp/ccids/lib/tfrc.c | 1 + net/dccp/dccp.h | 1 + net/dccp/feat.c | 202 +- net/dccp/feat.h | 1 + net/dccp/input.c | 61 +- net/dccp/ipv4.c | 4 +- net/dccp/ipv6.c | 7 +- net/dccp/output.c | 14 +- net/dccp/proto.c | 1 - net/dccp/timer.c | 1 + net/decnet/af_decnet.c | 697 ++++--- net/decnet/dn_dev.c | 74 +- net/decnet/dn_fib.c | 79 +- net/decnet/dn_neigh.c | 42 +- net/decnet/dn_nsp_in.c | 182 +- net/decnet/dn_route.c | 133 +- net/decnet/dn_rules.c | 1 + net/decnet/dn_table.c | 25 +- net/decnet/dn_timer.c | 19 +- net/decnet/netfilter/dn_rtmsg.c | 18 +- net/decnet/sysctl_net_decnet.c | 17 +- net/dns_resolver/dns_query.c | 4 +- net/dsa/dsa.c | 1 + net/dsa/mv88e6131.c | 8 +- net/dsa/slave.c | 3 - net/econet/af_econet.c | 172 +- net/ieee802154/Kconfig | 6 + net/ieee802154/Makefile | 8 +- net/ieee802154/af_ieee802154.c | 2 +- net/ieee802154/dgram.c | 2 +- net/ieee802154/nl-mac.c | 1 + net/ieee802154/nl-phy.c | 31 + net/mac80211/Kconfig | 25 + net/mac80211/aes_ccm.c | 37 +- net/mac80211/aes_ccm.h | 2 - net/mac80211/aes_cmac.c | 10 +- net/mac80211/aes_cmac.h | 2 +- net/mac80211/agg-rx.c | 61 +- net/mac80211/agg-tx.c | 64 +- net/mac80211/cfg.c | 553 +++++- net/mac80211/debugfs.c | 71 +- net/mac80211/debugfs_key.c | 13 +- net/mac80211/debugfs_netdev.c | 66 +- net/mac80211/debugfs_sta.c | 41 +- net/mac80211/driver-ops.h | 150 +- net/mac80211/driver-trace.h | 261 ++- net/mac80211/ht.c | 15 +- net/mac80211/ibss.c | 15 +- net/mac80211/ieee80211_i.h | 176 +- net/mac80211/iface.c | 40 +- net/mac80211/key.c | 178 +- net/mac80211/key.h | 32 +- net/mac80211/led.c | 1 + net/mac80211/main.c | 93 +- net/mac80211/mesh.c | 213 ++- net/mac80211/mesh.h | 38 +- net/mac80211/mesh_hwmp.c | 193 +- net/mac80211/mesh_pathtbl.c | 481 ++++- net/mac80211/mesh_plink.c | 257 +-- net/mac80211/mlme.c | 331 +++- net/mac80211/offchannel.c | 68 +- net/mac80211/pm.c | 56 +- net/mac80211/rate.c | 38 +- net/mac80211/rc80211_minstrel.c | 9 + net/mac80211/rc80211_minstrel.h | 12 + net/mac80211/rc80211_minstrel_debugfs.c | 1 + net/mac80211/rc80211_minstrel_ht.c | 20 +- net/mac80211/rc80211_minstrel_ht_debugfs.c | 1 + net/mac80211/rc80211_pid_debugfs.c | 1 + net/mac80211/rx.c | 320 +++- net/mac80211/scan.c | 142 +- net/mac80211/spectmgmt.c | 6 +- net/mac80211/sta_info.c | 986 +++++++--- net/mac80211/sta_info.h | 186 +- net/mac80211/status.c | 257 ++- net/mac80211/tkip.c | 116 +- net/mac80211/tkip.h | 8 +- net/mac80211/tx.c | 606 +++--- net/mac80211/util.c | 421 ++--- net/mac80211/wep.c | 6 +- net/mac80211/wme.c | 23 +- net/mac80211/wme.h | 8 +- net/mac80211/work.c | 107 +- net/mac80211/wpa.c | 97 +- net/netlabel/Makefile | 2 - net/netlabel/netlabel_addrlist.c | 2 +- net/netlabel/netlabel_addrlist.h | 2 +- net/netlabel/netlabel_cipso_v4.c | 4 +- net/netlabel/netlabel_cipso_v4.h | 2 +- net/netlabel/netlabel_domainhash.c | 11 +- net/netlabel/netlabel_domainhash.h | 2 +- net/netlabel/netlabel_kapi.c | 76 +- net/netlabel/netlabel_mgmt.c | 4 +- net/netlabel/netlabel_mgmt.h | 4 +- net/netlabel/netlabel_unlabeled.c | 21 +- net/netlabel/netlabel_unlabeled.h | 2 +- net/netlabel/netlabel_user.c | 2 +- net/netlabel/netlabel_user.h | 2 +- net/netrom/af_netrom.c | 3 +- net/netrom/nr_route.c | 23 +- net/rfkill/Kconfig | 5 - net/rfkill/core.c | 6 +- net/rfkill/input.c | 1 + net/rfkill/rfkill-gpio.c | 11 + net/rfkill/rfkill-regulator.c | 1 - net/rose/af_rose.c | 27 +- net/rose/rose_link.c | 7 - net/rose/rose_route.c | 6 +- net/rxrpc/ar-output.c | 1 + net/rxrpc/ar-recvmsg.c | 12 +- net/sctp/associola.c | 13 +- net/sctp/auth.c | 4 +- net/sctp/bind_addr.c | 17 +- net/sctp/input.c | 3 +- net/sctp/inqueue.c | 33 +- net/sctp/ipv6.c | 6 +- net/sctp/output.c | 9 +- net/sctp/outqueue.c | 17 + net/sctp/proc.c | 1 + net/sctp/protocol.c | 161 +- net/sctp/sm_make_chunk.c | 162 +- net/sctp/sm_sideeffect.c | 9 +- net/sctp/sm_statefuns.c | 95 +- net/sctp/socket.c | 281 ++- net/sctp/sysctl.c | 7 + net/sctp/ulpevent.c | 122 +- net/x25/af_x25.c | 487 ++--- net/x25/x25_dev.c | 44 +- net/x25/x25_in.c | 126 +- net/x25/x25_link.c | 87 +- net/x25/x25_proc.c | 1 + net/x25/x25_subr.c | 78 +- scripts/basic/fixdep.c | 1 + scripts/dtc/Makefile | 28 +- scripts/dtc/dtc-lexer.lex.c_shipped | 59 +- scripts/dtc/dtc-parser.tab.c_shipped | 116 +- scripts/dtc/dtc-parser.tab.h_shipped | 11 +- security/apparmor/apparmorfs.c | 2 +- security/apparmor/domain.c | 2 +- security/apparmor/ipc.c | 1 + security/apparmor/lib.c | 2 + security/apparmor/policy_unpack.c | 12 +- security/apparmor/procattr.c | 1 + sound/aoa/codecs/onyx.c | 4 +- sound/aoa/fabrics/layout.c | 2 +- sound/aoa/soundbus/i2sbus/core.c | 9 +- sound/aoa/soundbus/i2sbus/pcm.c | 1 + sound/atmel/abdac.c | 2 +- sound/atmel/ac97c.c | 6 +- sound/drivers/mpu401/mpu401.c | 5 +- sound/drivers/mpu401/mpu401_uart.c | 21 +- sound/drivers/opl3/opl3_lib.c | 1 + sound/drivers/opl3/opl3_oss.c | 1 + sound/drivers/opl3/opl3_seq.c | 1 + sound/drivers/opl3/opl3_synth.c | 1 + sound/drivers/opl4/opl4_lib.c | 1 + sound/drivers/opl4/opl4_proc.c | 1 + sound/drivers/opl4/opl4_seq.c | 1 + sound/drivers/pcsp/pcsp.c | 4 +- sound/drivers/pcsp/pcsp.h | 8 +- sound/drivers/vx/vx_core.c | 1 + sound/drivers/vx/vx_hwdep.c | 1 + sound/firewire/cmp.c | 2 +- sound/firewire/isight.c | 1 - sound/firewire/iso-resources.c | 1 + sound/firewire/packets-buffer.c | 1 + sound/firewire/speakers.c | 7 +- sound/i2c/cs8427.c | 1 + sound/i2c/i2c.c | 1 + sound/i2c/other/ak4113.c | 18 +- sound/i2c/other/ak4114.c | 19 +- sound/i2c/other/ak4117.c | 1 + sound/i2c/other/ak4xxx-adda.c | 3 +- sound/i2c/other/pt2258.c | 1 + sound/i2c/other/tea575x-tuner.c | 144 +- sound/i2c/tea6330t.c | 1 + sound/isa/ad1816a/ad1816a.c | 4 +- sound/isa/ad1816a/ad1816a_lib.c | 2 +- sound/isa/ad1848/ad1848.c | 2 +- sound/isa/als100.c | 3 +- sound/isa/azt2320.c | 5 +- sound/isa/cmi8330.c | 4 +- sound/isa/cs423x/cs4231.c | 3 +- sound/isa/cs423x/cs4236.c | 5 +- sound/isa/es1688/es1688.c | 4 +- sound/isa/es1688/es1688_lib.c | 3 +- sound/isa/es18xx.c | 8 +- sound/isa/galaxy/galaxy.c | 3 +- sound/isa/gus/gus_main.c | 3 +- sound/isa/gus/gus_volume.c | 1 + sound/isa/gus/gusclassic.c | 2 +- sound/isa/gus/gusextreme.c | 5 +- sound/isa/gus/gusmax.c | 4 +- sound/isa/gus/interwave.c | 4 +- sound/isa/msnd/msnd.c | 1 + sound/isa/msnd/msnd.h | 2 +- sound/isa/msnd/msnd_midi.c | 1 + sound/isa/msnd/msnd_pinnacle.c | 6 +- sound/isa/msnd/msnd_pinnacle_mixer.c | 1 + sound/isa/opl3sa2.c | 9 +- sound/isa/opti9xx/miro.c | 5 +- sound/isa/opti9xx/opti92x-ad1848.c | 6 +- sound/isa/sb/emu8000.c | 1 + sound/isa/sb/emu8000_callback.c | 1 + sound/isa/sb/emu8000_patch.c | 1 + sound/isa/sb/emu8000_synth.c | 1 + sound/isa/sb/jazz16.c | 1 - sound/isa/sb/sb16.c | 7 +- sound/isa/sb/sb16_csp.c | 1 + sound/isa/sb/sb16_main.c | 1 + sound/isa/sb/sb8.c | 2 +- sound/isa/sb/sb8_main.c | 1 + sound/isa/sb/sb_common.c | 3 +- sound/isa/sc6000.c | 3 +- sound/isa/sscape.c | 5 +- sound/isa/wavefront/wavefront.c | 7 +- sound/isa/wavefront/wavefront_fx.c | 1 + sound/isa/wavefront/wavefront_synth.c | 1 + sound/isa/wss/wss_lib.c | 3 +- sound/mips/Kconfig | 7 +- sound/mips/au1x00.c | 5 +- sound/mips/hal2.c | 1 + sound/mips/sgio2audio.c | 1 + sound/oss/Kconfig | 5 +- sound/oss/ad1848.c | 6 +- sound/oss/pas2_pcm.c | 8 +- sound/oss/pss.c | 6 +- sound/oss/sb_mixer.c | 6 +- sound/oss/sound_timer.c | 2 +- sound/pci/ac97/ac97_codec.c | 2 +- sound/pci/ac97/ac97_pcm.c | 1 + sound/pci/ali5451/ali5451.c | 6 +- sound/pci/asihpi/asihpi.c | 90 +- sound/pci/asihpi/hpi.h | 24 +- sound/pci/asihpi/hpi6000.c | 11 +- sound/pci/asihpi/hpi6205.c | 52 +- sound/pci/asihpi/hpi6205.h | 25 +- sound/pci/asihpi/hpi_internal.h | 155 +- sound/pci/asihpi/hpicmn.c | 22 +- sound/pci/asihpi/hpidspcd.c | 141 +- sound/pci/asihpi/hpidspcd.h | 72 +- sound/pci/asihpi/hpifunc.c | 86 +- sound/pci/asihpi/hpimsginit.c | 4 +- sound/pci/asihpi/hpimsgx.c | 6 +- sound/pci/asihpi/hpioctl.c | 35 +- sound/pci/asihpi/hpios.c | 8 - sound/pci/asihpi/hpios.h | 1 + sound/pci/au88x0/au88x0.c | 6 +- sound/pci/au88x0/au88x0_game.c | 1 + sound/pci/au88x0/au88x0_mpu401.c | 6 +- sound/pci/aw2/aw2-alsa.c | 5 +- sound/pci/ca0106/ca0106_main.c | 6 +- sound/pci/cs46xx/cs46xx.c | 4 +- sound/pci/cs46xx/cs46xx_lib.c | 3 +- sound/pci/cs5535audio/cs5535audio.c | 6 +- sound/pci/cs5535audio/cs5535audio_pcm.c | 2 +- sound/pci/ctxfi/ct20k2reg.h | 1 + sound/pci/ctxfi/ctatc.c | 107 +- sound/pci/ctxfi/ctatc.h | 8 +- sound/pci/ctxfi/ctdaio.c | 23 +- sound/pci/ctxfi/ctdaio.h | 1 + sound/pci/ctxfi/cthardware.h | 14 +- sound/pci/ctxfi/cthw20k1.c | 15 +- sound/pci/ctxfi/cthw20k2.c | 337 +++- sound/pci/ctxfi/ctmixer.c | 145 +- sound/pci/ctxfi/ctpcm.c | 2 +- sound/pci/ctxfi/ctsrc.c | 2 +- sound/pci/ctxfi/ctvmem.h | 2 +- sound/pci/ctxfi/xfi.c | 7 +- sound/pci/echoaudio/darla20.c | 4 +- sound/pci/echoaudio/darla24.c | 4 +- sound/pci/echoaudio/echo3g.c | 4 +- sound/pci/echoaudio/echoaudio.c | 8 +- sound/pci/echoaudio/gina20.c | 4 +- sound/pci/echoaudio/gina24.c | 4 +- sound/pci/echoaudio/indigo.c | 4 +- sound/pci/echoaudio/indigodj.c | 4 +- sound/pci/echoaudio/indigodjx.c | 4 +- sound/pci/echoaudio/indigoio.c | 4 +- sound/pci/echoaudio/indigoiox.c | 4 +- sound/pci/echoaudio/layla20.c | 4 +- sound/pci/echoaudio/layla24.c | 4 +- sound/pci/echoaudio/mia.c | 4 +- sound/pci/echoaudio/mona.c | 4 +- sound/pci/emu10k1/emu10k1.c | 10 +- sound/pci/emu10k1/emu10k1_callback.c | 11 +- sound/pci/emu10k1/emu10k1_main.c | 24 +- sound/pci/emu10k1/emu10k1_synth.c | 1 + sound/pci/emu10k1/emu10k1x.c | 6 +- sound/pci/emu10k1/emupcm.c | 7 +- sound/pci/emu10k1/emuproc.c | 12 - sound/pci/emu10k1/io.c | 1 + sound/pci/emu10k1/memory.c | 12 +- sound/pci/emu10k1/voice.c | 1 + sound/pci/ice1712/ak4xxx.c | 1 + sound/pci/ice1712/ice1712.c | 35 +- sound/pci/ice1712/ice1724.c | 6 +- sound/pci/oxygen/oxygen.c | 3 +- sound/pci/oxygen/oxygen_io.c | 1 + sound/pci/oxygen/oxygen_lib.c | 9 +- sound/pci/oxygen/oxygen_mixer.c | 25 +- sound/pci/oxygen/oxygen_pcm.c | 6 - sound/pci/oxygen/virtuoso.c | 5 +- sound/pci/oxygen/xonar_dg.c | 12 +- sound/pci/oxygen/xonar_pcm179x.c | 13 +- sound/pci/oxygen/xonar_wm87x6.c | 6 +- sound/pci/pcxhr/pcxhr.c | 6 +- sound/pci/pcxhr/pcxhr_hwdep.c | 1 + sound/pci/riptide/riptide.c | 36 +- sound/pci/rme9652/hdsp.c | 8 +- sound/pci/rme9652/hdspm.c | 331 ++-- sound/pci/rme9652/rme9652.c | 8 +- sound/pcmcia/vx/vxpocket.c | 2 +- sound/ppc/keywest.c | 1 - sound/ppc/pmac.c | 9 +- sound/ppc/powermac.c | 2 +- sound/ppc/snd_ps3.c | 5 +- sound/soc/atmel/Kconfig | 21 +- sound/soc/atmel/Makefile | 4 - sound/soc/atmel/atmel-pcm.c | 8 +- sound/soc/atmel/atmel-pcm.h | 2 +- sound/soc/atmel/atmel_ssc_dai.c | 24 +- sound/soc/atmel/playpaq_wm8510.c | 471 ----- sound/soc/atmel/sam9g20_wm8731.c | 3 +- sound/soc/atmel/snd-soc-afeb9260.c | 2 - sound/soc/au1x/Kconfig | 30 +- sound/soc/au1x/Makefile | 10 + sound/soc/au1x/db1200.c | 64 +- sound/soc/au1x/dbdma2.c | 98 +- sound/soc/au1x/psc-ac97.c | 61 +- sound/soc/au1x/psc-i2s.c | 55 +- sound/soc/au1x/psc.h | 16 +- sound/soc/davinci/Kconfig | 1 - sound/soc/davinci/davinci-evm.c | 2 - sound/soc/davinci/davinci-i2s.c | 5 +- sound/soc/davinci/davinci-mcasp.c | 20 +- sound/soc/davinci/davinci-pcm.c | 277 +-- sound/soc/ep93xx/edb93xx.c | 61 +- sound/soc/ep93xx/ep93xx-ac97.c | 6 +- sound/soc/ep93xx/ep93xx-i2s.c | 13 +- sound/soc/ep93xx/ep93xx-pcm.c | 148 +- sound/soc/ep93xx/simone.c | 64 +- sound/soc/ep93xx/snappercl15.c | 58 +- sound/soc/fsl/fsl_dma.c | 9 +- sound/soc/fsl/fsl_ssi.c | 211 ++- sound/soc/fsl/mpc5200_dma.c | 9 +- sound/soc/fsl/mpc8610_hpcd.c | 52 +- sound/soc/fsl/p1022_ds.c | 42 +- sound/soc/imx/Kconfig | 5 +- sound/soc/imx/imx-pcm-dma-mx2.c | 4 +- sound/soc/imx/imx-pcm-fiq.c | 17 +- sound/soc/imx/imx-ssi.c | 19 +- sound/soc/imx/imx-ssi.h | 9 +- sound/soc/imx/wm1133-ev1.c | 1 + sound/soc/jz4740/jz4740-pcm.c | 6 +- sound/soc/kirkwood/Kconfig | 3 +- sound/soc/kirkwood/kirkwood-dma.c | 6 +- sound/soc/kirkwood/kirkwood-i2s.c | 4 +- sound/soc/kirkwood/kirkwood-t5325.c | 2 - sound/soc/mid-x86/mfld_machine.c | 5 +- sound/soc/mid-x86/sst_platform.c | 25 +- sound/soc/nuc900/nuc900-ac97.c | 5 +- sound/soc/nuc900/nuc900-pcm.c | 10 +- sound/soc/pxa/Kconfig | 7 +- sound/soc/pxa/corgi.c | 1 - sound/soc/pxa/e740_wm9705.c | 2 - sound/soc/pxa/e750_wm9705.c | 2 - sound/soc/pxa/e800_wm9712.c | 1 - sound/soc/pxa/hx4700.c | 5 +- sound/soc/pxa/magician.c | 4 +- sound/soc/pxa/mioa701_wm9713.c | 1 - sound/soc/pxa/palm27x.c | 4 - sound/soc/pxa/poodle.c | 1 - sound/soc/pxa/pxa-ssp.c | 4 +- sound/soc/pxa/pxa2xx-pcm.c | 6 +- sound/soc/pxa/raumfeld.c | 4 +- sound/soc/pxa/saarb.c | 4 - sound/soc/pxa/spitz.c | 3 +- sound/soc/pxa/tavorevb3.c | 4 - sound/soc/pxa/tosa.c | 1 - sound/soc/pxa/z2.c | 6 +- sound/soc/pxa/zylonite.c | 1 - sound/soc/s6000/s6000-pcm.c | 8 +- sound/soc/sh/dma-sh7760.c | 6 +- sound/soc/sh/fsi-ak4642.c | 1 + sound/soc/sh/fsi-da7210.c | 1 + sound/soc/sh/fsi-hdmi.c | 1 + sound/soc/sh/fsi.c | 598 +++--- sound/soc/sh/sh7760-ac97.c | 7 - sound/soc/sh/siu_dai.c | 1 + sound/soc/sh/siu_pcm.c | 5 +- sound/soc/sh/ssi.c | 2 +- sound/soc/tegra/Kconfig | 9 + sound/soc/tegra/Makefile | 2 + sound/soc/tegra/tegra_asoc_utils.c | 1 + sound/soc/tegra/tegra_das.c | 4 +- sound/soc/tegra/tegra_i2s.c | 6 +- sound/soc/tegra/tegra_pcm.c | 8 +- sound/soc/tegra/tegra_wm8903.c | 8 +- sound/soc/tegra/trimslice.c | 2 - sound/soc/txx9/txx9aclc-ac97.c | 2 +- sound/soc/txx9/txx9aclc-generic.c | 2 +- sound/soc/txx9/txx9aclc.c | 6 +- sound/sparc/amd7930.c | 2 +- sound/sparc/dbri.c | 3 +- sound/synth/emux/emux.c | 1 + sound/synth/emux/emux_oss.c | 12 +- sound/synth/emux/emux_seq.c | 31 +- sound/synth/emux/emux_synth.c | 1 + sound/synth/emux/soundfont.c | 1 + sound/synth/util_mem.c | 1 + sound/usb/6fire/chip.c | 2 +- sound/usb/6fire/comm.c | 38 +- sound/usb/6fire/comm.h | 2 +- sound/usb/6fire/firmware.c | 26 +- sound/usb/6fire/midi.c | 16 +- sound/usb/6fire/midi.h | 6 +- sound/usb/6fire/pcm.c | 53 +- sound/usb/6fire/pcm.h | 2 +- sound/usb/caiaq/device.c | 8 +- sound/usb/caiaq/device.h | 1 + sound/usb/caiaq/input.c | 155 +- 731 files changed, 25234 insertions(+), 20985 deletions(-) delete mode 100644 arch/m68k/include/asm/entry_mm.h delete mode 100644 arch/x86/kernel/amd_iommu.c delete mode 100644 arch/x86/kernel/amd_iommu_init.c create mode 100644 drivers/zorro/.gitignore delete mode 100644 fs/btrfs/ref-cache.c delete mode 100644 fs/btrfs/ref-cache.h delete mode 100644 sound/soc/atmel/playpaq_wm8510.c diff --git a/arch/arm/boot/compressed/.gitignore b/arch/arm/boot/compressed/.gitignore index c602896..e0936a1 100644 --- a/arch/arm/boot/compressed/.gitignore +++ b/arch/arm/boot/compressed/.gitignore @@ -5,3 +5,12 @@ piggy.lzo piggy.lzma vmlinux vmlinux.lds + +# borrowed libfdt files +fdt.c +fdt.h +fdt_ro.c +fdt_rw.c +fdt_wip.c +libfdt.h +libfdt_internal.h diff --git a/arch/arm/configs/at91rm9200_defconfig b/arch/arm/configs/at91rm9200_defconfig index 38cb7c9..bbe4e1a 100644 --- a/arch/arm/configs/at91rm9200_defconfig +++ b/arch/arm/configs/at91rm9200_defconfig @@ -5,7 +5,6 @@ CONFIG_SYSVIPC=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y @@ -56,7 +55,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE=m CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m @@ -75,18 +73,8 @@ CONFIG_IPV6_TUNNEL=m CONFIG_BRIDGE=m CONFIG_VLAN_8021Q=m CONFIG_BT=m -CONFIG_BT_L2CAP=m -CONFIG_BT_SCO=m -CONFIG_BT_RFCOMM=m -CONFIG_BT_RFCOMM_TTY=y -CONFIG_BT_BNEP=m -CONFIG_BT_BNEP_MC_FILTER=y -CONFIG_BT_BNEP_PROTO_FILTER=y -CONFIG_BT_HIDP=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y -CONFIG_MTD_CONCAT=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_AFS_PARTS=y CONFIG_MTD_CHAR=y @@ -108,8 +96,6 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 -CONFIG_ATMEL_TCLIB=y -CONFIG_EEPROM_LEGACY=m CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=m @@ -119,14 +105,23 @@ CONFIG_SCSI_MULTI_LUN=y # CONFIG_SCSI_LOWLEVEL is not set CONFIG_NETDEVICES=y CONFIG_TUN=m +CONFIG_ARM_AT91_ETHER=y CONFIG_PHYLIB=y CONFIG_DAVICOM_PHY=y CONFIG_SMSC_PHY=y CONFIG_MICREL_PHY=y -CONFIG_NET_ETHERNET=y -CONFIG_ARM_AT91_ETHER=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set +CONFIG_PPP=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_FILTER=y +CONFIG_PPP_MPPE=m +CONFIG_PPP_MULTILINK=y +CONFIG_PPPOE=m +CONFIG_PPP_ASYNC=y +CONFIG_SLIP=m +CONFIG_SLIP_COMPRESSED=y +CONFIG_SLIP_SMART=y +CONFIG_SLIP_MODE_SLIP6=y CONFIG_USB_CATC=m CONFIG_USB_KAWETH=m CONFIG_USB_PEGASUS=m @@ -139,18 +134,6 @@ CONFIG_USB_NET_RNDIS_HOST=m CONFIG_USB_ALI_M5632=y CONFIG_USB_AN2720=y CONFIG_USB_EPSON2888=y -CONFIG_PPP=y -CONFIG_PPP_MULTILINK=y -CONFIG_PPP_FILTER=y -CONFIG_PPP_ASYNC=y -CONFIG_PPP_DEFLATE=y -CONFIG_PPP_BSDCOMP=y -CONFIG_PPP_MPPE=m -CONFIG_PPPOE=m -CONFIG_SLIP=m -CONFIG_SLIP_COMPRESSED=y -CONFIG_SLIP_SMART=y -CONFIG_SLIP_MODE_SLIP6=y # CONFIG_INPUT_MOUSEDEV_PSAUX is not set CONFIG_INPUT_MOUSEDEV_SCREEN_X=640 CONFIG_INPUT_MOUSEDEV_SCREEN_Y=480 @@ -158,9 +141,9 @@ CONFIG_INPUT_EVDEV=y CONFIG_KEYBOARD_GPIO=y # CONFIG_INPUT_MOUSE is not set CONFIG_INPUT_TOUCHSCREEN=y +CONFIG_LEGACY_PTY_COUNT=32 CONFIG_SERIAL_ATMEL=y CONFIG_SERIAL_ATMEL_CONSOLE=y -CONFIG_LEGACY_PTY_COUNT=32 CONFIG_HW_RANDOM=y CONFIG_I2C=y CONFIG_I2C_CHARDEV=y @@ -290,7 +273,6 @@ CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y -CONFIG_SMB_FS=m CONFIG_CIFS=m CONFIG_PARTITION_ADVANCED=y CONFIG_MAC_PARTITION=y @@ -335,7 +317,6 @@ CONFIG_NLS_UTF8=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_FTRACE is not set CONFIG_CRYPTO_PCBC=y CONFIG_CRYPTO_SHA1=y diff --git a/arch/arm/configs/cm_x300_defconfig b/arch/arm/configs/cm_x300_defconfig index 921e56a..f4b7672 100644 --- a/arch/arm/configs/cm_x300_defconfig +++ b/arch/arm/configs/cm_x300_defconfig @@ -5,7 +5,6 @@ CONFIG_SYSVIPC=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=18 -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_MODULES=y @@ -13,6 +12,7 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_ARCH_PXA=y +CONFIG_GPIO_PCA953X=y CONFIG_MACH_CM_X300=y CONFIG_NO_HZ=y CONFIG_AEABI=y @@ -23,7 +23,6 @@ CONFIG_CMDLINE="root=/dev/mtdblock5 rootfstype=ubifs console=ttyS2,38400" CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_FPE_NWFPE=y -CONFIG_PM=y CONFIG_APM_EMULATION=y CONFIG_NET=y CONFIG_PACKET=y @@ -40,8 +39,8 @@ CONFIG_IP_PNP_RARP=y # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set CONFIG_BT=m -CONFIG_BT_L2CAP=m -CONFIG_BT_SCO=m +CONFIG_BT_L2CAP=y +CONFIG_BT_SCO=y CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y CONFIG_BT_BNEP=m @@ -60,7 +59,6 @@ CONFIG_MTD_NAND_PXA3xx=y CONFIG_MTD_UBI=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y -# CONFIG_MISC_DEVICES is not set CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_NETDEVICES=y @@ -81,16 +79,15 @@ CONFIG_TOUCHSCREEN_WM97XX=m # CONFIG_TOUCHSCREEN_WM9705 is not set # CONFIG_TOUCHSCREEN_WM9713 is not set # CONFIG_SERIO is not set +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_PXA=y CONFIG_SERIAL_PXA_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set CONFIG_I2C=y CONFIG_I2C_PXA=y CONFIG_SPI=y CONFIG_SPI_GPIO=y CONFIG_GPIO_SYSFS=y -CONFIG_GPIO_PCA953X=y # CONFIG_HWMON is not set CONFIG_PMIC_DA903X=y CONFIG_REGULATOR=y @@ -102,7 +99,6 @@ CONFIG_LCD_CLASS_DEVICE=y CONFIG_LCD_TDO24M=y # CONFIG_BACKLIGHT_GENERIC is not set CONFIG_BACKLIGHT_DA903X=m -# CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y CONFIG_FONTS=y @@ -131,7 +127,6 @@ CONFIG_HID_GREENASIA=y CONFIG_HID_SMARTJOYPLUS=y CONFIG_HID_TOPSEED=y CONFIG_HID_THRUSTMASTER=y -CONFIG_HID_WACOM=m CONFIG_HID_ZEROPLUS=y CONFIG_USB=y CONFIG_USB_DEVICEFS=y @@ -152,7 +147,6 @@ CONFIG_RTC_DRV_PXA=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_FS_XATTR is not set -CONFIG_INOTIFY=y CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m CONFIG_TMPFS=y @@ -164,7 +158,6 @@ CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_SMB_FS=m CONFIG_CIFS=m CONFIG_CIFS_WEAK_PW_HASH=y CONFIG_PARTITION_ADVANCED=y @@ -172,9 +165,7 @@ CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_ISO8859_1=m CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y -# CONFIG_DETECT_SOFTLOCKUP is not set # CONFIG_SCHED_DEBUG is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_FTRACE is not set CONFIG_DEBUG_USER=y @@ -182,7 +173,6 @@ CONFIG_DEBUG_LL=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_AES=m -CONFIG_CRYPTO_ARC4=m # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set CONFIG_CRC_T10DIF=y diff --git a/arch/arm/configs/integrator_defconfig b/arch/arm/configs/integrator_defconfig index 7196ade..1103f62 100644 --- a/arch/arm/configs/integrator_defconfig +++ b/arch/arm/configs/integrator_defconfig @@ -1,5 +1,6 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y +CONFIG_TINY_RCU=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 @@ -8,20 +9,29 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_ARCH_INTEGRATOR=y CONFIG_ARCH_INTEGRATOR_AP=y +CONFIG_ARCH_INTEGRATOR_CP=y CONFIG_CPU_ARM720T=y CONFIG_CPU_ARM920T=y +CONFIG_CPU_ARM922T=y +CONFIG_CPU_ARM926T=y +CONFIG_CPU_ARM1020=y +CONFIG_CPU_ARM1022=y +CONFIG_CPU_ARM1026=y CONFIG_PCI=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_PREEMPT=y +CONFIG_AEABI=y CONFIG_LEDS=y CONFIG_LEDS_CPU=y CONFIG_ZBOOT_ROM_TEXT=0x0 CONFIG_ZBOOT_ROM_BSS=0x0 -CONFIG_CMDLINE="console=ttyAM0,38400n8 root=/dev/nfs ip=bootp mem=32M" +CONFIG_CMDLINE="console=ttyAM0,38400n8 root=/dev/nfs ip=bootp" CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_GOV_POWERSAVE=y CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_GOV_ONDEMAND=y CONFIG_FPE_NWFPE=y -CONFIG_PM=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -32,7 +42,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y # CONFIG_IPV6 is not set CONFIG_MTD=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_AFS_PARTS=y CONFIG_MTD_CHAR=y @@ -40,6 +49,7 @@ CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_ADV_OPTIONS=y CONFIG_MTD_CFI_INTELEXT=y +CONFIG_MTD_PHYSMAP=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 @@ -56,6 +66,8 @@ CONFIG_FB_MODE_HELPERS=y CONFIG_FB_MATROX=y CONFIG_FB_MATROX_MILLENIUM=y CONFIG_FB_MATROX_MYSTIQUE=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_PL030=y CONFIG_EXT2_FS=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y @@ -68,4 +80,3 @@ CONFIG_NFSD_V3=y CONFIG_PARTITION_ADVANCED=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_ERRORS=y diff --git a/arch/arm/configs/mmp2_defconfig b/arch/arm/configs/mmp2_defconfig index 47ad3b1..5a58452 100644 --- a/arch/arm/configs/mmp2_defconfig +++ b/arch/arm/configs/mmp2_defconfig @@ -8,6 +8,7 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_ARCH_MMP=y +CONFIG_MACH_BROWNSTONE=y CONFIG_MACH_FLINT=y CONFIG_MACH_MARVELL_JASPER=y CONFIG_HIGH_RES_TIMERS=y @@ -63,10 +64,16 @@ CONFIG_BACKLIGHT_MAX8925=y # CONFIG_USB_SUPPORT is not set CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_MAX8925=y +CONFIG_MMC=y # CONFIG_DNOTIFY is not set CONFIG_INOTIFY=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y +CONFIG_EXT2_FS=y +CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y +CONFIG_MSDOS_FS=y +CONFIG_FAT_DEFAULT_CODEPAGE=437 CONFIG_JFFS2_FS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y @@ -81,7 +88,7 @@ CONFIG_DEBUG_KERNEL=y # CONFIG_DEBUG_PREEMPT is not set CONFIG_DEBUG_INFO=y # CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_DYNAMIC_DEBUG=y +# CONFIG_DYNAMIC_DEBUG is not set CONFIG_DEBUG_USER=y CONFIG_DEBUG_ERRORS=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/arm/configs/mx3_defconfig b/arch/arm/configs/mx3_defconfig index 7c4b30b..cb0717f 100644 --- a/arch/arm/configs/mx3_defconfig +++ b/arch/arm/configs/mx3_defconfig @@ -3,7 +3,6 @@ CONFIG_SYSVIPC=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -13,20 +12,21 @@ CONFIG_MODVERSIONS=y # CONFIG_BLK_DEV_BSG is not set CONFIG_ARCH_MXC=y CONFIG_MACH_MX31ADS_WM1133_EV1=y +CONFIG_MACH_MX31LILLY=y +CONFIG_MACH_MX31LITE=y CONFIG_MACH_PCM037=y CONFIG_MACH_PCM037_EET=y -CONFIG_MACH_MX31LITE=y CONFIG_MACH_MX31_3DS=y CONFIG_MACH_MX31MOBOARD=y -CONFIG_MACH_MX31LILLY=y CONFIG_MACH_QONG=y -CONFIG_MACH_PCM043=y CONFIG_MACH_ARMADILLO5X0=y -CONFIG_MACH_MX35_3DS=y CONFIG_MACH_KZM_ARM11_01=y +CONFIG_MACH_PCM043=y +CONFIG_MACH_MX35_3DS=y CONFIG_MACH_EUKREA_CPUIMX35=y CONFIG_MXC_IRQ_PRIOR=y CONFIG_MXC_PWM=y +CONFIG_ARM_ERRATA_411920=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_PREEMPT=y @@ -35,7 +35,6 @@ CONFIG_ZBOOT_ROM_TEXT=0x0 CONFIG_ZBOOT_ROM_BSS=0x0 CONFIG_CMDLINE="noinitrd console=ttymxc0,115200 root=/dev/mtdblock2 rw ip=off" CONFIG_VFP=y -CONFIG_PM=y CONFIG_PM_DEBUG=y CONFIG_NET=y CONFIG_PACKET=y @@ -52,7 +51,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_FW_LOADER=m CONFIG_MTD=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y @@ -62,24 +60,27 @@ CONFIG_MTD_NAND=y CONFIG_MTD_NAND_MXC=y CONFIG_MTD_UBI=y # CONFIG_BLK_DEV is not set +CONFIG_MISC_DEVICES=y CONFIG_EEPROM_AT24=y CONFIG_NETDEVICES=y CONFIG_SMSC_PHY=y CONFIG_NET_ETHERNET=y CONFIG_SMSC911X=y CONFIG_DNET=y -CONFIG_FEC=y # CONFIG_NETDEV_1000 is not set # CONFIG_NETDEV_10000 is not set -# CONFIG_INPUT is not set +# CONFIG_INPUT_MOUSEDEV is not set +# CONFIG_KEYBOARD_ATKBD is not set +CONFIG_KEYBOARD_IMX=y +# CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_8250=m CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_SERIAL_IMX=y CONFIG_SERIAL_IMX_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set CONFIG_I2C=y CONFIG_I2C_CHARDEV=y @@ -89,12 +90,15 @@ CONFIG_W1=y CONFIG_W1_MASTER_MXC=y CONFIG_W1_SLAVE_THERM=y # CONFIG_HWMON is not set +CONFIG_WATCHDOG=y +CONFIG_IMX2_WDT=y CONFIG_MFD_WM8350_I2C=y CONFIG_REGULATOR=y CONFIG_REGULATOR_WM8350=y CONFIG_MEDIA_SUPPORT=y CONFIG_VIDEO_DEV=y -# CONFIG_VIDEO_ALLOW_V4L1 is not set +# CONFIG_RC_CORE is not set +# CONFIG_MEDIA_TUNER_CUSTOMISE is not set CONFIG_SOC_CAMERA=y CONFIG_SOC_CAMERA_MT9M001=y CONFIG_SOC_CAMERA_MT9M111=y @@ -105,9 +109,26 @@ CONFIG_SOC_CAMERA_OV772X=y CONFIG_VIDEO_MX3=y # CONFIG_RADIO_ADAPTERS is not set CONFIG_FB=y -# CONFIG_USB_SUPPORT is not set +CONFIG_SOUND=y +CONFIG_SND=y +# CONFIG_SND_ARM is not set +# CONFIG_SND_SPI is not set +CONFIG_SND_SOC=y +CONFIG_SND_IMX_SOC=y +CONFIG_SND_MXC_SOC_WM1133_EV1=y +CONFIG_SND_SOC_PHYCORE_AC97=y +CONFIG_SND_SOC_EUKREA_TLV320=y +CONFIG_USB=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_MXC=y +CONFIG_USB_GADGET=m +CONFIG_USB_FSL_USB2=m +CONFIG_USB_G_SERIAL=m +CONFIG_USB_ULPI=y CONFIG_MMC=y CONFIG_MMC_MXC=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_MXC=y CONFIG_DMADEVICES=y # CONFIG_DNOTIFY is not set CONFIG_TMPFS=y @@ -119,6 +140,5 @@ CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/arm/configs/mxs_defconfig b/arch/arm/configs/mxs_defconfig index 166d6aa..3ee3e84 100644 --- a/arch/arm/configs/mxs_defconfig +++ b/arch/arm/configs/mxs_defconfig @@ -22,8 +22,11 @@ CONFIG_BLK_DEV_INTEGRITY=y # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set CONFIG_ARCH_MXS=y +CONFIG_MACH_MX23EVK=y +CONFIG_MACH_MX28EVK=y CONFIG_MACH_STMP378X_DEVB=y CONFIG_MACH_TX28=y +CONFIG_MACH_M28EVK=y # CONFIG_ARM_THUMB is not set CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y @@ -88,7 +91,7 @@ CONFIG_DISPLAY_SUPPORT=m # CONFIG_USB_SUPPORT is not set CONFIG_MMC=y CONFIG_MMC_MXS=y -CONFIG_RTC_CLASS=m +CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1307=m CONFIG_DMADEVICES=y CONFIG_MXS_DMA=y diff --git a/arch/arm/configs/omap1_defconfig b/arch/arm/configs/omap1_defconfig index 7b63462..945a34f 100644 --- a/arch/arm/configs/omap1_defconfig +++ b/arch/arm/configs/omap1_defconfig @@ -48,13 +48,7 @@ CONFIG_MACH_SX1=y CONFIG_MACH_NOKIA770=y CONFIG_MACH_AMS_DELTA=y CONFIG_MACH_OMAP_GENERIC=y -CONFIG_OMAP_CLOCKS_SET_BY_BOOTLOADER=y -CONFIG_OMAP_ARM_216MHZ=y -CONFIG_OMAP_ARM_195MHZ=y -CONFIG_OMAP_ARM_192MHZ=y CONFIG_OMAP_ARM_182MHZ=y -CONFIG_OMAP_ARM_168MHZ=y -# CONFIG_OMAP_ARM_60MHZ is not set # CONFIG_ARM_THUMB is not set CONFIG_PCCARD=y CONFIG_OMAP_CF=y diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig index 8845f1c..1957297 100644 --- a/arch/arm/configs/tegra_defconfig +++ b/arch/arm/configs/tegra_defconfig @@ -25,6 +25,7 @@ CONFIG_MACH_KAEN=y CONFIG_MACH_PAZ00=y CONFIG_MACH_TRIMSLICE=y CONFIG_MACH_WARIO=y +CONFIG_MACH_VENTANA=y CONFIG_TEGRA_DEBUG_UARTD=y CONFIG_ARM_ERRATA_742230=y CONFIG_NO_HZ=y @@ -38,7 +39,6 @@ CONFIG_HIGHMEM=y CONFIG_ZBOOT_ROM_TEXT=0x0 CONFIG_ZBOOT_ROM_BSS=0x0 CONFIG_VFP=y -CONFIG_PM=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -65,6 +65,7 @@ CONFIG_IPV6_TUNNEL=y CONFIG_IPV6_MULTIPLE_TABLES=y # CONFIG_WIRELESS is not set # CONFIG_FIRMWARE_IN_KERNEL is not set +CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_MISC_DEVICES=y CONFIG_AD525X_DPOT=y @@ -72,34 +73,61 @@ CONFIG_AD525X_DPOT_I2C=y CONFIG_ICS932S401=y CONFIG_APDS9802ALS=y CONFIG_ISL29003=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +# CONFIG_SCSI_LOWLEVEL is not set CONFIG_NETDEVICES=y CONFIG_DUMMY=y +CONFIG_NET_ETHERNET=y CONFIG_R8169=y # CONFIG_NETDEV_10000 is not set # CONFIG_WLAN is not set +CONFIG_USB_PEGASUS=y +CONFIG_USB_USBNET=y +CONFIG_USB_NET_SMSC75XX=y +CONFIG_USB_NET_SMSC95XX=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set # CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set CONFIG_I2C=y # CONFIG_I2C_COMPAT is not set # CONFIG_I2C_HELPER_AUTO is not set CONFIG_I2C_TEGRA=y +CONFIG_SPI=y +CONFIG_SPI_TEGRA=y CONFIG_SENSORS_LM90=y CONFIG_MFD_TPS6586X=y CONFIG_REGULATOR=y CONFIG_REGULATOR_TPS6586X=y -# CONFIG_USB_SUPPORT is not set +CONFIG_SOUND=y +CONFIG_SND=y +# CONFIG_SND_SUPPORT_OLD_API is not set +# CONFIG_SND_DRIVERS is not set +# CONFIG_SND_PCI is not set +# CONFIG_SND_ARM is not set +# CONFIG_SND_SPI is not set +# CONFIG_SND_USB is not set +CONFIG_SND_SOC=y +CONFIG_SND_SOC_TEGRA=y +CONFIG_SND_SOC_TEGRA_WM8903=y +CONFIG_SND_SOC_TEGRA_TRIMSLICE=y +CONFIG_USB=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_TEGRA=y +CONFIG_USB_STORAGE=y CONFIG_MMC=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_SDHCI_TEGRA=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_TEGRA=y CONFIG_STAGING=y -# CONFIG_STAGING_EXCLUDE_BUILD is not set CONFIG_IIO=y CONFIG_SENSORS_ISL29018=y CONFIG_SENSORS_AK8975=y @@ -123,18 +151,15 @@ CONFIG_NLS_ISO8859_1=y CONFIG_PRINTK_TIME=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y CONFIG_DEBUG_SLAB=y # CONFIG_DEBUG_PREEMPT is not set CONFIG_DEBUG_MUTEXES=y -CONFIG_DEBUG_SPINLOCK_SLEEP=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_VM=y CONFIG_DEBUG_SG=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_DEBUG_LL=y CONFIG_EARLY_PRINTK=y CONFIG_CRYPTO_ECB=y diff --git a/arch/arm/configs/u300_defconfig b/arch/arm/configs/u300_defconfig index 4a5a126..374000e 100644 --- a/arch/arm/configs/u300_defconfig +++ b/arch/arm/configs/u300_defconfig @@ -14,8 +14,6 @@ CONFIG_MODULE_UNLOAD=y CONFIG_ARCH_U300=y CONFIG_MACH_U300=y CONFIG_MACH_U300_BS335=y -CONFIG_MACH_U300_DUAL_RAM=y -CONFIG_U300_DEBUG=y CONFIG_MACH_U300_SPIDUMMY=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y @@ -26,19 +24,21 @@ CONFIG_ZBOOT_ROM_BSS=0x0 CONFIG_CMDLINE="root=/dev/ram0 rw rootfstype=rootfs console=ttyAMA0,115200n8 lpj=515072" CONFIG_CPU_IDLE=y CONFIG_FPE_NWFPE=y -CONFIG_PM=y # CONFIG_SUSPEND is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_PREVENT_FIRMWARE_BUILD is not set -# CONFIG_MISC_DEVICES is not set +CONFIG_MTD=y +CONFIG_MTD_CMDLINE_PARTS=y +CONFIG_MTD_NAND=y +CONFIG_MTD_NAND_FSMC=y # CONFIG_INPUT_MOUSEDEV is not set CONFIG_INPUT_EVDEV=y # CONFIG_KEYBOARD_ATKBD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set +CONFIG_LEGACY_PTY_COUNT=16 CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y -CONFIG_LEGACY_PTY_COUNT=16 # CONFIG_HW_RANDOM is not set CONFIG_I2C=y # CONFIG_HWMON is not set @@ -51,6 +51,7 @@ CONFIG_BACKLIGHT_CLASS_DEVICE=y # CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set CONFIG_MMC=y +CONFIG_MMC_CLKGATE=y CONFIG_MMC_ARMMMCI=y CONFIG_RTC_CLASS=y # CONFIG_RTC_HCTOSYS is not set @@ -65,10 +66,8 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y # CONFIG_SCHED_DEBUG is not set CONFIG_TIMER_STATS=y # CONFIG_DEBUG_PREEMPT is not set CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_CRC32 is not set diff --git a/arch/arm/configs/u8500_defconfig b/arch/arm/configs/u8500_defconfig index a5cce24..2d7b6e7 100644 --- a/arch/arm/configs/u8500_defconfig +++ b/arch/arm/configs/u8500_defconfig @@ -10,13 +10,13 @@ CONFIG_MODULE_UNLOAD=y CONFIG_ARCH_U8500=y CONFIG_UX500_SOC_DB5500=y CONFIG_UX500_SOC_DB8500=y -CONFIG_MACH_U8500=y +CONFIG_MACH_HREFV60=y +CONFIG_MACH_SNOWBALL=y CONFIG_MACH_U5500=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_SMP=y CONFIG_NR_CPUS=2 -CONFIG_HOTPLUG_CPU=y CONFIG_PREEMPT=y CONFIG_AEABI=y CONFIG_CMDLINE="root=/dev/ram0 console=ttyAMA2,115200n8" @@ -24,9 +24,15 @@ CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y CONFIG_VFP=y CONFIG_NEON=y +CONFIG_PM_RUNTIME=y CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_NETFILTER=y CONFIG_PHONET=y -CONFIG_PHONET_PIPECTRLR=y # CONFIG_WIRELESS is not set CONFIG_CAIF=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" @@ -35,6 +41,10 @@ CONFIG_BLK_DEV_RAM_SIZE=65536 CONFIG_MISC_DEVICES=y CONFIG_AB8500_PWM=y CONFIG_SENSORS_BH1780=y +CONFIG_NETDEVICES=y +CONFIG_SMSC911X=y +CONFIG_SMSC_PHY=y +# CONFIG_WLAN is not set # CONFIG_INPUT_MOUSEDEV_PSAUX is not set CONFIG_INPUT_EVDEV=y # CONFIG_KEYBOARD_ATKBD is not set @@ -49,9 +59,9 @@ CONFIG_INPUT_MISC=y CONFIG_INPUT_AB8500_PONKEY=y # CONFIG_SERIO is not set CONFIG_VT_HW_CONSOLE_BINDING=y +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_NOMADIK=y CONFIG_I2C=y @@ -60,18 +70,20 @@ CONFIG_SPI=y CONFIG_SPI_PL022=y CONFIG_GPIO_STMPE=y CONFIG_GPIO_TC3589X=y -# CONFIG_HWMON is not set CONFIG_MFD_STMPE=y CONFIG_MFD_TC3589X=y +CONFIG_AB5500_CORE=y CONFIG_AB8500_CORE=y -CONFIG_REGULATOR=y CONFIG_REGULATOR_AB8500=y # CONFIG_HID_SUPPORT is not set -# CONFIG_USB_SUPPORT is not set +CONFIG_USB_GADGET=y +CONFIG_AB8500_USB=y CONFIG_MMC=y +CONFIG_MMC_CLKGATE=y CONFIG_MMC_ARMMMCI=y CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y +CONFIG_LEDS_LM3530=y CONFIG_LEDS_LP5521=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_AB8500=y @@ -79,8 +91,8 @@ CONFIG_RTC_DRV_PL031=y CONFIG_DMADEVICES=y CONFIG_STE_DMA40=y CONFIG_STAGING=y -# CONFIG_STAGING_EXCLUDE_BUILD is not set CONFIG_TOUCHSCREEN_SYNAPTICS_I2C_RMI4=y +CONFIG_HSEM_U8500=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y @@ -91,6 +103,8 @@ CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_CONFIGFS_FS=m # CONFIG_MISC_FILESYSTEMS is not set +CONFIG_NFS_FS=y +CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_MAGIC_SYSRQ=y @@ -99,7 +113,5 @@ CONFIG_DEBUG_KERNEL=y # CONFIG_SCHED_DEBUG is not set # CONFIG_DEBUG_PREEMPT is not set CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_FTRACE is not set CONFIG_DEBUG_USER=y -CONFIG_DEBUG_ERRORS=y diff --git a/arch/m68k/include/asm/entry_mm.h b/arch/m68k/include/asm/entry_mm.h deleted file mode 100644 index bdace4b..0000000 --- a/arch/m68k/include/asm/entry_mm.h +++ /dev/null @@ -1,128 +0,0 @@ -#ifndef __M68K_ENTRY_H -#define __M68K_ENTRY_H - -#include -#include -#ifdef __ASSEMBLY__ -#include -#endif - -/* - * Stack layout in 'ret_from_exception': - * - * This allows access to the syscall arguments in registers d1-d5 - * - * 0(sp) - d1 - * 4(sp) - d2 - * 8(sp) - d3 - * C(sp) - d4 - * 10(sp) - d5 - * 14(sp) - a0 - * 18(sp) - a1 - * 1C(sp) - a2 - * 20(sp) - d0 - * 24(sp) - orig_d0 - * 28(sp) - stack adjustment - * 2C(sp) - sr - * 2E(sp) - pc - * 32(sp) - format & vector - */ - -/* - * 97/05/14 Andreas: Register %a2 is now set to the current task throughout - * the whole kernel. - */ - -/* the following macro is used when enabling interrupts */ -#if defined(MACH_ATARI_ONLY) - /* block out HSYNC = ipl 2 on the atari */ -#define ALLOWINT (~0x500) -#define MAX_NOINT_IPL 3 -#else - /* portable version */ -#define ALLOWINT (~0x700) -#define MAX_NOINT_IPL 0 -#endif /* machine compilation types */ - -#ifdef __ASSEMBLY__ - -#define curptr a2 - -LFLUSH_I_AND_D = 0x00000808 - -#define SAVE_ALL_INT save_all_int -#define SAVE_ALL_SYS save_all_sys -#define RESTORE_ALL restore_all -/* - * This defines the normal kernel pt-regs layout. - * - * regs a3-a6 and d6-d7 are preserved by C code - * the kernel doesn't mess with usp unless it needs to - */ - -/* - * a -1 in the orig_d0 field signifies - * that the stack frame is NOT for syscall - */ -.macro save_all_int - clrl %sp@- | stk_adj - pea -1:w | orig d0 - movel %d0,%sp@- | d0 - moveml %d1-%d5/%a0-%a1/%curptr,%sp@- -.endm - -.macro save_all_sys - clrl %sp@- | stk_adj - movel %d0,%sp@- | orig d0 - movel %d0,%sp@- | d0 - moveml %d1-%d5/%a0-%a1/%curptr,%sp@- -.endm - -.macro restore_all - moveml %sp@+,%a0-%a1/%curptr/%d1-%d5 - movel %sp@+,%d0 - addql #4,%sp | orig d0 - addl %sp@+,%sp | stk adj - rte -.endm - -#define SWITCH_STACK_SIZE (6*4+4) /* includes return address */ - -#define SAVE_SWITCH_STACK save_switch_stack -#define RESTORE_SWITCH_STACK restore_switch_stack -#define GET_CURRENT(tmp) get_current tmp - -.macro save_switch_stack - moveml %a3-%a6/%d6-%d7,%sp@- -.endm - -.macro restore_switch_stack - moveml %sp@+,%a3-%a6/%d6-%d7 -.endm - -.macro get_current reg=%d0 - movel %sp,\reg - andw #-THREAD_SIZE,\reg - movel \reg,%curptr - movel %curptr@,%curptr -.endm - -#else /* C source */ - -#define STR(X) STR1(X) -#define STR1(X) #X - -#define SAVE_ALL_INT \ - "clrl %%sp@-;" /* stk_adj */ \ - "pea -1:w;" /* orig d0 = -1 */ \ - "movel %%d0,%%sp@-;" /* d0 */ \ - "moveml %%d1-%%d5/%%a0-%%a2,%%sp@-" -#define GET_CURRENT(tmp) \ - "movel %%sp,"#tmp"\n\t" \ - "andw #-"STR(THREAD_SIZE)","#tmp"\n\t" \ - "movel "#tmp",%%a2\n\t" \ - "movel %%a2@,%%a2" - -#endif - -#endif /* __M68K_ENTRY_H */ diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c deleted file mode 100644 index d9302b7..0000000 --- a/arch/x86/kernel/amd_iommu.c +++ /dev/null @@ -1,2770 +0,0 @@ -/* - * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. - * Author: Joerg Roedel - * Leo Duran - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) - -#define LOOP_TIMEOUT 100000 - -static DEFINE_RWLOCK(amd_iommu_devtable_lock); - -/* A list of preallocated protection domains */ -static LIST_HEAD(iommu_pd_list); -static DEFINE_SPINLOCK(iommu_pd_list_lock); - -/* - * Domain for untranslated devices - only allocated - * if iommu=pt passed on kernel cmd line. - */ -static struct protection_domain *pt_domain; - -static struct iommu_ops amd_iommu_ops; - -static struct dma_map_ops amd_iommu_dma_ops; - -/* - * general struct to manage commands send to an IOMMU - */ -struct iommu_cmd { - u32 data[4]; -}; - -static void update_domain(struct protection_domain *domain); - -/**************************************************************************** - * - * Helper functions - * - ****************************************************************************/ - -static inline u16 get_device_id(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - - return calc_devid(pdev->bus->number, pdev->devfn); -} - -static struct iommu_dev_data *get_dev_data(struct device *dev) -{ - return dev->archdata.iommu; -} - -/* - * In this function the list of preallocated protection domains is traversed to - * find the domain for a specific device - */ -static struct dma_ops_domain *find_protection_domain(u16 devid) -{ - struct dma_ops_domain *entry, *ret = NULL; - unsigned long flags; - u16 alias = amd_iommu_alias_table[devid]; - - if (list_empty(&iommu_pd_list)) - return NULL; - - spin_lock_irqsave(&iommu_pd_list_lock, flags); - - list_for_each_entry(entry, &iommu_pd_list, list) { - if (entry->target_dev == devid || - entry->target_dev == alias) { - ret = entry; - break; - } - } - - spin_unlock_irqrestore(&iommu_pd_list_lock, flags); - - return ret; -} - -/* - * This function checks if the driver got a valid device from the caller to - * avoid dereferencing invalid pointers. - */ -static bool check_device(struct device *dev) -{ - u16 devid; - - if (!dev || !dev->dma_mask) - return false; - - /* No device or no PCI device */ - if (dev->bus != &pci_bus_type) - return false; - - devid = get_device_id(dev); - - /* Out of our scope? */ - if (devid > amd_iommu_last_bdf) - return false; - - if (amd_iommu_rlookup_table[devid] == NULL) - return false; - - return true; -} - -static int iommu_init_device(struct device *dev) -{ - struct iommu_dev_data *dev_data; - struct pci_dev *pdev; - u16 devid, alias; - - if (dev->archdata.iommu) - return 0; - - dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL); - if (!dev_data) - return -ENOMEM; - - dev_data->dev = dev; - - devid = get_device_id(dev); - alias = amd_iommu_alias_table[devid]; - pdev = pci_get_bus_and_slot(PCI_BUS(alias), alias & 0xff); - if (pdev) - dev_data->alias = &pdev->dev; - else { - kfree(dev_data); - return -ENOTSUPP; - } - - atomic_set(&dev_data->bind, 0); - - dev->archdata.iommu = dev_data; - - - return 0; -} - -static void iommu_ignore_device(struct device *dev) -{ - u16 devid, alias; - - devid = get_device_id(dev); - alias = amd_iommu_alias_table[devid]; - - memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry)); - memset(&amd_iommu_dev_table[alias], 0, sizeof(struct dev_table_entry)); - - amd_iommu_rlookup_table[devid] = NULL; - amd_iommu_rlookup_table[alias] = NULL; -} - -static void iommu_uninit_device(struct device *dev) -{ - kfree(dev->archdata.iommu); -} - -void __init amd_iommu_uninit_devices(void) -{ - struct pci_dev *pdev = NULL; - - for_each_pci_dev(pdev) { - - if (!check_device(&pdev->dev)) - continue; - - iommu_uninit_device(&pdev->dev); - } -} - -int __init amd_iommu_init_devices(void) -{ - struct pci_dev *pdev = NULL; - int ret = 0; - - for_each_pci_dev(pdev) { - - if (!check_device(&pdev->dev)) - continue; - - ret = iommu_init_device(&pdev->dev); - if (ret == -ENOTSUPP) - iommu_ignore_device(&pdev->dev); - else if (ret) - goto out_free; - } - - return 0; - -out_free: - - amd_iommu_uninit_devices(); - - return ret; -} -#ifdef CONFIG_AMD_IOMMU_STATS - -/* - * Initialization code for statistics collection - */ - -DECLARE_STATS_COUNTER(compl_wait); -DECLARE_STATS_COUNTER(cnt_map_single); -DECLARE_STATS_COUNTER(cnt_unmap_single); -DECLARE_STATS_COUNTER(cnt_map_sg); -DECLARE_STATS_COUNTER(cnt_unmap_sg); -DECLARE_STATS_COUNTER(cnt_alloc_coherent); -DECLARE_STATS_COUNTER(cnt_free_coherent); -DECLARE_STATS_COUNTER(cross_page); -DECLARE_STATS_COUNTER(domain_flush_single); -DECLARE_STATS_COUNTER(domain_flush_all); -DECLARE_STATS_COUNTER(alloced_io_mem); -DECLARE_STATS_COUNTER(total_map_requests); - -static struct dentry *stats_dir; -static struct dentry *de_fflush; - -static void amd_iommu_stats_add(struct __iommu_counter *cnt) -{ - if (stats_dir == NULL) - return; - - cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir, - &cnt->value); -} - -static void amd_iommu_stats_init(void) -{ - stats_dir = debugfs_create_dir("amd-iommu", NULL); - if (stats_dir == NULL) - return; - - de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir, - (u32 *)&amd_iommu_unmap_flush); - - amd_iommu_stats_add(&compl_wait); - amd_iommu_stats_add(&cnt_map_single); - amd_iommu_stats_add(&cnt_unmap_single); - amd_iommu_stats_add(&cnt_map_sg); - amd_iommu_stats_add(&cnt_unmap_sg); - amd_iommu_stats_add(&cnt_alloc_coherent); - amd_iommu_stats_add(&cnt_free_coherent); - amd_iommu_stats_add(&cross_page); - amd_iommu_stats_add(&domain_flush_single); - amd_iommu_stats_add(&domain_flush_all); - amd_iommu_stats_add(&alloced_io_mem); - amd_iommu_stats_add(&total_map_requests); -} - -#endif - -/**************************************************************************** - * - * Interrupt handling functions - * - ****************************************************************************/ - -static void dump_dte_entry(u16 devid) -{ - int i; - - for (i = 0; i < 8; ++i) - pr_err("AMD-Vi: DTE[%d]: %08x\n", i, - amd_iommu_dev_table[devid].data[i]); -} - -static void dump_command(unsigned long phys_addr) -{ - struct iommu_cmd *cmd = phys_to_virt(phys_addr); - int i; - - for (i = 0; i < 4; ++i) - pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]); -} - -static void iommu_print_event(struct amd_iommu *iommu, void *__evt) -{ - u32 *event = __evt; - int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; - int devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; - int domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK; - int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; - u64 address = (u64)(((u64)event[3]) << 32) | event[2]; - - printk(KERN_ERR "AMD-Vi: Event logged ["); - - switch (type) { - case EVENT_TYPE_ILL_DEV: - printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x " - "address=0x%016llx flags=0x%04x]\n", - PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), - address, flags); - dump_dte_entry(devid); - break; - case EVENT_TYPE_IO_FAULT: - printk("IO_PAGE_FAULT device=%02x:%02x.%x " - "domain=0x%04x address=0x%016llx flags=0x%04x]\n", - PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), - domid, address, flags); - break; - case EVENT_TYPE_DEV_TAB_ERR: - printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x " - "address=0x%016llx flags=0x%04x]\n", - PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), - address, flags); - break; - case EVENT_TYPE_PAGE_TAB_ERR: - printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x " - "domain=0x%04x address=0x%016llx flags=0x%04x]\n", - PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), - domid, address, flags); - break; - case EVENT_TYPE_ILL_CMD: - printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); - dump_command(address); - break; - case EVENT_TYPE_CMD_HARD_ERR: - printk("COMMAND_HARDWARE_ERROR address=0x%016llx " - "flags=0x%04x]\n", address, flags); - break; - case EVENT_TYPE_IOTLB_INV_TO: - printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x " - "address=0x%016llx]\n", - PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), - address); - break; - case EVENT_TYPE_INV_DEV_REQ: - printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x " - "address=0x%016llx flags=0x%04x]\n", - PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), - address, flags); - break; - default: - printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type); - } -} - -static void iommu_poll_events(struct amd_iommu *iommu) -{ - u32 head, tail; - unsigned long flags; - - spin_lock_irqsave(&iommu->lock, flags); - - head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); - tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); - - while (head != tail) { - iommu_print_event(iommu, iommu->evt_buf + head); - head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size; - } - - writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); - - spin_unlock_irqrestore(&iommu->lock, flags); -} - -irqreturn_t amd_iommu_int_thread(int irq, void *data) -{ - struct amd_iommu *iommu; - - for_each_iommu(iommu) - iommu_poll_events(iommu); - - return IRQ_HANDLED; -} - -irqreturn_t amd_iommu_int_handler(int irq, void *data) -{ - return IRQ_WAKE_THREAD; -} - -/**************************************************************************** - * - * IOMMU command queuing functions - * - ****************************************************************************/ - -static int wait_on_sem(volatile u64 *sem) -{ - int i = 0; - - while (*sem == 0 && i < LOOP_TIMEOUT) { - udelay(1); - i += 1; - } - - if (i == LOOP_TIMEOUT) { - pr_alert("AMD-Vi: Completion-Wait loop timed out\n"); - return -EIO; - } - - return 0; -} - -static void copy_cmd_to_buffer(struct amd_iommu *iommu, - struct iommu_cmd *cmd, - u32 tail) -{ - u8 *target; - - target = iommu->cmd_buf + tail; - tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; - - /* Copy command to buffer */ - memcpy(target, cmd, sizeof(*cmd)); - - /* Tell the IOMMU about it */ - writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); -} - -static void build_completion_wait(struct iommu_cmd *cmd, u64 address) -{ - WARN_ON(address & 0x7ULL); - - memset(cmd, 0, sizeof(*cmd)); - cmd->data[0] = lower_32_bits(__pa(address)) | CMD_COMPL_WAIT_STORE_MASK; - cmd->data[1] = upper_32_bits(__pa(address)); - cmd->data[2] = 1; - CMD_SET_TYPE(cmd, CMD_COMPL_WAIT); -} - -static void build_inv_dte(struct iommu_cmd *cmd, u16 devid) -{ - memset(cmd, 0, sizeof(*cmd)); - cmd->data[0] = devid; - CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY); -} - -static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, - size_t size, u16 domid, int pde) -{ - u64 pages; - int s; - - pages = iommu_num_pages(address, size, PAGE_SIZE); - s = 0; - - if (pages > 1) { - /* - * If we have to flush more than one page, flush all - * TLB entries for this domain - */ - address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; - s = 1; - } - - address &= PAGE_MASK; - - memset(cmd, 0, sizeof(*cmd)); - cmd->data[1] |= domid; - cmd->data[2] = lower_32_bits(address); - cmd->data[3] = upper_32_bits(address); - CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); - if (s) /* size bit - we flush more than one 4kb page */ - cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; - if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ - cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; -} - -static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep, - u64 address, size_t size) -{ - u64 pages; - int s; - - pages = iommu_num_pages(address, size, PAGE_SIZE); - s = 0; - - if (pages > 1) { - /* - * If we have to flush more than one page, flush all - * TLB entries for this domain - */ - address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; - s = 1; - } - - address &= PAGE_MASK; - - memset(cmd, 0, sizeof(*cmd)); - cmd->data[0] = devid; - cmd->data[0] |= (qdep & 0xff) << 24; - cmd->data[1] = devid; - cmd->data[2] = lower_32_bits(address); - cmd->data[3] = upper_32_bits(address); - CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES); - if (s) - cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; -} - -static void build_inv_all(struct iommu_cmd *cmd) -{ - memset(cmd, 0, sizeof(*cmd)); - CMD_SET_TYPE(cmd, CMD_INV_ALL); -} - -/* - * Writes the command to the IOMMUs command buffer and informs the - * hardware about the new command. - */ -static int iommu_queue_command_sync(struct amd_iommu *iommu, - struct iommu_cmd *cmd, - bool sync) -{ - u32 left, tail, head, next_tail; - unsigned long flags; - - WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED); - -again: - spin_lock_irqsave(&iommu->lock, flags); - - head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); - tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); - next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; - left = (head - next_tail) % iommu->cmd_buf_size; - - if (left <= 2) { - struct iommu_cmd sync_cmd; - volatile u64 sem = 0; - int ret; - - build_completion_wait(&sync_cmd, (u64)&sem); - copy_cmd_to_buffer(iommu, &sync_cmd, tail); - - spin_unlock_irqrestore(&iommu->lock, flags); - - if ((ret = wait_on_sem(&sem)) != 0) - return ret; - - goto again; - } - - copy_cmd_to_buffer(iommu, cmd, tail); - - /* We need to sync now to make sure all commands are processed */ - iommu->need_sync = sync; - - spin_unlock_irqrestore(&iommu->lock, flags); - - return 0; -} - -static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) -{ - return iommu_queue_command_sync(iommu, cmd, true); -} - -/* - * This function queues a completion wait command into the command - * buffer of an IOMMU - */ -static int iommu_completion_wait(struct amd_iommu *iommu) -{ - struct iommu_cmd cmd; - volatile u64 sem = 0; - int ret; - - if (!iommu->need_sync) - return 0; - - build_completion_wait(&cmd, (u64)&sem); - - ret = iommu_queue_command_sync(iommu, &cmd, false); - if (ret) - return ret; - - return wait_on_sem(&sem); -} - -static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid) -{ - struct iommu_cmd cmd; - - build_inv_dte(&cmd, devid); - - return iommu_queue_command(iommu, &cmd); -} - -static void iommu_flush_dte_all(struct amd_iommu *iommu) -{ - u32 devid; - - for (devid = 0; devid <= 0xffff; ++devid) - iommu_flush_dte(iommu, devid); - - iommu_completion_wait(iommu); -} - -/* - * This function uses heavy locking and may disable irqs for some time. But - * this is no issue because it is only called during resume. - */ -static void iommu_flush_tlb_all(struct amd_iommu *iommu) -{ - u32 dom_id; - - for (dom_id = 0; dom_id <= 0xffff; ++dom_id) { - struct iommu_cmd cmd; - build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, - dom_id, 1); - iommu_queue_command(iommu, &cmd); - } - - iommu_completion_wait(iommu); -} - -static void iommu_flush_all(struct amd_iommu *iommu) -{ - struct iommu_cmd cmd; - - build_inv_all(&cmd); - - iommu_queue_command(iommu, &cmd); - iommu_completion_wait(iommu); -} - -void iommu_flush_all_caches(struct amd_iommu *iommu) -{ - if (iommu_feature(iommu, FEATURE_IA)) { - iommu_flush_all(iommu); - } else { - iommu_flush_dte_all(iommu); - iommu_flush_tlb_all(iommu); - } -} - -/* - * Command send function for flushing on-device TLB - */ -static int device_flush_iotlb(struct device *dev, u64 address, size_t size) -{ - struct pci_dev *pdev = to_pci_dev(dev); - struct amd_iommu *iommu; - struct iommu_cmd cmd; - u16 devid; - int qdep; - - qdep = pci_ats_queue_depth(pdev); - devid = get_device_id(dev); - iommu = amd_iommu_rlookup_table[devid]; - - build_inv_iotlb_pages(&cmd, devid, qdep, address, size); - - return iommu_queue_command(iommu, &cmd); -} - -/* - * Command send function for invalidating a device table entry - */ -static int device_flush_dte(struct device *dev) -{ - struct amd_iommu *iommu; - struct pci_dev *pdev; - u16 devid; - int ret; - - pdev = to_pci_dev(dev); - devid = get_device_id(dev); - iommu = amd_iommu_rlookup_table[devid]; - - ret = iommu_flush_dte(iommu, devid); - if (ret) - return ret; - - if (pci_ats_enabled(pdev)) - ret = device_flush_iotlb(dev, 0, ~0UL); - - return ret; -} - -/* - * TLB invalidation function which is called from the mapping functions. - * It invalidates a single PTE if the range to flush is within a single - * page. Otherwise it flushes the whole TLB of the IOMMU. - */ -static void __domain_flush_pages(struct protection_domain *domain, - u64 address, size_t size, int pde) -{ - struct iommu_dev_data *dev_data; - struct iommu_cmd cmd; - int ret = 0, i; - - build_inv_iommu_pages(&cmd, address, size, domain->id, pde); - - for (i = 0; i < amd_iommus_present; ++i) { - if (!domain->dev_iommu[i]) - continue; - - /* - * Devices of this domain are behind this IOMMU - * We need a TLB flush - */ - ret |= iommu_queue_command(amd_iommus[i], &cmd); - } - - list_for_each_entry(dev_data, &domain->dev_list, list) { - struct pci_dev *pdev = to_pci_dev(dev_data->dev); - - if (!pci_ats_enabled(pdev)) - continue; - - ret |= device_flush_iotlb(dev_data->dev, address, size); - } - - WARN_ON(ret); -} - -static void domain_flush_pages(struct protection_domain *domain, - u64 address, size_t size) -{ - __domain_flush_pages(domain, address, size, 0); -} - -/* Flush the whole IO/TLB for a given protection domain */ -static void domain_flush_tlb(struct protection_domain *domain) -{ - __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0); -} - -/* Flush the whole IO/TLB for a given protection domain - including PDE */ -static void domain_flush_tlb_pde(struct protection_domain *domain) -{ - __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); -} - -static void domain_flush_complete(struct protection_domain *domain) -{ - int i; - - for (i = 0; i < amd_iommus_present; ++i) { - if (!domain->dev_iommu[i]) - continue; - - /* - * Devices of this domain are behind this IOMMU - * We need to wait for completion of all commands. - */ - iommu_completion_wait(amd_iommus[i]); - } -} - - -/* - * This function flushes the DTEs for all devices in domain - */ -static void domain_flush_devices(struct protection_domain *domain) -{ - struct iommu_dev_data *dev_data; - - list_for_each_entry(dev_data, &domain->dev_list, list) - device_flush_dte(dev_data->dev); -} - -/**************************************************************************** - * - * The functions below are used the create the page table mappings for - * unity mapped regions. - * - ****************************************************************************/ - -/* - * This function is used to add another level to an IO page table. Adding - * another level increases the size of the address space by 9 bits to a size up - * to 64 bits. - */ -static bool increase_address_space(struct protection_domain *domain, - gfp_t gfp) -{ - u64 *pte; - - if (domain->mode == PAGE_MODE_6_LEVEL) - /* address space already 64 bit large */ - return false; - - pte = (void *)get_zeroed_page(gfp); - if (!pte) - return false; - - *pte = PM_LEVEL_PDE(domain->mode, - virt_to_phys(domain->pt_root)); - domain->pt_root = pte; - domain->mode += 1; - domain->updated = true; - - return true; -} - -static u64 *alloc_pte(struct protection_domain *domain, - unsigned long address, - unsigned long page_size, - u64 **pte_page, - gfp_t gfp) -{ - int level, end_lvl; - u64 *pte, *page; - - BUG_ON(!is_power_of_2(page_size)); - - while (address > PM_LEVEL_SIZE(domain->mode)) - increase_address_space(domain, gfp); - - level = domain->mode - 1; - pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; - address = PAGE_SIZE_ALIGN(address, page_size); - end_lvl = PAGE_SIZE_LEVEL(page_size); - - while (level > end_lvl) { - if (!IOMMU_PTE_PRESENT(*pte)) { - page = (u64 *)get_zeroed_page(gfp); - if (!page) - return NULL; - *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); - } - - /* No level skipping support yet */ - if (PM_PTE_LEVEL(*pte) != level) - return NULL; - - level -= 1; - - pte = IOMMU_PTE_PAGE(*pte); - - if (pte_page && level == end_lvl) - *pte_page = pte; - - pte = &pte[PM_LEVEL_INDEX(level, address)]; - } - - return pte; -} - -/* - * This function checks if there is a PTE for a given dma address. If - * there is one, it returns the pointer to it. - */ -static u64 *fetch_pte(struct protection_domain *domain, unsigned long address) -{ - int level; - u64 *pte; - - if (address > PM_LEVEL_SIZE(domain->mode)) - return NULL; - - level = domain->mode - 1; - pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; - - while (level > 0) { - - /* Not Present */ - if (!IOMMU_PTE_PRESENT(*pte)) - return NULL; - - /* Large PTE */ - if (PM_PTE_LEVEL(*pte) == 0x07) { - unsigned long pte_mask, __pte; - - /* - * If we have a series of large PTEs, make - * sure to return a pointer to the first one. - */ - pte_mask = PTE_PAGE_SIZE(*pte); - pte_mask = ~((PAGE_SIZE_PTE_COUNT(pte_mask) << 3) - 1); - __pte = ((unsigned long)pte) & pte_mask; - - return (u64 *)__pte; - } - - /* No level skipping support yet */ - if (PM_PTE_LEVEL(*pte) != level) - return NULL; - - level -= 1; - - /* Walk to the next level */ - pte = IOMMU_PTE_PAGE(*pte); - pte = &pte[PM_LEVEL_INDEX(level, address)]; - } - - return pte; -} - -/* - * Generic mapping functions. It maps a physical address into a DMA - * address space. It allocates the page table pages if necessary. - * In the future it can be extended to a generic mapping function - * supporting all features of AMD IOMMU page tables like level skipping - * and full 64 bit address spaces. - */ -static int iommu_map_page(struct protection_domain *dom, - unsigned long bus_addr, - unsigned long phys_addr, - int prot, - unsigned long page_size) -{ - u64 __pte, *pte; - int i, count; - - if (!(prot & IOMMU_PROT_MASK)) - return -EINVAL; - - bus_addr = PAGE_ALIGN(bus_addr); - phys_addr = PAGE_ALIGN(phys_addr); - count = PAGE_SIZE_PTE_COUNT(page_size); - pte = alloc_pte(dom, bus_addr, page_size, NULL, GFP_KERNEL); - - for (i = 0; i < count; ++i) - if (IOMMU_PTE_PRESENT(pte[i])) - return -EBUSY; - - if (page_size > PAGE_SIZE) { - __pte = PAGE_SIZE_PTE(phys_addr, page_size); - __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC; - } else - __pte = phys_addr | IOMMU_PTE_P | IOMMU_PTE_FC; - - if (prot & IOMMU_PROT_IR) - __pte |= IOMMU_PTE_IR; - if (prot & IOMMU_PROT_IW) - __pte |= IOMMU_PTE_IW; - - for (i = 0; i < count; ++i) - pte[i] = __pte; - - update_domain(dom); - - return 0; -} - -static unsigned long iommu_unmap_page(struct protection_domain *dom, - unsigned long bus_addr, - unsigned long page_size) -{ - unsigned long long unmap_size, unmapped; - u64 *pte; - - BUG_ON(!is_power_of_2(page_size)); - - unmapped = 0; - - while (unmapped < page_size) { - - pte = fetch_pte(dom, bus_addr); - - if (!pte) { - /* - * No PTE for this address - * move forward in 4kb steps - */ - unmap_size = PAGE_SIZE; - } else if (PM_PTE_LEVEL(*pte) == 0) { - /* 4kb PTE found for this address */ - unmap_size = PAGE_SIZE; - *pte = 0ULL; - } else { - int count, i; - - /* Large PTE found which maps this address */ - unmap_size = PTE_PAGE_SIZE(*pte); - count = PAGE_SIZE_PTE_COUNT(unmap_size); - for (i = 0; i < count; i++) - pte[i] = 0ULL; - } - - bus_addr = (bus_addr & ~(unmap_size - 1)) + unmap_size; - unmapped += unmap_size; - } - - BUG_ON(!is_power_of_2(unmapped)); - - return unmapped; -} - -/* - * This function checks if a specific unity mapping entry is needed for - * this specific IOMMU. - */ -static int iommu_for_unity_map(struct amd_iommu *iommu, - struct unity_map_entry *entry) -{ - u16 bdf, i; - - for (i = entry->devid_start; i <= entry->devid_end; ++i) { - bdf = amd_iommu_alias_table[i]; - if (amd_iommu_rlookup_table[bdf] == iommu) - return 1; - } - - return 0; -} - -/* - * This function actually applies the mapping to the page table of the - * dma_ops domain. - */ -static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, - struct unity_map_entry *e) -{ - u64 addr; - int ret; - - for (addr = e->address_start; addr < e->address_end; - addr += PAGE_SIZE) { - ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot, - PAGE_SIZE); - if (ret) - return ret; - /* - * if unity mapping is in aperture range mark the page - * as allocated in the aperture - */ - if (addr < dma_dom->aperture_size) - __set_bit(addr >> PAGE_SHIFT, - dma_dom->aperture[0]->bitmap); - } - - return 0; -} - -/* - * Init the unity mappings for a specific IOMMU in the system - * - * Basically iterates over all unity mapping entries and applies them to - * the default domain DMA of that IOMMU if necessary. - */ -static int iommu_init_unity_mappings(struct amd_iommu *iommu) -{ - struct unity_map_entry *entry; - int ret; - - list_for_each_entry(entry, &amd_iommu_unity_map, list) { - if (!iommu_for_unity_map(iommu, entry)) - continue; - ret = dma_ops_unity_map(iommu->default_dom, entry); - if (ret) - return ret; - } - - return 0; -} - -/* - * Inits the unity mappings required for a specific device - */ -static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, - u16 devid) -{ - struct unity_map_entry *e; - int ret; - - list_for_each_entry(e, &amd_iommu_unity_map, list) { - if (!(devid >= e->devid_start && devid <= e->devid_end)) - continue; - ret = dma_ops_unity_map(dma_dom, e); - if (ret) - return ret; - } - - return 0; -} - -/**************************************************************************** - * - * The next functions belong to the address allocator for the dma_ops - * interface functions. They work like the allocators in the other IOMMU - * drivers. Its basically a bitmap which marks the allocated pages in - * the aperture. Maybe it could be enhanced in the future to a more - * efficient allocator. - * - ****************************************************************************/ - -/* - * The address allocator core functions. - * - * called with domain->lock held - */ - -/* - * Used to reserve address ranges in the aperture (e.g. for exclusion - * ranges. - */ -static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, - unsigned long start_page, - unsigned int pages) -{ - unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT; - - if (start_page + pages > last_page) - pages = last_page - start_page; - - for (i = start_page; i < start_page + pages; ++i) { - int index = i / APERTURE_RANGE_PAGES; - int page = i % APERTURE_RANGE_PAGES; - __set_bit(page, dom->aperture[index]->bitmap); - } -} - -/* - * This function is used to add a new aperture range to an existing - * aperture in case of dma_ops domain allocation or address allocation - * failure. - */ -static int alloc_new_range(struct dma_ops_domain *dma_dom, - bool populate, gfp_t gfp) -{ - int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; - struct amd_iommu *iommu; - unsigned long i; - -#ifdef CONFIG_IOMMU_STRESS - populate = false; -#endif - - if (index >= APERTURE_MAX_RANGES) - return -ENOMEM; - - dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp); - if (!dma_dom->aperture[index]) - return -ENOMEM; - - dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp); - if (!dma_dom->aperture[index]->bitmap) - goto out_free; - - dma_dom->aperture[index]->offset = dma_dom->aperture_size; - - if (populate) { - unsigned long address = dma_dom->aperture_size; - int i, num_ptes = APERTURE_RANGE_PAGES / 512; - u64 *pte, *pte_page; - - for (i = 0; i < num_ptes; ++i) { - pte = alloc_pte(&dma_dom->domain, address, PAGE_SIZE, - &pte_page, gfp); - if (!pte) - goto out_free; - - dma_dom->aperture[index]->pte_pages[i] = pte_page; - - address += APERTURE_RANGE_SIZE / 64; - } - } - - dma_dom->aperture_size += APERTURE_RANGE_SIZE; - - /* Initialize the exclusion range if necessary */ - for_each_iommu(iommu) { - if (iommu->exclusion_start && - iommu->exclusion_start >= dma_dom->aperture[index]->offset - && iommu->exclusion_start < dma_dom->aperture_size) { - unsigned long startpage; - int pages = iommu_num_pages(iommu->exclusion_start, - iommu->exclusion_length, - PAGE_SIZE); - startpage = iommu->exclusion_start >> PAGE_SHIFT; - dma_ops_reserve_addresses(dma_dom, startpage, pages); - } - } - - /* - * Check for areas already mapped as present in the new aperture - * range and mark those pages as reserved in the allocator. Such - * mappings may already exist as a result of requested unity - * mappings for devices. - */ - for (i = dma_dom->aperture[index]->offset; - i < dma_dom->aperture_size; - i += PAGE_SIZE) { - u64 *pte = fetch_pte(&dma_dom->domain, i); - if (!pte || !IOMMU_PTE_PRESENT(*pte)) - continue; - - dma_ops_reserve_addresses(dma_dom, i >> PAGE_SHIFT, 1); - } - - update_domain(&dma_dom->domain); - - return 0; - -out_free: - update_domain(&dma_dom->domain); - - free_page((unsigned long)dma_dom->aperture[index]->bitmap); - - kfree(dma_dom->aperture[index]); - dma_dom->aperture[index] = NULL; - - return -ENOMEM; -} - -static unsigned long dma_ops_area_alloc(struct device *dev, - struct dma_ops_domain *dom, - unsigned int pages, - unsigned long align_mask, - u64 dma_mask, - unsigned long start) -{ - unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE; - int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT; - int i = start >> APERTURE_RANGE_SHIFT; - unsigned long boundary_size; - unsigned long address = -1; - unsigned long limit; - - next_bit >>= PAGE_SHIFT; - - boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, - PAGE_SIZE) >> PAGE_SHIFT; - - for (;i < max_index; ++i) { - unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT; - - if (dom->aperture[i]->offset >= dma_mask) - break; - - limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset, - dma_mask >> PAGE_SHIFT); - - address = iommu_area_alloc(dom->aperture[i]->bitmap, - limit, next_bit, pages, 0, - boundary_size, align_mask); - if (address != -1) { - address = dom->aperture[i]->offset + - (address << PAGE_SHIFT); - dom->next_address = address + (pages << PAGE_SHIFT); - break; - } - - next_bit = 0; - } - - return address; -} - -static unsigned long dma_ops_alloc_addresses(struct device *dev, - struct dma_ops_domain *dom, - unsigned int pages, - unsigned long align_mask, - u64 dma_mask) -{ - unsigned long address; - -#ifdef CONFIG_IOMMU_STRESS - dom->next_address = 0; - dom->need_flush = true; -#endif - - address = dma_ops_area_alloc(dev, dom, pages, align_mask, - dma_mask, dom->next_address); - - if (address == -1) { - dom->next_address = 0; - address = dma_ops_area_alloc(dev, dom, pages, align_mask, - dma_mask, 0); - dom->need_flush = true; - } - - if (unlikely(address == -1)) - address = DMA_ERROR_CODE; - - WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); - - return address; -} - -/* - * The address free function. - * - * called with domain->lock held - */ -static void dma_ops_free_addresses(struct dma_ops_domain *dom, - unsigned long address, - unsigned int pages) -{ - unsigned i = address >> APERTURE_RANGE_SHIFT; - struct aperture_range *range = dom->aperture[i]; - - BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL); - -#ifdef CONFIG_IOMMU_STRESS - if (i < 4) - return; -#endif - - if (address >= dom->next_address) - dom->need_flush = true; - - address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT; - - bitmap_clear(range->bitmap, address, pages); - -} - -/**************************************************************************** - * - * The next functions belong to the domain allocation. A domain is - * allocated for every IOMMU as the default domain. If device isolation - * is enabled, every device get its own domain. The most important thing - * about domains is the page table mapping the DMA address space they - * contain. - * - ****************************************************************************/ - -/* - * This function adds a protection domain to the global protection domain list - */ -static void add_domain_to_list(struct protection_domain *domain) -{ - unsigned long flags; - - spin_lock_irqsave(&amd_iommu_pd_lock, flags); - list_add(&domain->list, &amd_iommu_pd_list); - spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); -} - -/* - * This function removes a protection domain to the global - * protection domain list - */ -static void del_domain_from_list(struct protection_domain *domain) -{ - unsigned long flags; - - spin_lock_irqsave(&amd_iommu_pd_lock, flags); - list_del(&domain->list); - spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); -} - -static u16 domain_id_alloc(void) -{ - unsigned long flags; - int id; - - write_lock_irqsave(&amd_iommu_devtable_lock, flags); - id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID); - BUG_ON(id == 0); - if (id > 0 && id < MAX_DOMAIN_ID) - __set_bit(id, amd_iommu_pd_alloc_bitmap); - else - id = 0; - write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); - - return id; -} - -static void domain_id_free(int id) -{ - unsigned long flags; - - write_lock_irqsave(&amd_iommu_devtable_lock, flags); - if (id > 0 && id < MAX_DOMAIN_ID) - __clear_bit(id, amd_iommu_pd_alloc_bitmap); - write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); -} - -static void free_pagetable(struct protection_domain *domain) -{ - int i, j; - u64 *p1, *p2, *p3; - - p1 = domain->pt_root; - - if (!p1) - return; - - for (i = 0; i < 512; ++i) { - if (!IOMMU_PTE_PRESENT(p1[i])) - continue; - - p2 = IOMMU_PTE_PAGE(p1[i]); - for (j = 0; j < 512; ++j) { - if (!IOMMU_PTE_PRESENT(p2[j])) - continue; - p3 = IOMMU_PTE_PAGE(p2[j]); - free_page((unsigned long)p3); - } - - free_page((unsigned long)p2); - } - - free_page((unsigned long)p1); - - domain->pt_root = NULL; -} - -/* - * Free a domain, only used if something went wrong in the - * allocation path and we need to free an already allocated page table - */ -static void dma_ops_domain_free(struct dma_ops_domain *dom) -{ - int i; - - if (!dom) - return; - - del_domain_from_list(&dom->domain); - - free_pagetable(&dom->domain); - - for (i = 0; i < APERTURE_MAX_RANGES; ++i) { - if (!dom->aperture[i]) - continue; - free_page((unsigned long)dom->aperture[i]->bitmap); - kfree(dom->aperture[i]); - } - - kfree(dom); -} - -/* - * Allocates a new protection domain usable for the dma_ops functions. - * It also initializes the page table and the address allocator data - * structures required for the dma_ops interface - */ -static struct dma_ops_domain *dma_ops_domain_alloc(void) -{ - struct dma_ops_domain *dma_dom; - - dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL); - if (!dma_dom) - return NULL; - - spin_lock_init(&dma_dom->domain.lock); - - dma_dom->domain.id = domain_id_alloc(); - if (dma_dom->domain.id == 0) - goto free_dma_dom; - INIT_LIST_HEAD(&dma_dom->domain.dev_list); - dma_dom->domain.mode = PAGE_MODE_2_LEVEL; - dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); - dma_dom->domain.flags = PD_DMA_OPS_MASK; - dma_dom->domain.priv = dma_dom; - if (!dma_dom->domain.pt_root) - goto free_dma_dom; - - dma_dom->need_flush = false; - dma_dom->target_dev = 0xffff; - - add_domain_to_list(&dma_dom->domain); - - if (alloc_new_range(dma_dom, true, GFP_KERNEL)) - goto free_dma_dom; - - /* - * mark the first page as allocated so we never return 0 as - * a valid dma-address. So we can use 0 as error value - */ - dma_dom->aperture[0]->bitmap[0] = 1; - dma_dom->next_address = 0; - - - return dma_dom; - -free_dma_dom: - dma_ops_domain_free(dma_dom); - - return NULL; -} - -/* - * little helper function to check whether a given protection domain is a - * dma_ops domain - */ -static bool dma_ops_domain(struct protection_domain *domain) -{ - return domain->flags & PD_DMA_OPS_MASK; -} - -static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) -{ - u64 pte_root = virt_to_phys(domain->pt_root); - u32 flags = 0; - - pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) - << DEV_ENTRY_MODE_SHIFT; - pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; - - if (ats) - flags |= DTE_FLAG_IOTLB; - - amd_iommu_dev_table[devid].data[3] |= flags; - amd_iommu_dev_table[devid].data[2] = domain->id; - amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); - amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); -} - -static void clear_dte_entry(u16 devid) -{ - /* remove entry from the device table seen by the hardware */ - amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; - amd_iommu_dev_table[devid].data[1] = 0; - amd_iommu_dev_table[devid].data[2] = 0; - - amd_iommu_apply_erratum_63(devid); -} - -static void do_attach(struct device *dev, struct protection_domain *domain) -{ - struct iommu_dev_data *dev_data; - struct amd_iommu *iommu; - struct pci_dev *pdev; - bool ats = false; - u16 devid; - - devid = get_device_id(dev); - iommu = amd_iommu_rlookup_table[devid]; - dev_data = get_dev_data(dev); - pdev = to_pci_dev(dev); - - if (amd_iommu_iotlb_sup) - ats = pci_ats_enabled(pdev); - - /* Update data structures */ - dev_data->domain = domain; - list_add(&dev_data->list, &domain->dev_list); - set_dte_entry(devid, domain, ats); - - /* Do reference counting */ - domain->dev_iommu[iommu->index] += 1; - domain->dev_cnt += 1; - - /* Flush the DTE entry */ - device_flush_dte(dev); -} - -static void do_detach(struct device *dev) -{ - struct iommu_dev_data *dev_data; - struct amd_iommu *iommu; - u16 devid; - - devid = get_device_id(dev); - iommu = amd_iommu_rlookup_table[devid]; - dev_data = get_dev_data(dev); - - /* decrease reference counters */ - dev_data->domain->dev_iommu[iommu->index] -= 1; - dev_data->domain->dev_cnt -= 1; - - /* Update data structures */ - dev_data->domain = NULL; - list_del(&dev_data->list); - clear_dte_entry(devid); - - /* Flush the DTE entry */ - device_flush_dte(dev); -} - -/* - * If a device is not yet associated with a domain, this function does - * assigns it visible for the hardware - */ -static int __attach_device(struct device *dev, - struct protection_domain *domain) -{ - struct iommu_dev_data *dev_data, *alias_data; - int ret; - - dev_data = get_dev_data(dev); - alias_data = get_dev_data(dev_data->alias); - - if (!alias_data) - return -EINVAL; - - /* lock domain */ - spin_lock(&domain->lock); - - /* Some sanity checks */ - ret = -EBUSY; - if (alias_data->domain != NULL && - alias_data->domain != domain) - goto out_unlock; - - if (dev_data->domain != NULL && - dev_data->domain != domain) - goto out_unlock; - - /* Do real assignment */ - if (dev_data->alias != dev) { - alias_data = get_dev_data(dev_data->alias); - if (alias_data->domain == NULL) - do_attach(dev_data->alias, domain); - - atomic_inc(&alias_data->bind); - } - - if (dev_data->domain == NULL) - do_attach(dev, domain); - - atomic_inc(&dev_data->bind); - - ret = 0; - -out_unlock: - - /* ready */ - spin_unlock(&domain->lock); - - return ret; -} - -/* - * If a device is not yet associated with a domain, this function does - * assigns it visible for the hardware - */ -static int attach_device(struct device *dev, - struct protection_domain *domain) -{ - struct pci_dev *pdev = to_pci_dev(dev); - unsigned long flags; - int ret; - - if (amd_iommu_iotlb_sup) - pci_enable_ats(pdev, PAGE_SHIFT); - - write_lock_irqsave(&amd_iommu_devtable_lock, flags); - ret = __attach_device(dev, domain); - write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); - - /* - * We might boot into a crash-kernel here. The crashed kernel - * left the caches in the IOMMU dirty. So we have to flush - * here to evict all dirty stuff. - */ - domain_flush_tlb_pde(domain); - - return ret; -} - -/* - * Removes a device from a protection domain (unlocked) - */ -static void __detach_device(struct device *dev) -{ - struct iommu_dev_data *dev_data = get_dev_data(dev); - struct iommu_dev_data *alias_data; - struct protection_domain *domain; - unsigned long flags; - - BUG_ON(!dev_data->domain); - - domain = dev_data->domain; - - spin_lock_irqsave(&domain->lock, flags); - - if (dev_data->alias != dev) { - alias_data = get_dev_data(dev_data->alias); - if (atomic_dec_and_test(&alias_data->bind)) - do_detach(dev_data->alias); - } - - if (atomic_dec_and_test(&dev_data->bind)) - do_detach(dev); - - spin_unlock_irqrestore(&domain->lock, flags); - - /* - * If we run in passthrough mode the device must be assigned to the - * passthrough domain if it is detached from any other domain. - * Make sure we can deassign from the pt_domain itself. - */ - if (iommu_pass_through && - (dev_data->domain == NULL && domain != pt_domain)) - __attach_device(dev, pt_domain); -} - -/* - * Removes a device from a protection domain (with devtable_lock held) - */ -static void detach_device(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - unsigned long flags; - - /* lock device table */ - write_lock_irqsave(&amd_iommu_devtable_lock, flags); - __detach_device(dev); - write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); - - if (amd_iommu_iotlb_sup && pci_ats_enabled(pdev)) - pci_disable_ats(pdev); -} - -/* - * Find out the protection domain structure for a given PCI device. This - * will give us the pointer to the page table root for example. - */ -static struct protection_domain *domain_for_device(struct device *dev) -{ - struct protection_domain *dom; - struct iommu_dev_data *dev_data, *alias_data; - unsigned long flags; - u16 devid; - - devid = get_device_id(dev); - dev_data = get_dev_data(dev); - alias_data = get_dev_data(dev_data->alias); - if (!alias_data) - return NULL; - - read_lock_irqsave(&amd_iommu_devtable_lock, flags); - dom = dev_data->domain; - if (dom == NULL && - alias_data->domain != NULL) { - __attach_device(dev, alias_data->domain); - dom = alias_data->domain; - } - - read_unlock_irqrestore(&amd_iommu_devtable_lock, flags); - - return dom; -} - -static int device_change_notifier(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct device *dev = data; - u16 devid; - struct protection_domain *domain; - struct dma_ops_domain *dma_domain; - struct amd_iommu *iommu; - unsigned long flags; - - if (!check_device(dev)) - return 0; - - devid = get_device_id(dev); - iommu = amd_iommu_rlookup_table[devid]; - - switch (action) { - case BUS_NOTIFY_UNBOUND_DRIVER: - - domain = domain_for_device(dev); - - if (!domain) - goto out; - if (iommu_pass_through) - break; - detach_device(dev); - break; - case BUS_NOTIFY_ADD_DEVICE: - - iommu_init_device(dev); - - domain = domain_for_device(dev); - - dma_domain = find_protection_domain(devid); - if (!dma_domain) { - /* allocate a protection domain if a device is added */ - dma_domain = dma_ops_domain_alloc(); - if (!dma_domain) - goto out; - dma_domain->target_dev = devid; - - spin_lock_irqsave(&iommu_pd_list_lock, flags); - list_add_tail(&dma_domain->list, &iommu_pd_list); - spin_unlock_irqrestore(&iommu_pd_list_lock, flags); - } - - dev->archdata.dma_ops = &amd_iommu_dma_ops; - - break; - case BUS_NOTIFY_DEL_DEVICE: - - iommu_uninit_device(dev); - - default: - goto out; - } - - device_flush_dte(dev); - iommu_completion_wait(iommu); - -out: - return 0; -} - -static struct notifier_block device_nb = { - .notifier_call = device_change_notifier, -}; - -void amd_iommu_init_notifier(void) -{ - bus_register_notifier(&pci_bus_type, &device_nb); -} - -/***************************************************************************** - * - * The next functions belong to the dma_ops mapping/unmapping code. - * - *****************************************************************************/ - -/* - * In the dma_ops path we only have the struct device. This function - * finds the corresponding IOMMU, the protection domain and the - * requestor id for a given device. - * If the device is not yet associated with a domain this is also done - * in this function. - */ -static struct protection_domain *get_domain(struct device *dev) -{ - struct protection_domain *domain; - struct dma_ops_domain *dma_dom; - u16 devid = get_device_id(dev); - - if (!check_device(dev)) - return ERR_PTR(-EINVAL); - - domain = domain_for_device(dev); - if (domain != NULL && !dma_ops_domain(domain)) - return ERR_PTR(-EBUSY); - - if (domain != NULL) - return domain; - - /* Device not bount yet - bind it */ - dma_dom = find_protection_domain(devid); - if (!dma_dom) - dma_dom = amd_iommu_rlookup_table[devid]->default_dom; - attach_device(dev, &dma_dom->domain); - DUMP_printk("Using protection domain %d for device %s\n", - dma_dom->domain.id, dev_name(dev)); - - return &dma_dom->domain; -} - -static void update_device_table(struct protection_domain *domain) -{ - struct iommu_dev_data *dev_data; - - list_for_each_entry(dev_data, &domain->dev_list, list) { - struct pci_dev *pdev = to_pci_dev(dev_data->dev); - u16 devid = get_device_id(dev_data->dev); - set_dte_entry(devid, domain, pci_ats_enabled(pdev)); - } -} - -static void update_domain(struct protection_domain *domain) -{ - if (!domain->updated) - return; - - update_device_table(domain); - - domain_flush_devices(domain); - domain_flush_tlb_pde(domain); - - domain->updated = false; -} - -/* - * This function fetches the PTE for a given address in the aperture - */ -static u64* dma_ops_get_pte(struct dma_ops_domain *dom, - unsigned long address) -{ - struct aperture_range *aperture; - u64 *pte, *pte_page; - - aperture = dom->aperture[APERTURE_RANGE_INDEX(address)]; - if (!aperture) - return NULL; - - pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; - if (!pte) { - pte = alloc_pte(&dom->domain, address, PAGE_SIZE, &pte_page, - GFP_ATOMIC); - aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; - } else - pte += PM_LEVEL_INDEX(0, address); - - update_domain(&dom->domain); - - return pte; -} - -/* - * This is the generic map function. It maps one 4kb page at paddr to - * the given address in the DMA address space for the domain. - */ -static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom, - unsigned long address, - phys_addr_t paddr, - int direction) -{ - u64 *pte, __pte; - - WARN_ON(address > dom->aperture_size); - - paddr &= PAGE_MASK; - - pte = dma_ops_get_pte(dom, address); - if (!pte) - return DMA_ERROR_CODE; - - __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; - - if (direction == DMA_TO_DEVICE) - __pte |= IOMMU_PTE_IR; - else if (direction == DMA_FROM_DEVICE) - __pte |= IOMMU_PTE_IW; - else if (direction == DMA_BIDIRECTIONAL) - __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW; - - WARN_ON(*pte); - - *pte = __pte; - - return (dma_addr_t)address; -} - -/* - * The generic unmapping function for on page in the DMA address space. - */ -static void dma_ops_domain_unmap(struct dma_ops_domain *dom, - unsigned long address) -{ - struct aperture_range *aperture; - u64 *pte; - - if (address >= dom->aperture_size) - return; - - aperture = dom->aperture[APERTURE_RANGE_INDEX(address)]; - if (!aperture) - return; - - pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; - if (!pte) - return; - - pte += PM_LEVEL_INDEX(0, address); - - WARN_ON(!*pte); - - *pte = 0ULL; -} - -/* - * This function contains common code for mapping of a physically - * contiguous memory region into DMA address space. It is used by all - * mapping functions provided with this IOMMU driver. - * Must be called with the domain lock held. - */ -static dma_addr_t __map_single(struct device *dev, - struct dma_ops_domain *dma_dom, - phys_addr_t paddr, - size_t size, - int dir, - bool align, - u64 dma_mask) -{ - dma_addr_t offset = paddr & ~PAGE_MASK; - dma_addr_t address, start, ret; - unsigned int pages; - unsigned long align_mask = 0; - int i; - - pages = iommu_num_pages(paddr, size, PAGE_SIZE); - paddr &= PAGE_MASK; - - INC_STATS_COUNTER(total_map_requests); - - if (pages > 1) - INC_STATS_COUNTER(cross_page); - - if (align) - align_mask = (1UL << get_order(size)) - 1; - -retry: - address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, - dma_mask); - if (unlikely(address == DMA_ERROR_CODE)) { - /* - * setting next_address here will let the address - * allocator only scan the new allocated range in the - * first run. This is a small optimization. - */ - dma_dom->next_address = dma_dom->aperture_size; - - if (alloc_new_range(dma_dom, false, GFP_ATOMIC)) - goto out; - - /* - * aperture was successfully enlarged by 128 MB, try - * allocation again - */ - goto retry; - } - - start = address; - for (i = 0; i < pages; ++i) { - ret = dma_ops_domain_map(dma_dom, start, paddr, dir); - if (ret == DMA_ERROR_CODE) - goto out_unmap; - - paddr += PAGE_SIZE; - start += PAGE_SIZE; - } - address += offset; - - ADD_STATS_COUNTER(alloced_io_mem, size); - - if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { - domain_flush_tlb(&dma_dom->domain); - dma_dom->need_flush = false; - } else if (unlikely(amd_iommu_np_cache)) - domain_flush_pages(&dma_dom->domain, address, size); - -out: - return address; - -out_unmap: - - for (--i; i >= 0; --i) { - start -= PAGE_SIZE; - dma_ops_domain_unmap(dma_dom, start); - } - - dma_ops_free_addresses(dma_dom, address, pages); - - return DMA_ERROR_CODE; -} - -/* - * Does the reverse of the __map_single function. Must be called with - * the domain lock held too - */ -static void __unmap_single(struct dma_ops_domain *dma_dom, - dma_addr_t dma_addr, - size_t size, - int dir) -{ - dma_addr_t flush_addr; - dma_addr_t i, start; - unsigned int pages; - - if ((dma_addr == DMA_ERROR_CODE) || - (dma_addr + size > dma_dom->aperture_size)) - return; - - flush_addr = dma_addr; - pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); - dma_addr &= PAGE_MASK; - start = dma_addr; - - for (i = 0; i < pages; ++i) { - dma_ops_domain_unmap(dma_dom, start); - start += PAGE_SIZE; - } - - SUB_STATS_COUNTER(alloced_io_mem, size); - - dma_ops_free_addresses(dma_dom, dma_addr, pages); - - if (amd_iommu_unmap_flush || dma_dom->need_flush) { - domain_flush_pages(&dma_dom->domain, flush_addr, size); - dma_dom->need_flush = false; - } -} - -/* - * The exported map_single function for dma_ops. - */ -static dma_addr_t map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - struct dma_attrs *attrs) -{ - unsigned long flags; - struct protection_domain *domain; - dma_addr_t addr; - u64 dma_mask; - phys_addr_t paddr = page_to_phys(page) + offset; - - INC_STATS_COUNTER(cnt_map_single); - - domain = get_domain(dev); - if (PTR_ERR(domain) == -EINVAL) - return (dma_addr_t)paddr; - else if (IS_ERR(domain)) - return DMA_ERROR_CODE; - - dma_mask = *dev->dma_mask; - - spin_lock_irqsave(&domain->lock, flags); - - addr = __map_single(dev, domain->priv, paddr, size, dir, false, - dma_mask); - if (addr == DMA_ERROR_CODE) - goto out; - - domain_flush_complete(domain); - -out: - spin_unlock_irqrestore(&domain->lock, flags); - - return addr; -} - -/* - * The exported unmap_single function for dma_ops. - */ -static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, - enum dma_data_direction dir, struct dma_attrs *attrs) -{ - unsigned long flags; - struct protection_domain *domain; - - INC_STATS_COUNTER(cnt_unmap_single); - - domain = get_domain(dev); - if (IS_ERR(domain)) - return; - - spin_lock_irqsave(&domain->lock, flags); - - __unmap_single(domain->priv, dma_addr, size, dir); - - domain_flush_complete(domain); - - spin_unlock_irqrestore(&domain->lock, flags); -} - -/* - * This is a special map_sg function which is used if we should map a - * device which is not handled by an AMD IOMMU in the system. - */ -static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist, - int nelems, int dir) -{ - struct scatterlist *s; - int i; - - for_each_sg(sglist, s, nelems, i) { - s->dma_address = (dma_addr_t)sg_phys(s); - s->dma_length = s->length; - } - - return nelems; -} - -/* - * The exported map_sg function for dma_ops (handles scatter-gather - * lists). - */ -static int map_sg(struct device *dev, struct scatterlist *sglist, - int nelems, enum dma_data_direction dir, - struct dma_attrs *attrs) -{ - unsigned long flags; - struct protection_domain *domain; - int i; - struct scatterlist *s; - phys_addr_t paddr; - int mapped_elems = 0; - u64 dma_mask; - - INC_STATS_COUNTER(cnt_map_sg); - - domain = get_domain(dev); - if (PTR_ERR(domain) == -EINVAL) - return map_sg_no_iommu(dev, sglist, nelems, dir); - else if (IS_ERR(domain)) - return 0; - - dma_mask = *dev->dma_mask; - - spin_lock_irqsave(&domain->lock, flags); - - for_each_sg(sglist, s, nelems, i) { - paddr = sg_phys(s); - - s->dma_address = __map_single(dev, domain->priv, - paddr, s->length, dir, false, - dma_mask); - - if (s->dma_address) { - s->dma_length = s->length; - mapped_elems++; - } else - goto unmap; - } - - domain_flush_complete(domain); - -out: - spin_unlock_irqrestore(&domain->lock, flags); - - return mapped_elems; -unmap: - for_each_sg(sglist, s, mapped_elems, i) { - if (s->dma_address) - __unmap_single(domain->priv, s->dma_address, - s->dma_length, dir); - s->dma_address = s->dma_length = 0; - } - - mapped_elems = 0; - - goto out; -} - -/* - * The exported map_sg function for dma_ops (handles scatter-gather - * lists). - */ -static void unmap_sg(struct device *dev, struct scatterlist *sglist, - int nelems, enum dma_data_direction dir, - struct dma_attrs *attrs) -{ - unsigned long flags; - struct protection_domain *domain; - struct scatterlist *s; - int i; - - INC_STATS_COUNTER(cnt_unmap_sg); - - domain = get_domain(dev); - if (IS_ERR(domain)) - return; - - spin_lock_irqsave(&domain->lock, flags); - - for_each_sg(sglist, s, nelems, i) { - __unmap_single(domain->priv, s->dma_address, - s->dma_length, dir); - s->dma_address = s->dma_length = 0; - } - - domain_flush_complete(domain); - - spin_unlock_irqrestore(&domain->lock, flags); -} - -/* - * The exported alloc_coherent function for dma_ops. - */ -static void *alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t flag) -{ - unsigned long flags; - void *virt_addr; - struct protection_domain *domain; - phys_addr_t paddr; - u64 dma_mask = dev->coherent_dma_mask; - - INC_STATS_COUNTER(cnt_alloc_coherent); - - domain = get_domain(dev); - if (PTR_ERR(domain) == -EINVAL) { - virt_addr = (void *)__get_free_pages(flag, get_order(size)); - *dma_addr = __pa(virt_addr); - return virt_addr; - } else if (IS_ERR(domain)) - return NULL; - - dma_mask = dev->coherent_dma_mask; - flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); - flag |= __GFP_ZERO; - - virt_addr = (void *)__get_free_pages(flag, get_order(size)); - if (!virt_addr) - return NULL; - - paddr = virt_to_phys(virt_addr); - - if (!dma_mask) - dma_mask = *dev->dma_mask; - - spin_lock_irqsave(&domain->lock, flags); - - *dma_addr = __map_single(dev, domain->priv, paddr, - size, DMA_BIDIRECTIONAL, true, dma_mask); - - if (*dma_addr == DMA_ERROR_CODE) { - spin_unlock_irqrestore(&domain->lock, flags); - goto out_free; - } - - domain_flush_complete(domain); - - spin_unlock_irqrestore(&domain->lock, flags); - - return virt_addr; - -out_free: - - free_pages((unsigned long)virt_addr, get_order(size)); - - return NULL; -} - -/* - * The exported free_coherent function for dma_ops. - */ -static void free_coherent(struct device *dev, size_t size, - void *virt_addr, dma_addr_t dma_addr) -{ - unsigned long flags; - struct protection_domain *domain; - - INC_STATS_COUNTER(cnt_free_coherent); - - domain = get_domain(dev); - if (IS_ERR(domain)) - goto free_mem; - - spin_lock_irqsave(&domain->lock, flags); - - __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); - - domain_flush_complete(domain); - - spin_unlock_irqrestore(&domain->lock, flags); - -free_mem: - free_pages((unsigned long)virt_addr, get_order(size)); -} - -/* - * This function is called by the DMA layer to find out if we can handle a - * particular device. It is part of the dma_ops. - */ -static int amd_iommu_dma_supported(struct device *dev, u64 mask) -{ - return check_device(dev); -} - -/* - * The function for pre-allocating protection domains. - * - * If the driver core informs the DMA layer if a driver grabs a device - * we don't need to preallocate the protection domains anymore. - * For now we have to. - */ -static void prealloc_protection_domains(void) -{ - struct pci_dev *dev = NULL; - struct dma_ops_domain *dma_dom; - u16 devid; - - for_each_pci_dev(dev) { - - /* Do we handle this device? */ - if (!check_device(&dev->dev)) - continue; - - /* Is there already any domain for it? */ - if (domain_for_device(&dev->dev)) - continue; - - devid = get_device_id(&dev->dev); - - dma_dom = dma_ops_domain_alloc(); - if (!dma_dom) - continue; - init_unity_mappings_for_device(dma_dom, devid); - dma_dom->target_dev = devid; - - attach_device(&dev->dev, &dma_dom->domain); - - list_add_tail(&dma_dom->list, &iommu_pd_list); - } -} - -static struct dma_map_ops amd_iommu_dma_ops = { - .alloc_coherent = alloc_coherent, - .free_coherent = free_coherent, - .map_page = map_page, - .unmap_page = unmap_page, - .map_sg = map_sg, - .unmap_sg = unmap_sg, - .dma_supported = amd_iommu_dma_supported, -}; - -static unsigned device_dma_ops_init(void) -{ - struct pci_dev *pdev = NULL; - unsigned unhandled = 0; - - for_each_pci_dev(pdev) { - if (!check_device(&pdev->dev)) { - unhandled += 1; - continue; - } - - pdev->dev.archdata.dma_ops = &amd_iommu_dma_ops; - } - - return unhandled; -} - -/* - * The function which clues the AMD IOMMU driver into dma_ops. - */ - -void __init amd_iommu_init_api(void) -{ - register_iommu(&amd_iommu_ops); -} - -int __init amd_iommu_init_dma_ops(void) -{ - struct amd_iommu *iommu; - int ret, unhandled; - - /* - * first allocate a default protection domain for every IOMMU we - * found in the system. Devices not assigned to any other - * protection domain will be assigned to the default one. - */ - for_each_iommu(iommu) { - iommu->default_dom = dma_ops_domain_alloc(); - if (iommu->default_dom == NULL) - return -ENOMEM; - iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; - ret = iommu_init_unity_mappings(iommu); - if (ret) - goto free_domains; - } - - /* - * Pre-allocate the protection domains for each device. - */ - prealloc_protection_domains(); - - iommu_detected = 1; - swiotlb = 0; - - /* Make the driver finally visible to the drivers */ - unhandled = device_dma_ops_init(); - if (unhandled && max_pfn > MAX_DMA32_PFN) { - /* There are unhandled devices - initialize swiotlb for them */ - swiotlb = 1; - } - - amd_iommu_stats_init(); - - return 0; - -free_domains: - - for_each_iommu(iommu) { - if (iommu->default_dom) - dma_ops_domain_free(iommu->default_dom); - } - - return ret; -} - -/***************************************************************************** - * - * The following functions belong to the exported interface of AMD IOMMU - * - * This interface allows access to lower level functions of the IOMMU - * like protection domain handling and assignement of devices to domains - * which is not possible with the dma_ops interface. - * - *****************************************************************************/ - -static void cleanup_domain(struct protection_domain *domain) -{ - struct iommu_dev_data *dev_data, *next; - unsigned long flags; - - write_lock_irqsave(&amd_iommu_devtable_lock, flags); - - list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) { - struct device *dev = dev_data->dev; - - __detach_device(dev); - atomic_set(&dev_data->bind, 0); - } - - write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); -} - -static void protection_domain_free(struct protection_domain *domain) -{ - if (!domain) - return; - - del_domain_from_list(domain); - - if (domain->id) - domain_id_free(domain->id); - - kfree(domain); -} - -static struct protection_domain *protection_domain_alloc(void) -{ - struct protection_domain *domain; - - domain = kzalloc(sizeof(*domain), GFP_KERNEL); - if (!domain) - return NULL; - - spin_lock_init(&domain->lock); - mutex_init(&domain->api_lock); - domain->id = domain_id_alloc(); - if (!domain->id) - goto out_err; - INIT_LIST_HEAD(&domain->dev_list); - - add_domain_to_list(domain); - - return domain; - -out_err: - kfree(domain); - - return NULL; -} - -static int amd_iommu_domain_init(struct iommu_domain *dom) -{ - struct protection_domain *domain; - - domain = protection_domain_alloc(); - if (!domain) - goto out_free; - - domain->mode = PAGE_MODE_3_LEVEL; - domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); - if (!domain->pt_root) - goto out_free; - - dom->priv = domain; - - return 0; - -out_free: - protection_domain_free(domain); - - return -ENOMEM; -} - -static void amd_iommu_domain_destroy(struct iommu_domain *dom) -{ - struct protection_domain *domain = dom->priv; - - if (!domain) - return; - - if (domain->dev_cnt > 0) - cleanup_domain(domain); - - BUG_ON(domain->dev_cnt != 0); - - free_pagetable(domain); - - protection_domain_free(domain); - - dom->priv = NULL; -} - -static void amd_iommu_detach_device(struct iommu_domain *dom, - struct device *dev) -{ - struct iommu_dev_data *dev_data = dev->archdata.iommu; - struct amd_iommu *iommu; - u16 devid; - - if (!check_device(dev)) - return; - - devid = get_device_id(dev); - - if (dev_data->domain != NULL) - detach_device(dev); - - iommu = amd_iommu_rlookup_table[devid]; - if (!iommu) - return; - - device_flush_dte(dev); - iommu_completion_wait(iommu); -} - -static int amd_iommu_attach_device(struct iommu_domain *dom, - struct device *dev) -{ - struct protection_domain *domain = dom->priv; - struct iommu_dev_data *dev_data; - struct amd_iommu *iommu; - int ret; - u16 devid; - - if (!check_device(dev)) - return -EINVAL; - - dev_data = dev->archdata.iommu; - - devid = get_device_id(dev); - - iommu = amd_iommu_rlookup_table[devid]; - if (!iommu) - return -EINVAL; - - if (dev_data->domain) - detach_device(dev); - - ret = attach_device(dev, domain); - - iommu_completion_wait(iommu); - - return ret; -} - -static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, - phys_addr_t paddr, int gfp_order, int iommu_prot) -{ - unsigned long page_size = 0x1000UL << gfp_order; - struct protection_domain *domain = dom->priv; - int prot = 0; - int ret; - - if (iommu_prot & IOMMU_READ) - prot |= IOMMU_PROT_IR; - if (iommu_prot & IOMMU_WRITE) - prot |= IOMMU_PROT_IW; - - mutex_lock(&domain->api_lock); - ret = iommu_map_page(domain, iova, paddr, prot, page_size); - mutex_unlock(&domain->api_lock); - - return ret; -} - -static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, - int gfp_order) -{ - struct protection_domain *domain = dom->priv; - unsigned long page_size, unmap_size; - - page_size = 0x1000UL << gfp_order; - - mutex_lock(&domain->api_lock); - unmap_size = iommu_unmap_page(domain, iova, page_size); - mutex_unlock(&domain->api_lock); - - domain_flush_tlb_pde(domain); - - return get_order(unmap_size); -} - -static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, - unsigned long iova) -{ - struct protection_domain *domain = dom->priv; - unsigned long offset_mask; - phys_addr_t paddr; - u64 *pte, __pte; - - pte = fetch_pte(domain, iova); - - if (!pte || !IOMMU_PTE_PRESENT(*pte)) - return 0; - - if (PM_PTE_LEVEL(*pte) == 0) - offset_mask = PAGE_SIZE - 1; - else - offset_mask = PTE_PAGE_SIZE(*pte) - 1; - - __pte = *pte & PM_ADDR_MASK; - paddr = (__pte & ~offset_mask) | (iova & offset_mask); - - return paddr; -} - -static int amd_iommu_domain_has_cap(struct iommu_domain *domain, - unsigned long cap) -{ - switch (cap) { - case IOMMU_CAP_CACHE_COHERENCY: - return 1; - } - - return 0; -} - -static struct iommu_ops amd_iommu_ops = { - .domain_init = amd_iommu_domain_init, - .domain_destroy = amd_iommu_domain_destroy, - .attach_dev = amd_iommu_attach_device, - .detach_dev = amd_iommu_detach_device, - .map = amd_iommu_map, - .unmap = amd_iommu_unmap, - .iova_to_phys = amd_iommu_iova_to_phys, - .domain_has_cap = amd_iommu_domain_has_cap, -}; - -/***************************************************************************** - * - * The next functions do a basic initialization of IOMMU for pass through - * mode - * - * In passthrough mode the IOMMU is initialized and enabled but not used for - * DMA-API translation. - * - *****************************************************************************/ - -int __init amd_iommu_init_passthrough(void) -{ - struct amd_iommu *iommu; - struct pci_dev *dev = NULL; - u16 devid; - - /* allocate passthrough domain */ - pt_domain = protection_domain_alloc(); - if (!pt_domain) - return -ENOMEM; - - pt_domain->mode |= PAGE_MODE_NONE; - - for_each_pci_dev(dev) { - if (!check_device(&dev->dev)) - continue; - - devid = get_device_id(&dev->dev); - - iommu = amd_iommu_rlookup_table[devid]; - if (!iommu) - continue; - - attach_device(&dev->dev, pt_domain); - } - - pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); - - return 0; -} diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c deleted file mode 100644 index d86aa3f..0000000 --- a/arch/x86/kernel/amd_iommu_init.c +++ /dev/null @@ -1,1586 +0,0 @@ -/* - * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. - * Author: Joerg Roedel - * Leo Duran - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -/* - * definitions for the ACPI scanning code - */ -#define IVRS_HEADER_LENGTH 48 - -#define ACPI_IVHD_TYPE 0x10 -#define ACPI_IVMD_TYPE_ALL 0x20 -#define ACPI_IVMD_TYPE 0x21 -#define ACPI_IVMD_TYPE_RANGE 0x22 - -#define IVHD_DEV_ALL 0x01 -#define IVHD_DEV_SELECT 0x02 -#define IVHD_DEV_SELECT_RANGE_START 0x03 -#define IVHD_DEV_RANGE_END 0x04 -#define IVHD_DEV_ALIAS 0x42 -#define IVHD_DEV_ALIAS_RANGE 0x43 -#define IVHD_DEV_EXT_SELECT 0x46 -#define IVHD_DEV_EXT_SELECT_RANGE 0x47 - -#define IVHD_FLAG_HT_TUN_EN_MASK 0x01 -#define IVHD_FLAG_PASSPW_EN_MASK 0x02 -#define IVHD_FLAG_RESPASSPW_EN_MASK 0x04 -#define IVHD_FLAG_ISOC_EN_MASK 0x08 - -#define IVMD_FLAG_EXCL_RANGE 0x08 -#define IVMD_FLAG_UNITY_MAP 0x01 - -#define ACPI_DEVFLAG_INITPASS 0x01 -#define ACPI_DEVFLAG_EXTINT 0x02 -#define ACPI_DEVFLAG_NMI 0x04 -#define ACPI_DEVFLAG_SYSMGT1 0x10 -#define ACPI_DEVFLAG_SYSMGT2 0x20 -#define ACPI_DEVFLAG_LINT0 0x40 -#define ACPI_DEVFLAG_LINT1 0x80 -#define ACPI_DEVFLAG_ATSDIS 0x10000000 - -/* - * ACPI table definitions - * - * These data structures are laid over the table to parse the important values - * out of it. - */ - -/* - * structure describing one IOMMU in the ACPI table. Typically followed by one - * or more ivhd_entrys. - */ -struct ivhd_header { - u8 type; - u8 flags; - u16 length; - u16 devid; - u16 cap_ptr; - u64 mmio_phys; - u16 pci_seg; - u16 info; - u32 reserved; -} __attribute__((packed)); - -/* - * A device entry describing which devices a specific IOMMU translates and - * which requestor ids they use. - */ -struct ivhd_entry { - u8 type; - u16 devid; - u8 flags; - u32 ext; -} __attribute__((packed)); - -/* - * An AMD IOMMU memory definition structure. It defines things like exclusion - * ranges for devices and regions that should be unity mapped. - */ -struct ivmd_header { - u8 type; - u8 flags; - u16 length; - u16 devid; - u16 aux; - u64 resv; - u64 range_start; - u64 range_length; -} __attribute__((packed)); - -bool amd_iommu_dump; - -static int __initdata amd_iommu_detected; -static bool __initdata amd_iommu_disabled; - -u16 amd_iommu_last_bdf; /* largest PCI device id we have - to handle */ -LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings - we find in ACPI */ -bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ - -LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the - system */ - -/* Array to assign indices to IOMMUs*/ -struct amd_iommu *amd_iommus[MAX_IOMMUS]; -int amd_iommus_present; - -/* IOMMUs have a non-present cache? */ -bool amd_iommu_np_cache __read_mostly; -bool amd_iommu_iotlb_sup __read_mostly = true; - -/* - * The ACPI table parsing functions set this variable on an error - */ -static int __initdata amd_iommu_init_err; - -/* - * List of protection domains - used during resume - */ -LIST_HEAD(amd_iommu_pd_list); -spinlock_t amd_iommu_pd_lock; - -/* - * Pointer to the device table which is shared by all AMD IOMMUs - * it is indexed by the PCI device id or the HT unit id and contains - * information about the domain the device belongs to as well as the - * page table root pointer. - */ -struct dev_table_entry *amd_iommu_dev_table; - -/* - * The alias table is a driver specific data structure which contains the - * mappings of the PCI device ids to the actual requestor ids on the IOMMU. - * More than one device can share the same requestor id. - */ -u16 *amd_iommu_alias_table; - -/* - * The rlookup table is used to find the IOMMU which is responsible - * for a specific device. It is also indexed by the PCI device id. - */ -struct amd_iommu **amd_iommu_rlookup_table; - -/* - * AMD IOMMU allows up to 2^16 differend protection domains. This is a bitmap - * to know which ones are already in use. - */ -unsigned long *amd_iommu_pd_alloc_bitmap; - -static u32 dev_table_size; /* size of the device table */ -static u32 alias_table_size; /* size of the alias table */ -static u32 rlookup_table_size; /* size if the rlookup table */ - -/* - * This function flushes all internal caches of - * the IOMMU used by this driver. - */ -extern void iommu_flush_all_caches(struct amd_iommu *iommu); - -static inline void update_last_devid(u16 devid) -{ - if (devid > amd_iommu_last_bdf) - amd_iommu_last_bdf = devid; -} - -static inline unsigned long tbl_size(int entry_size) -{ - unsigned shift = PAGE_SHIFT + - get_order(((int)amd_iommu_last_bdf + 1) * entry_size); - - return 1UL << shift; -} - -/* Access to l1 and l2 indexed register spaces */ - -static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address) -{ - u32 val; - - pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16)); - pci_read_config_dword(iommu->dev, 0xfc, &val); - return val; -} - -static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val) -{ - pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31)); - pci_write_config_dword(iommu->dev, 0xfc, val); - pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16)); -} - -static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address) -{ - u32 val; - - pci_write_config_dword(iommu->dev, 0xf0, address); - pci_read_config_dword(iommu->dev, 0xf4, &val); - return val; -} - -static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val) -{ - pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8)); - pci_write_config_dword(iommu->dev, 0xf4, val); -} - -/**************************************************************************** - * - * AMD IOMMU MMIO register space handling functions - * - * These functions are used to program the IOMMU device registers in - * MMIO space required for that driver. - * - ****************************************************************************/ - -/* - * This function set the exclusion range in the IOMMU. DMA accesses to the - * exclusion range are passed through untranslated - */ -static void iommu_set_exclusion_range(struct amd_iommu *iommu) -{ - u64 start = iommu->exclusion_start & PAGE_MASK; - u64 limit = (start + iommu->exclusion_length) & PAGE_MASK; - u64 entry; - - if (!iommu->exclusion_start) - return; - - entry = start | MMIO_EXCL_ENABLE_MASK; - memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET, - &entry, sizeof(entry)); - - entry = limit; - memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET, - &entry, sizeof(entry)); -} - -/* Programs the physical address of the device table into the IOMMU hardware */ -static void __init iommu_set_device_table(struct amd_iommu *iommu) -{ - u64 entry; - - BUG_ON(iommu->mmio_base == NULL); - - entry = virt_to_phys(amd_iommu_dev_table); - entry |= (dev_table_size >> 12) - 1; - memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET, - &entry, sizeof(entry)); -} - -/* Generic functions to enable/disable certain features of the IOMMU. */ -static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) -{ - u32 ctrl; - - ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); - ctrl |= (1 << bit); - writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); -} - -static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) -{ - u32 ctrl; - - ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); - ctrl &= ~(1 << bit); - writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); -} - -/* Function to enable the hardware */ -static void iommu_enable(struct amd_iommu *iommu) -{ - static const char * const feat_str[] = { - "PreF", "PPR", "X2APIC", "NX", "GT", "[5]", - "IA", "GA", "HE", "PC", NULL - }; - int i; - - printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx", - dev_name(&iommu->dev->dev), iommu->cap_ptr); - - if (iommu->cap & (1 << IOMMU_CAP_EFR)) { - printk(KERN_CONT " extended features: "); - for (i = 0; feat_str[i]; ++i) - if (iommu_feature(iommu, (1ULL << i))) - printk(KERN_CONT " %s", feat_str[i]); - } - printk(KERN_CONT "\n"); - - iommu_feature_enable(iommu, CONTROL_IOMMU_EN); -} - -static void iommu_disable(struct amd_iommu *iommu) -{ - /* Disable command buffer */ - iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); - - /* Disable event logging and event interrupts */ - iommu_feature_disable(iommu, CONTROL_EVT_INT_EN); - iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN); - - /* Disable IOMMU hardware itself */ - iommu_feature_disable(iommu, CONTROL_IOMMU_EN); -} - -/* - * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in - * the system has one. - */ -static u8 * __init iommu_map_mmio_space(u64 address) -{ - u8 *ret; - - if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu")) { - pr_err("AMD-Vi: Can not reserve memory region %llx for mmio\n", - address); - pr_err("AMD-Vi: This is a BIOS bug. Please contact your hardware vendor\n"); - return NULL; - } - - ret = ioremap_nocache(address, MMIO_REGION_LENGTH); - if (ret != NULL) - return ret; - - release_mem_region(address, MMIO_REGION_LENGTH); - - return NULL; -} - -static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) -{ - if (iommu->mmio_base) - iounmap(iommu->mmio_base); - release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH); -} - -/**************************************************************************** - * - * The functions below belong to the first pass of AMD IOMMU ACPI table - * parsing. In this pass we try to find out the highest device id this - * code has to handle. Upon this information the size of the shared data - * structures is determined later. - * - ****************************************************************************/ - -/* - * This function calculates the length of a given IVHD entry - */ -static inline int ivhd_entry_length(u8 *ivhd) -{ - return 0x04 << (*ivhd >> 6); -} - -/* - * This function reads the last device id the IOMMU has to handle from the PCI - * capability header for this IOMMU - */ -static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr) -{ - u32 cap; - - cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); - update_last_devid(calc_devid(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); - - return 0; -} - -/* - * After reading the highest device id from the IOMMU PCI capability header - * this function looks if there is a higher device id defined in the ACPI table - */ -static int __init find_last_devid_from_ivhd(struct ivhd_header *h) -{ - u8 *p = (void *)h, *end = (void *)h; - struct ivhd_entry *dev; - - p += sizeof(*h); - end += h->length; - - find_last_devid_on_pci(PCI_BUS(h->devid), - PCI_SLOT(h->devid), - PCI_FUNC(h->devid), - h->cap_ptr); - - while (p < end) { - dev = (struct ivhd_entry *)p; - switch (dev->type) { - case IVHD_DEV_SELECT: - case IVHD_DEV_RANGE_END: - case IVHD_DEV_ALIAS: - case IVHD_DEV_EXT_SELECT: - /* all the above subfield types refer to device ids */ - update_last_devid(dev->devid); - break; - default: - break; - } - p += ivhd_entry_length(p); - } - - WARN_ON(p != end); - - return 0; -} - -/* - * Iterate over all IVHD entries in the ACPI table and find the highest device - * id which we need to handle. This is the first of three functions which parse - * the ACPI table. So we check the checksum here. - */ -static int __init find_last_devid_acpi(struct acpi_table_header *table) -{ - int i; - u8 checksum = 0, *p = (u8 *)table, *end = (u8 *)table; - struct ivhd_header *h; - - /* - * Validate checksum here so we don't need to do it when - * we actually parse the table - */ - for (i = 0; i < table->length; ++i) - checksum += p[i]; - if (checksum != 0) { - /* ACPI table corrupt */ - amd_iommu_init_err = -ENODEV; - return 0; - } - - p += IVRS_HEADER_LENGTH; - - end += table->length; - while (p < end) { - h = (struct ivhd_header *)p; - switch (h->type) { - case ACPI_IVHD_TYPE: - find_last_devid_from_ivhd(h); - break; - default: - break; - } - p += h->length; - } - WARN_ON(p != end); - - return 0; -} - -/**************************************************************************** - * - * The following functions belong the the code path which parses the ACPI table - * the second time. In this ACPI parsing iteration we allocate IOMMU specific - * data structures, initialize the device/alias/rlookup table and also - * basically initialize the hardware. - * - ****************************************************************************/ - -/* - * Allocates the command buffer. This buffer is per AMD IOMMU. We can - * write commands to that buffer later and the IOMMU will execute them - * asynchronously - */ -static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) -{ - u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(CMD_BUFFER_SIZE)); - - if (cmd_buf == NULL) - return NULL; - - iommu->cmd_buf_size = CMD_BUFFER_SIZE | CMD_BUFFER_UNINITIALIZED; - - return cmd_buf; -} - -/* - * This function resets the command buffer if the IOMMU stopped fetching - * commands from it. - */ -void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu) -{ - iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); - - writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); - writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); - - iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); -} - -/* - * This function writes the command buffer address to the hardware and - * enables it. - */ -static void iommu_enable_command_buffer(struct amd_iommu *iommu) -{ - u64 entry; - - BUG_ON(iommu->cmd_buf == NULL); - - entry = (u64)virt_to_phys(iommu->cmd_buf); - entry |= MMIO_CMD_SIZE_512; - - memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, - &entry, sizeof(entry)); - - amd_iommu_reset_cmd_buffer(iommu); - iommu->cmd_buf_size &= ~(CMD_BUFFER_UNINITIALIZED); -} - -static void __init free_command_buffer(struct amd_iommu *iommu) -{ - free_pages((unsigned long)iommu->cmd_buf, - get_order(iommu->cmd_buf_size & ~(CMD_BUFFER_UNINITIALIZED))); -} - -/* allocates the memory where the IOMMU will log its events to */ -static u8 * __init alloc_event_buffer(struct amd_iommu *iommu) -{ - iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(EVT_BUFFER_SIZE)); - - if (iommu->evt_buf == NULL) - return NULL; - - iommu->evt_buf_size = EVT_BUFFER_SIZE; - - return iommu->evt_buf; -} - -static void iommu_enable_event_buffer(struct amd_iommu *iommu) -{ - u64 entry; - - BUG_ON(iommu->evt_buf == NULL); - - entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; - - memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, - &entry, sizeof(entry)); - - /* set head and tail to zero manually */ - writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); - writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); - - iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); -} - -static void __init free_event_buffer(struct amd_iommu *iommu) -{ - free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); -} - -/* sets a specific bit in the device table entry. */ -static void set_dev_entry_bit(u16 devid, u8 bit) -{ - int i = (bit >> 5) & 0x07; - int _bit = bit & 0x1f; - - amd_iommu_dev_table[devid].data[i] |= (1 << _bit); -} - -static int get_dev_entry_bit(u16 devid, u8 bit) -{ - int i = (bit >> 5) & 0x07; - int _bit = bit & 0x1f; - - return (amd_iommu_dev_table[devid].data[i] & (1 << _bit)) >> _bit; -} - - -void amd_iommu_apply_erratum_63(u16 devid) -{ - int sysmgt; - - sysmgt = get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1) | - (get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2) << 1); - - if (sysmgt == 0x01) - set_dev_entry_bit(devid, DEV_ENTRY_IW); -} - -/* Writes the specific IOMMU for a device into the rlookup table */ -static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) -{ - amd_iommu_rlookup_table[devid] = iommu; -} - -/* - * This function takes the device specific flags read from the ACPI - * table and sets up the device table entry with that information - */ -static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, - u16 devid, u32 flags, u32 ext_flags) -{ - if (flags & ACPI_DEVFLAG_INITPASS) - set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS); - if (flags & ACPI_DEVFLAG_EXTINT) - set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS); - if (flags & ACPI_DEVFLAG_NMI) - set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS); - if (flags & ACPI_DEVFLAG_SYSMGT1) - set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1); - if (flags & ACPI_DEVFLAG_SYSMGT2) - set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2); - if (flags & ACPI_DEVFLAG_LINT0) - set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS); - if (flags & ACPI_DEVFLAG_LINT1) - set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); - - amd_iommu_apply_erratum_63(devid); - - set_iommu_for_device(iommu, devid); -} - -/* - * Reads the device exclusion range from ACPI and initialize IOMMU with - * it - */ -static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) -{ - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; - - if (!(m->flags & IVMD_FLAG_EXCL_RANGE)) - return; - - if (iommu) { - /* - * We only can configure exclusion ranges per IOMMU, not - * per device. But we can enable the exclusion range per - * device. This is done here - */ - set_dev_entry_bit(m->devid, DEV_ENTRY_EX); - iommu->exclusion_start = m->range_start; - iommu->exclusion_length = m->range_length; - } -} - -/* - * This function reads some important data from the IOMMU PCI space and - * initializes the driver data structure with it. It reads the hardware - * capabilities and the first/last device entries - */ -static void __init init_iommu_from_pci(struct amd_iommu *iommu) -{ - int cap_ptr = iommu->cap_ptr; - u32 range, misc, low, high; - int i, j; - - pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, - &iommu->cap); - pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET, - &range); - pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET, - &misc); - - iommu->first_device = calc_devid(MMIO_GET_BUS(range), - MMIO_GET_FD(range)); - iommu->last_device = calc_devid(MMIO_GET_BUS(range), - MMIO_GET_LD(range)); - iommu->evt_msi_num = MMIO_MSI_NUM(misc); - - if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB))) - amd_iommu_iotlb_sup = false; - - /* read extended feature bits */ - low = readl(iommu->mmio_base + MMIO_EXT_FEATURES); - high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4); - - iommu->features = ((u64)high << 32) | low; - - if (!is_rd890_iommu(iommu->dev)) - return; - - /* - * Some rd890 systems may not be fully reconfigured by the BIOS, so - * it's necessary for us to store this information so it can be - * reprogrammed on resume - */ - - pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4, - &iommu->stored_addr_lo); - pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8, - &iommu->stored_addr_hi); - - /* Low bit locks writes to configuration space */ - iommu->stored_addr_lo &= ~1; - - for (i = 0; i < 6; i++) - for (j = 0; j < 0x12; j++) - iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j); - - for (i = 0; i < 0x83; i++) - iommu->stored_l2[i] = iommu_read_l2(iommu, i); -} - -/* - * Takes a pointer to an AMD IOMMU entry in the ACPI table and - * initializes the hardware and our data structures with it. - */ -static void __init init_iommu_from_acpi(struct amd_iommu *iommu, - struct ivhd_header *h) -{ - u8 *p = (u8 *)h; - u8 *end = p, flags = 0; - u16 devid = 0, devid_start = 0, devid_to = 0; - u32 dev_i, ext_flags = 0; - bool alias = false; - struct ivhd_entry *e; - - /* - * First save the recommended feature enable bits from ACPI - */ - iommu->acpi_flags = h->flags; - - /* - * Done. Now parse the device entries - */ - p += sizeof(struct ivhd_header); - end += h->length; - - - while (p < end) { - e = (struct ivhd_entry *)p; - switch (e->type) { - case IVHD_DEV_ALL: - - DUMP_printk(" DEV_ALL\t\t\t first devid: %02x:%02x.%x" - " last device %02x:%02x.%x flags: %02x\n", - PCI_BUS(iommu->first_device), - PCI_SLOT(iommu->first_device), - PCI_FUNC(iommu->first_device), - PCI_BUS(iommu->last_device), - PCI_SLOT(iommu->last_device), - PCI_FUNC(iommu->last_device), - e->flags); - - for (dev_i = iommu->first_device; - dev_i <= iommu->last_device; ++dev_i) - set_dev_entry_from_acpi(iommu, dev_i, - e->flags, 0); - break; - case IVHD_DEV_SELECT: - - DUMP_printk(" DEV_SELECT\t\t\t devid: %02x:%02x.%x " - "flags: %02x\n", - PCI_BUS(e->devid), - PCI_SLOT(e->devid), - PCI_FUNC(e->devid), - e->flags); - - devid = e->devid; - set_dev_entry_from_acpi(iommu, devid, e->flags, 0); - break; - case IVHD_DEV_SELECT_RANGE_START: - - DUMP_printk(" DEV_SELECT_RANGE_START\t " - "devid: %02x:%02x.%x flags: %02x\n", - PCI_BUS(e->devid), - PCI_SLOT(e->devid), - PCI_FUNC(e->devid), - e->flags); - - devid_start = e->devid; - flags = e->flags; - ext_flags = 0; - alias = false; - break; - case IVHD_DEV_ALIAS: - - DUMP_printk(" DEV_ALIAS\t\t\t devid: %02x:%02x.%x " - "flags: %02x devid_to: %02x:%02x.%x\n", - PCI_BUS(e->devid), - PCI_SLOT(e->devid), - PCI_FUNC(e->devid), - e->flags, - PCI_BUS(e->ext >> 8), - PCI_SLOT(e->ext >> 8), - PCI_FUNC(e->ext >> 8)); - - devid = e->devid; - devid_to = e->ext >> 8; - set_dev_entry_from_acpi(iommu, devid , e->flags, 0); - set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0); - amd_iommu_alias_table[devid] = devid_to; - break; - case IVHD_DEV_ALIAS_RANGE: - - DUMP_printk(" DEV_ALIAS_RANGE\t\t " - "devid: %02x:%02x.%x flags: %02x " - "devid_to: %02x:%02x.%x\n", - PCI_BUS(e->devid), - PCI_SLOT(e->devid), - PCI_FUNC(e->devid), - e->flags, - PCI_BUS(e->ext >> 8), - PCI_SLOT(e->ext >> 8), - PCI_FUNC(e->ext >> 8)); - - devid_start = e->devid; - flags = e->flags; - devid_to = e->ext >> 8; - ext_flags = 0; - alias = true; - break; - case IVHD_DEV_EXT_SELECT: - - DUMP_printk(" DEV_EXT_SELECT\t\t devid: %02x:%02x.%x " - "flags: %02x ext: %08x\n", - PCI_BUS(e->devid), - PCI_SLOT(e->devid), - PCI_FUNC(e->devid), - e->flags, e->ext); - - devid = e->devid; - set_dev_entry_from_acpi(iommu, devid, e->flags, - e->ext); - break; - case IVHD_DEV_EXT_SELECT_RANGE: - - DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: " - "%02x:%02x.%x flags: %02x ext: %08x\n", - PCI_BUS(e->devid), - PCI_SLOT(e->devid), - PCI_FUNC(e->devid), - e->flags, e->ext); - - devid_start = e->devid; - flags = e->flags; - ext_flags = e->ext; - alias = false; - break; - case IVHD_DEV_RANGE_END: - - DUMP_printk(" DEV_RANGE_END\t\t devid: %02x:%02x.%x\n", - PCI_BUS(e->devid), - PCI_SLOT(e->devid), - PCI_FUNC(e->devid)); - - devid = e->devid; - for (dev_i = devid_start; dev_i <= devid; ++dev_i) { - if (alias) { - amd_iommu_alias_table[dev_i] = devid_to; - set_dev_entry_from_acpi(iommu, - devid_to, flags, ext_flags); - } - set_dev_entry_from_acpi(iommu, dev_i, - flags, ext_flags); - } - break; - default: - break; - } - - p += ivhd_entry_length(p); - } -} - -/* Initializes the device->iommu mapping for the driver */ -static int __init init_iommu_devices(struct amd_iommu *iommu) -{ - u32 i; - - for (i = iommu->first_device; i <= iommu->last_device; ++i) - set_iommu_for_device(iommu, i); - - return 0; -} - -static void __init free_iommu_one(struct amd_iommu *iommu) -{ - free_command_buffer(iommu); - free_event_buffer(iommu); - iommu_unmap_mmio_space(iommu); -} - -static void __init free_iommu_all(void) -{ - struct amd_iommu *iommu, *next; - - for_each_iommu_safe(iommu, next) { - list_del(&iommu->list); - free_iommu_one(iommu); - kfree(iommu); - } -} - -/* - * This function clues the initialization function for one IOMMU - * together and also allocates the command buffer and programs the - * hardware. It does NOT enable the IOMMU. This is done afterwards. - */ -static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) -{ - spin_lock_init(&iommu->lock); - - /* Add IOMMU to internal data structures */ - list_add_tail(&iommu->list, &amd_iommu_list); - iommu->index = amd_iommus_present++; - - if (unlikely(iommu->index >= MAX_IOMMUS)) { - WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n"); - return -ENOSYS; - } - - /* Index is fine - add IOMMU to the array */ - amd_iommus[iommu->index] = iommu; - - /* - * Copy data from ACPI table entry to the iommu struct - */ - iommu->dev = pci_get_bus_and_slot(PCI_BUS(h->devid), h->devid & 0xff); - if (!iommu->dev) - return 1; - - iommu->cap_ptr = h->cap_ptr; - iommu->pci_seg = h->pci_seg; - iommu->mmio_phys = h->mmio_phys; - iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys); - if (!iommu->mmio_base) - return -ENOMEM; - - iommu->cmd_buf = alloc_command_buffer(iommu); - if (!iommu->cmd_buf) - return -ENOMEM; - - iommu->evt_buf = alloc_event_buffer(iommu); - if (!iommu->evt_buf) - return -ENOMEM; - - iommu->int_enabled = false; - - init_iommu_from_pci(iommu); - init_iommu_from_acpi(iommu, h); - init_iommu_devices(iommu); - - if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) - amd_iommu_np_cache = true; - - return pci_enable_device(iommu->dev); -} - -/* - * Iterates over all IOMMU entries in the ACPI table, allocates the - * IOMMU structure and initializes it with init_iommu_one() - */ -static int __init init_iommu_all(struct acpi_table_header *table) -{ - u8 *p = (u8 *)table, *end = (u8 *)table; - struct ivhd_header *h; - struct amd_iommu *iommu; - int ret; - - end += table->length; - p += IVRS_HEADER_LENGTH; - - while (p < end) { - h = (struct ivhd_header *)p; - switch (*p) { - case ACPI_IVHD_TYPE: - - DUMP_printk("device: %02x:%02x.%01x cap: %04x " - "seg: %d flags: %01x info %04x\n", - PCI_BUS(h->devid), PCI_SLOT(h->devid), - PCI_FUNC(h->devid), h->cap_ptr, - h->pci_seg, h->flags, h->info); - DUMP_printk(" mmio-addr: %016llx\n", - h->mmio_phys); - - iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); - if (iommu == NULL) { - amd_iommu_init_err = -ENOMEM; - return 0; - } - - ret = init_iommu_one(iommu, h); - if (ret) { - amd_iommu_init_err = ret; - return 0; - } - break; - default: - break; - } - p += h->length; - - } - WARN_ON(p != end); - - return 0; -} - -/**************************************************************************** - * - * The following functions initialize the MSI interrupts for all IOMMUs - * in the system. Its a bit challenging because there could be multiple - * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per - * pci_dev. - * - ****************************************************************************/ - -static int iommu_setup_msi(struct amd_iommu *iommu) -{ - int r; - - r = pci_enable_msi(iommu->dev); - if (r) - return r; - - r = request_threaded_irq(iommu->dev->irq, - amd_iommu_int_handler, - amd_iommu_int_thread, - 0, "AMD-Vi", - iommu->dev); - - if (r) { - pci_disable_msi(iommu->dev); - return r; - } - - iommu->int_enabled = true; - - return 0; -} - -static int iommu_init_msi(struct amd_iommu *iommu) -{ - int ret; - - if (iommu->int_enabled) - goto enable_faults; - - if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI)) - ret = iommu_setup_msi(iommu); - else - ret = -ENODEV; - - if (ret) - return ret; - -enable_faults: - iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); - - return 0; -} - -/**************************************************************************** - * - * The next functions belong to the third pass of parsing the ACPI - * table. In this last pass the memory mapping requirements are - * gathered (like exclusion and unity mapping reanges). - * - ****************************************************************************/ - -static void __init free_unity_maps(void) -{ - struct unity_map_entry *entry, *next; - - list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) { - list_del(&entry->list); - kfree(entry); - } -} - -/* called when we find an exclusion range definition in ACPI */ -static int __init init_exclusion_range(struct ivmd_header *m) -{ - int i; - - switch (m->type) { - case ACPI_IVMD_TYPE: - set_device_exclusion_range(m->devid, m); - break; - case ACPI_IVMD_TYPE_ALL: - for (i = 0; i <= amd_iommu_last_bdf; ++i) - set_device_exclusion_range(i, m); - break; - case ACPI_IVMD_TYPE_RANGE: - for (i = m->devid; i <= m->aux; ++i) - set_device_exclusion_range(i, m); - break; - default: - break; - } - - return 0; -} - -/* called for unity map ACPI definition */ -static int __init init_unity_map_range(struct ivmd_header *m) -{ - struct unity_map_entry *e = 0; - char *s; - - e = kzalloc(sizeof(*e), GFP_KERNEL); - if (e == NULL) - return -ENOMEM; - - switch (m->type) { - default: - kfree(e); - return 0; - case ACPI_IVMD_TYPE: - s = "IVMD_TYPEi\t\t\t"; - e->devid_start = e->devid_end = m->devid; - break; - case ACPI_IVMD_TYPE_ALL: - s = "IVMD_TYPE_ALL\t\t"; - e->devid_start = 0; - e->devid_end = amd_iommu_last_bdf; - break; - case ACPI_IVMD_TYPE_RANGE: - s = "IVMD_TYPE_RANGE\t\t"; - e->devid_start = m->devid; - e->devid_end = m->aux; - break; - } - e->address_start = PAGE_ALIGN(m->range_start); - e->address_end = e->address_start + PAGE_ALIGN(m->range_length); - e->prot = m->flags >> 1; - - DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x" - " range_start: %016llx range_end: %016llx flags: %x\n", s, - PCI_BUS(e->devid_start), PCI_SLOT(e->devid_start), - PCI_FUNC(e->devid_start), PCI_BUS(e->devid_end), - PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end), - e->address_start, e->address_end, m->flags); - - list_add_tail(&e->list, &amd_iommu_unity_map); - - return 0; -} - -/* iterates over all memory definitions we find in the ACPI table */ -static int __init init_memory_definitions(struct acpi_table_header *table) -{ - u8 *p = (u8 *)table, *end = (u8 *)table; - struct ivmd_header *m; - - end += table->length; - p += IVRS_HEADER_LENGTH; - - while (p < end) { - m = (struct ivmd_header *)p; - if (m->flags & IVMD_FLAG_EXCL_RANGE) - init_exclusion_range(m); - else if (m->flags & IVMD_FLAG_UNITY_MAP) - init_unity_map_range(m); - - p += m->length; - } - - return 0; -} - -/* - * Init the device table to not allow DMA access for devices and - * suppress all page faults - */ -static void init_device_table(void) -{ - u32 devid; - - for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { - set_dev_entry_bit(devid, DEV_ENTRY_VALID); - set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); - } -} - -static void iommu_init_flags(struct amd_iommu *iommu) -{ - iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ? - iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) : - iommu_feature_disable(iommu, CONTROL_HT_TUN_EN); - - iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ? - iommu_feature_enable(iommu, CONTROL_PASSPW_EN) : - iommu_feature_disable(iommu, CONTROL_PASSPW_EN); - - iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ? - iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) : - iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN); - - iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ? - iommu_feature_enable(iommu, CONTROL_ISOC_EN) : - iommu_feature_disable(iommu, CONTROL_ISOC_EN); - - /* - * make IOMMU memory accesses cache coherent - */ - iommu_feature_enable(iommu, CONTROL_COHERENT_EN); -} - -static void iommu_apply_resume_quirks(struct amd_iommu *iommu) -{ - int i, j; - u32 ioc_feature_control; - struct pci_dev *pdev = NULL; - - /* RD890 BIOSes may not have completely reconfigured the iommu */ - if (!is_rd890_iommu(iommu->dev)) - return; - - /* - * First, we need to ensure that the iommu is enabled. This is - * controlled by a register in the northbridge - */ - pdev = pci_get_bus_and_slot(iommu->dev->bus->number, PCI_DEVFN(0, 0)); - - if (!pdev) - return; - - /* Select Northbridge indirect register 0x75 and enable writing */ - pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7)); - pci_read_config_dword(pdev, 0x64, &ioc_feature_control); - - /* Enable the iommu */ - if (!(ioc_feature_control & 0x1)) - pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1); - - pci_dev_put(pdev); - - /* Restore the iommu BAR */ - pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, - iommu->stored_addr_lo); - pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8, - iommu->stored_addr_hi); - - /* Restore the l1 indirect regs for each of the 6 l1s */ - for (i = 0; i < 6; i++) - for (j = 0; j < 0x12; j++) - iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]); - - /* Restore the l2 indirect regs */ - for (i = 0; i < 0x83; i++) - iommu_write_l2(iommu, i, iommu->stored_l2[i]); - - /* Lock PCI setup registers */ - pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, - iommu->stored_addr_lo | 1); -} - -/* - * This function finally enables all IOMMUs found in the system after - * they have been initialized - */ -static void enable_iommus(void) -{ - struct amd_iommu *iommu; - - for_each_iommu(iommu) { - iommu_disable(iommu); - iommu_init_flags(iommu); - iommu_set_device_table(iommu); - iommu_enable_command_buffer(iommu); - iommu_enable_event_buffer(iommu); - iommu_set_exclusion_range(iommu); - iommu_init_msi(iommu); - iommu_enable(iommu); - iommu_flush_all_caches(iommu); - } -} - -static void disable_iommus(void) -{ - struct amd_iommu *iommu; - - for_each_iommu(iommu) - iommu_disable(iommu); -} - -/* - * Suspend/Resume support - * disable suspend until real resume implemented - */ - -static void amd_iommu_resume(void) -{ - struct amd_iommu *iommu; - - for_each_iommu(iommu) - iommu_apply_resume_quirks(iommu); - - /* re-load the hardware */ - enable_iommus(); - - /* - * we have to flush after the IOMMUs are enabled because a - * disabled IOMMU will never execute the commands we send - */ - for_each_iommu(iommu) - iommu_flush_all_caches(iommu); -} - -static int amd_iommu_suspend(void) -{ - /* disable IOMMUs to go out of the way for BIOS */ - disable_iommus(); - - return 0; -} - -static struct syscore_ops amd_iommu_syscore_ops = { - .suspend = amd_iommu_suspend, - .resume = amd_iommu_resume, -}; - -/* - * This is the core init function for AMD IOMMU hardware in the system. - * This function is called from the generic x86 DMA layer initialization - * code. - * - * This function basically parses the ACPI table for AMD IOMMU (IVRS) - * three times: - * - * 1 pass) Find the highest PCI device id the driver has to handle. - * Upon this information the size of the data structures is - * determined that needs to be allocated. - * - * 2 pass) Initialize the data structures just allocated with the - * information in the ACPI table about available AMD IOMMUs - * in the system. It also maps the PCI devices in the - * system to specific IOMMUs - * - * 3 pass) After the basic data structures are allocated and - * initialized we update them with information about memory - * remapping requirements parsed out of the ACPI table in - * this last pass. - * - * After that the hardware is initialized and ready to go. In the last - * step we do some Linux specific things like registering the driver in - * the dma_ops interface and initializing the suspend/resume support - * functions. Finally it prints some information about AMD IOMMUs and - * the driver state and enables the hardware. - */ -static int __init amd_iommu_init(void) -{ - struct amd_iommu *iommu; - int i, ret = 0; - - /* - * First parse ACPI tables to find the largest Bus/Dev/Func - * we need to handle. Upon this information the shared data - * structures for the IOMMUs in the system will be allocated - */ - if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0) - return -ENODEV; - - ret = amd_iommu_init_err; - if (ret) - goto out; - - dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); - alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); - rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); - - ret = -ENOMEM; - - /* Device table - directly used by all IOMMUs */ - amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(dev_table_size)); - if (amd_iommu_dev_table == NULL) - goto out; - - /* - * Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the - * IOMMU see for that device - */ - amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL, - get_order(alias_table_size)); - if (amd_iommu_alias_table == NULL) - goto free; - - /* IOMMU rlookup table - find the IOMMU for a specific device */ - amd_iommu_rlookup_table = (void *)__get_free_pages( - GFP_KERNEL | __GFP_ZERO, - get_order(rlookup_table_size)); - if (amd_iommu_rlookup_table == NULL) - goto free; - - amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( - GFP_KERNEL | __GFP_ZERO, - get_order(MAX_DOMAIN_ID/8)); - if (amd_iommu_pd_alloc_bitmap == NULL) - goto free; - - /* - * let all alias entries point to itself - */ - for (i = 0; i <= amd_iommu_last_bdf; ++i) - amd_iommu_alias_table[i] = i; - - /* - * never allocate domain 0 because its used as the non-allocated and - * error value placeholder - */ - amd_iommu_pd_alloc_bitmap[0] = 1; - - spin_lock_init(&amd_iommu_pd_lock); - - /* - * now the data structures are allocated and basically initialized - * start the real acpi table scan - */ - ret = -ENODEV; - if (acpi_table_parse("IVRS", init_iommu_all) != 0) - goto free; - - if (amd_iommu_init_err) { - ret = amd_iommu_init_err; - goto free; - } - - if (acpi_table_parse("IVRS", init_memory_definitions) != 0) - goto free; - - if (amd_iommu_init_err) { - ret = amd_iommu_init_err; - goto free; - } - - ret = amd_iommu_init_devices(); - if (ret) - goto free; - - enable_iommus(); - - if (iommu_pass_through) - ret = amd_iommu_init_passthrough(); - else - ret = amd_iommu_init_dma_ops(); - - if (ret) - goto free_disable; - - /* init the device table */ - init_device_table(); - - for_each_iommu(iommu) - iommu_flush_all_caches(iommu); - - amd_iommu_init_api(); - - amd_iommu_init_notifier(); - - register_syscore_ops(&amd_iommu_syscore_ops); - - if (iommu_pass_through) - goto out; - - if (amd_iommu_unmap_flush) - printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n"); - else - printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n"); - - x86_platform.iommu_shutdown = disable_iommus; -out: - return ret; - -free_disable: - disable_iommus(); - -free: - amd_iommu_uninit_devices(); - - free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, - get_order(MAX_DOMAIN_ID/8)); - - free_pages((unsigned long)amd_iommu_rlookup_table, - get_order(rlookup_table_size)); - - free_pages((unsigned long)amd_iommu_alias_table, - get_order(alias_table_size)); - - free_pages((unsigned long)amd_iommu_dev_table, - get_order(dev_table_size)); - - free_iommu_all(); - - free_unity_maps(); - -#ifdef CONFIG_GART_IOMMU - /* - * We failed to initialize the AMD IOMMU - try fallback to GART - * if possible. - */ - gart_iommu_init(); - -#endif - - goto out; -} - -/**************************************************************************** - * - * Early detect code. This code runs at IOMMU detection time in the DMA - * layer. It just looks if there is an IVRS ACPI table to detect AMD - * IOMMUs - * - ****************************************************************************/ -static int __init early_amd_iommu_detect(struct acpi_table_header *table) -{ - return 0; -} - -int __init amd_iommu_detect(void) -{ - if (no_iommu || (iommu_detected && !gart_iommu_aperture)) - return -ENODEV; - - if (amd_iommu_disabled) - return -ENODEV; - - if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { - iommu_detected = 1; - amd_iommu_detected = 1; - x86_init.iommu.iommu_init = amd_iommu_init; - - /* Make sure ACS will be enabled */ - pci_request_acs(); - return 1; - } - return -ENODEV; -} - -/**************************************************************************** - * - * Parsing functions for the AMD IOMMU specific kernel command line - * options. - * - ****************************************************************************/ - -static int __init parse_amd_iommu_dump(char *str) -{ - amd_iommu_dump = true; - - return 1; -} - -static int __init parse_amd_iommu_options(char *str) -{ - for (; *str; ++str) { - if (strncmp(str, "fullflush", 9) == 0) - amd_iommu_unmap_flush = true; - if (strncmp(str, "off", 3) == 0) - amd_iommu_disabled = true; - } - - return 1; -} - -__setup("amd_iommu_dump", parse_amd_iommu_dump); -__setup("amd_iommu=", parse_amd_iommu_options); - -IOMMU_INIT_FINISH(amd_iommu_detect, - gart_iommu_hole_init, - 0, - 0); diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index a59638b..8795480 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -9,6 +9,53 @@ config XEN_BALLOON the system to expand the domain's memory allocation, or alternatively return unneeded memory to the system. +config XEN_SELFBALLOONING + bool "Dynamically self-balloon kernel memory to target" + depends on XEN && XEN_BALLOON && CLEANCACHE && SWAP && XEN_TMEM + default n + help + Self-ballooning dynamically balloons available kernel memory driven + by the current usage of anonymous memory ("committed AS") and + controlled by various sysfs-settable parameters. Configuring + FRONTSWAP is highly recommended; if it is not configured, self- + ballooning is disabled by default but can be enabled with the + 'selfballooning' kernel boot parameter. If FRONTSWAP is configured, + frontswap-selfshrinking is enabled by default but can be disabled + with the 'noselfshrink' kernel boot parameter; and self-ballooning + is enabled by default but can be disabled with the 'noselfballooning' + kernel boot parameter. Note that systems without a sufficiently + large swap device should not enable self-ballooning. + +config XEN_BALLOON_MEMORY_HOTPLUG + bool "Memory hotplug support for Xen balloon driver" + default n + depends on XEN_BALLOON && MEMORY_HOTPLUG + help + Memory hotplug support for Xen balloon driver allows expanding memory + available for the system above limit declared at system startup. + It is very useful on critical systems which require long + run without rebooting. + + Memory could be hotplugged in following steps: + + 1) dom0: xl mem-max + where is >= requested memory size, + + 2) dom0: xl mem-set + where is requested memory size; alternatively memory + could be added by writing proper value to + /sys/devices/system/xen_memory/xen_memory0/target or + /sys/devices/system/xen_memory/xen_memory0/target_kb on dumU, + + 3) domU: for i in /sys/devices/system/memory/memory*/state; do \ + [ "`cat "$i"`" = offline ] && echo online > "$i"; done + + Memory could be onlined automatically on domU by adding following line to udev rules: + + SUBSYSTEM=="memory", ACTION=="add", RUN+="/bin/sh -c '[ -f /sys$devpath/state ] && echo online > /sys$devpath/state'" + + In that case step 3 should be omitted. + config XEN_SCRUB_PAGES bool "Scrub pages before returning them to system" depends on XEN_BALLOON @@ -90,19 +137,38 @@ config XEN_GRANT_DEV_ALLOC to other domains. This can be used to implement frontend drivers or as part of an inter-domain shared memory channel. -config XEN_PLATFORM_PCI - tristate "xen platform pci device driver" - depends on XEN_PVHVM && PCI - default m - help - Driver for the Xen PCI Platform device: it is responsible for - initializing xenbus and grant_table when running in a Xen HVM - domain. As a consequence this driver is required to run any Xen PV - frontend on Xen HVM. - config SWIOTLB_XEN def_bool y depends on PCI select SWIOTLB +config XEN_TMEM + bool + default y if (CLEANCACHE || FRONTSWAP) + help + Shim to interface in-kernel Transcendent Memory hooks + (e.g. cleancache and frontswap) to Xen tmem hypercalls. + +config XEN_PCIDEV_BACKEND + tristate "Xen PCI-device backend driver" + depends on PCI && X86 && XEN + depends on XEN_BACKEND + default m + help + The PCI device backend driver allows the kernel to export arbitrary + PCI devices to other guests. If you select this to be a module, you + will need to make sure no other driver has bound to the device(s) + you want to make visible to other guests. + + The parameter "passthrough" allows you specify how you want the PCI + devices to appear in the guest. You can choose the default (0) where + PCI topology starts at 00.00.0, or (1) for passthrough if you want + the PCI devices topology appear the same as in the host. + + The "hide" parameter (only applicable if backend driver is compiled + into the kernel) allows you to bind the PCI devices to this module + from the default device drivers. The argument is the list of PCI BDFs: + xen-pciback.hide=(03:00.0)(04:00.0) + + If in doubt, say m. endmenu diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index bbc1825..974fffd 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -1,6 +1,5 @@ obj-y += grant-table.o features.o events.o manage.o balloon.o obj-y += xenbus/ -obj-y += tmem.o nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_features.o := $(nostackp) @@ -9,17 +8,18 @@ obj-$(CONFIG_BLOCK) += biomerge.o obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o obj-$(CONFIG_XEN_XENCOMM) += xencomm.o obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o +obj-$(CONFIG_XEN_SELFBALLOONING) += xen-selfballoon.o obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o obj-$(CONFIG_XENFS) += xenfs/ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o -obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o +obj-$(CONFIG_XEN_PVHVM) += platform-pci.o +obj-$(CONFIG_XEN_TMEM) += tmem.o obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o obj-$(CONFIG_XEN_DOM0) += pci.o +obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/ xen-evtchn-y := evtchn.o xen-gntdev-y := gntdev.o xen-gntalloc-y := gntalloc.o - -xen-platform-pci-y := platform-pci.o diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index f54290b..31ab82f 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -4,6 +4,12 @@ * Copyright (c) 2003, B Dragovic * Copyright (c) 2003-2004, M Williamson, K Fraser * Copyright (c) 2005 Dan M. Smith, IBM Corporation + * Copyright (c) 2010 Daniel Kiper + * + * Memory hotplug support was written by Daniel Kiper. Work on + * it was sponsored by Google under Google Summer of Code 2010 + * program. Jeremy Fitzhardinge from Citrix was the mentor for + * this project. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version 2 @@ -33,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -40,6 +47,9 @@ #include #include #include +#include +#include +#include #include #include @@ -84,8 +94,8 @@ static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; #define inc_totalhigh_pages() (totalhigh_pages++) #define dec_totalhigh_pages() (totalhigh_pages--) #else -#define inc_totalhigh_pages() do {} while(0) -#define dec_totalhigh_pages() do {} while(0) +#define inc_totalhigh_pages() do {} while (0) +#define dec_totalhigh_pages() do {} while (0) #endif /* List of ballooned pages, threaded through the mem_map array. */ @@ -145,8 +155,7 @@ static struct page *balloon_retrieve(bool prefer_highmem) if (PageHighMem(page)) { balloon_stats.balloon_high--; inc_totalhigh_pages(); - } - else + } else balloon_stats.balloon_low--; totalram_pages++; @@ -194,6 +203,87 @@ static enum bp_state update_schedule(enum bp_state state) return BP_EAGAIN; } +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG +static long current_credit(void) +{ + return balloon_stats.target_pages - balloon_stats.current_pages - + balloon_stats.hotplug_pages; +} + +static bool balloon_is_inflated(void) +{ + if (balloon_stats.balloon_low || balloon_stats.balloon_high || + balloon_stats.balloon_hotplug) + return true; + else + return false; +} + +/* + * reserve_additional_memory() adds memory region of size >= credit above + * max_pfn. New region is section aligned and size is modified to be multiple + * of section size. Those features allow optimal use of address space and + * establish proper alignment when this function is called first time after + * boot (last section not fully populated at boot time contains unused memory + * pages with PG_reserved bit not set; online_pages_range() does not allow page + * onlining in whole range if first onlined page does not have PG_reserved + * bit set). Real size of added memory is established at page onlining stage. + */ + +static enum bp_state reserve_additional_memory(long credit) +{ + int nid, rc; + u64 hotplug_start_paddr; + unsigned long balloon_hotplug = credit; + + hotplug_start_paddr = PFN_PHYS(SECTION_ALIGN_UP(max_pfn)); + balloon_hotplug = round_up(balloon_hotplug, PAGES_PER_SECTION); + nid = memory_add_physaddr_to_nid(hotplug_start_paddr); + + rc = add_memory(nid, hotplug_start_paddr, balloon_hotplug << PAGE_SHIFT); + + if (rc) { + pr_info("xen_balloon: %s: add_memory() failed: %i\n", __func__, rc); + return BP_EAGAIN; + } + + balloon_hotplug -= credit; + + balloon_stats.hotplug_pages += credit; + balloon_stats.balloon_hotplug = balloon_hotplug; + + return BP_DONE; +} + +static void xen_online_page(struct page *page) +{ + __online_page_set_limits(page); + + mutex_lock(&balloon_mutex); + + __balloon_append(page); + + if (balloon_stats.hotplug_pages) + --balloon_stats.hotplug_pages; + else + --balloon_stats.balloon_hotplug; + + mutex_unlock(&balloon_mutex); +} + +static int xen_memory_notifier(struct notifier_block *nb, unsigned long val, void *v) +{ + if (val == MEM_ONLINE) + schedule_delayed_work(&balloon_worker, 0); + + return NOTIFY_OK; +} + +static struct notifier_block xen_memory_nb = { + .notifier_call = xen_memory_notifier, + .priority = 0 +}; +#else static long current_credit(void) { unsigned long target = balloon_stats.target_pages; @@ -206,6 +296,21 @@ static long current_credit(void) return target - balloon_stats.current_pages; } +static bool balloon_is_inflated(void) +{ + if (balloon_stats.balloon_low || balloon_stats.balloon_high) + return true; + else + return false; +} + +static enum bp_state reserve_additional_memory(long credit) +{ + balloon_stats.target_pages = balloon_stats.current_pages; + return BP_DONE; +} +#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */ + static enum bp_state increase_reservation(unsigned long nr_pages) { int rc; @@ -217,6 +322,15 @@ static enum bp_state increase_reservation(unsigned long nr_pages) .domid = DOMID_SELF }; +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG + if (!balloon_stats.balloon_low && !balloon_stats.balloon_high) { + nr_pages = min(nr_pages, balloon_stats.balloon_hotplug); + balloon_stats.hotplug_pages += nr_pages; + balloon_stats.balloon_hotplug -= nr_pages; + return BP_DONE; + } +#endif + if (nr_pages > ARRAY_SIZE(frame_list)) nr_pages = ARRAY_SIZE(frame_list); @@ -279,6 +393,15 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) .domid = DOMID_SELF }; +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG + if (balloon_stats.hotplug_pages) { + nr_pages = min(nr_pages, balloon_stats.hotplug_pages); + balloon_stats.hotplug_pages -= nr_pages; + balloon_stats.balloon_hotplug += nr_pages; + return BP_DONE; + } +#endif + if (nr_pages > ARRAY_SIZE(frame_list)) nr_pages = ARRAY_SIZE(frame_list); @@ -299,7 +422,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) (unsigned long)__va(pfn << PAGE_SHIFT), __pte_ma(0), 0); BUG_ON(ret); - } + } } @@ -340,8 +463,12 @@ static void balloon_process(struct work_struct *work) do { credit = current_credit(); - if (credit > 0) - state = increase_reservation(credit); + if (credit > 0) { + if (balloon_is_inflated()) + state = increase_reservation(credit); + else + state = reserve_additional_memory(credit); + } if (credit < 0) state = decrease_reservation(-credit, GFP_BALLOON); @@ -374,20 +501,24 @@ EXPORT_SYMBOL_GPL(balloon_set_new_target); * alloc_xenballooned_pages - get pages that have been ballooned out * @nr_pages: Number of pages to get * @pages: pages returned + * @highmem: allow highmem pages * @return 0 on success, error otherwise */ -int alloc_xenballooned_pages(int nr_pages, struct page** pages) +int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem) { int pgno = 0; - struct page* page; + struct page *page; mutex_lock(&balloon_mutex); while (pgno < nr_pages) { - page = balloon_retrieve(true); - if (page) { + page = balloon_retrieve(highmem); + if (page && (highmem || !PageHighMem(page))) { pages[pgno++] = page; } else { enum bp_state st; - st = decrease_reservation(nr_pages - pgno, GFP_HIGHUSER); + if (page) + balloon_append(page); + st = decrease_reservation(nr_pages - pgno, + highmem ? GFP_HIGHUSER : GFP_USER); if (st != BP_DONE) goto out_undo; } @@ -409,7 +540,7 @@ EXPORT_SYMBOL(alloc_xenballooned_pages); * @nr_pages: Number of pages * @pages: pages to return */ -void free_xenballooned_pages(int nr_pages, struct page** pages) +void free_xenballooned_pages(int nr_pages, struct page **pages) { int i; @@ -428,17 +559,40 @@ void free_xenballooned_pages(int nr_pages, struct page** pages) } EXPORT_SYMBOL(free_xenballooned_pages); -static int __init balloon_init(void) +static void __init balloon_add_region(unsigned long start_pfn, + unsigned long pages) { unsigned long pfn, extra_pfn_end; struct page *page; + /* + * If the amount of usable memory has been limited (e.g., with + * the 'mem' command line parameter), don't add pages beyond + * this limit. + */ + extra_pfn_end = min(max_pfn, start_pfn + pages); + + for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) { + page = pfn_to_page(pfn); + /* totalram_pages and totalhigh_pages do not + include the boot-time balloon extension, so + don't subtract from it. */ + __balloon_append(page); + } +} + +static int __init balloon_init(void) +{ + int i; + if (!xen_domain()) return -ENODEV; pr_info("xen/balloon: Initialising balloon driver.\n"); - balloon_stats.current_pages = xen_pv_domain() ? min(xen_start_info->nr_pages, max_pfn) : max_pfn; + balloon_stats.current_pages = xen_pv_domain() + ? min(xen_start_info->nr_pages - xen_released_pages, max_pfn) + : max_pfn; balloon_stats.target_pages = balloon_stats.current_pages; balloon_stats.balloon_low = 0; balloon_stats.balloon_high = 0; @@ -448,25 +602,22 @@ static int __init balloon_init(void) balloon_stats.retry_count = 1; balloon_stats.max_retry_count = RETRY_UNLIMITED; +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG + balloon_stats.hotplug_pages = 0; + balloon_stats.balloon_hotplug = 0; + + set_online_page_callback(&xen_online_page); + register_memory_notifier(&xen_memory_nb); +#endif + /* - * Initialise the balloon with excess memory space. We need - * to make sure we don't add memory which doesn't exist or - * logically exist. The E820 map can be trimmed to be smaller - * than the amount of physical memory due to the mem= command - * line parameter. And if this is a 32-bit non-HIGHMEM kernel - * on a system with memory which requires highmem to access, - * don't try to use it. + * Initialize the balloon with pages from the extra memory + * regions (see arch/x86/xen/setup.c). */ - extra_pfn_end = min(min(max_pfn, e820_end_of_ram_pfn()), - (unsigned long)PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size)); - for (pfn = PFN_UP(xen_extra_mem_start); - pfn < extra_pfn_end; - pfn++) { - page = pfn_to_page(pfn); - /* totalram_pages and totalhigh_pages do not include the boot-time - balloon extension, so don't subtract from it. */ - __balloon_append(page); - } + for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) + if (xen_extra_mem[i].size) + balloon_add_region(PFN_UP(xen_extra_mem[i].start), + PFN_DOWN(xen_extra_mem[i].size)); return 0; } diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 7ba4d0e..bcf7711 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -54,7 +54,7 @@ * This lock protects updates to the following mapping and reference-count * arrays. The lock does not need to be acquired to read the mapping tables. */ -static DEFINE_SPINLOCK(irq_mapping_update_lock); +static DEFINE_MUTEX(irq_mapping_update_lock); static LIST_HEAD(xen_irq_list_head); @@ -85,8 +85,7 @@ enum xen_irq_type { * IPI - IPI vector * EVTCHN - */ -struct irq_info -{ +struct irq_info { struct list_head list; enum xen_irq_type type; /* type */ unsigned irq; @@ -282,9 +281,9 @@ static inline unsigned long active_evtchns(unsigned int cpu, struct shared_info *sh, unsigned int idx) { - return (sh->evtchn_pending[idx] & + return sh->evtchn_pending[idx] & per_cpu(cpu_evtchn_mask, cpu)[idx] & - ~sh->evtchn_mask[idx]); + ~sh->evtchn_mask[idx]; } static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) @@ -432,7 +431,8 @@ static int __must_check xen_allocate_irq_dynamic(void) irq = irq_alloc_desc_from(first, -1); - xen_irq_init(irq); + if (irq >= 0) + xen_irq_init(irq); return irq; } @@ -600,7 +600,7 @@ static void disable_pirq(struct irq_data *data) disable_dynirq(data); } -static int find_irq_by_gsi(unsigned gsi) +int xen_irq_from_gsi(unsigned gsi) { struct irq_info *info; @@ -614,11 +614,7 @@ static int find_irq_by_gsi(unsigned gsi) return -1; } - -int xen_allocate_pirq_gsi(unsigned gsi) -{ - return gsi; -} +EXPORT_SYMBOL_GPL(xen_irq_from_gsi); /* * Do not make any assumptions regarding the relationship between the @@ -636,9 +632,9 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi, int irq = -1; struct physdev_irq irq_op; - spin_lock(&irq_mapping_update_lock); + mutex_lock(&irq_mapping_update_lock); - irq = find_irq_by_gsi(gsi); + irq = xen_irq_from_gsi(gsi); if (irq != -1) { printk(KERN_INFO "xen_map_pirq_gsi: returning irq %d for gsi %u\n", irq, gsi); @@ -689,7 +685,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi, handle_edge_irq, name); out: - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); return irq; } @@ -715,10 +711,10 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, { int irq, ret; - spin_lock(&irq_mapping_update_lock); + mutex_lock(&irq_mapping_update_lock); irq = xen_allocate_irq_dynamic(); - if (irq == -1) + if (irq < 0) goto out; irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq, @@ -729,12 +725,12 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, if (ret < 0) goto error_irq; out: - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); return irq; error_irq: - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); xen_free_irq(irq); - return -1; + return ret; } #endif @@ -745,7 +741,7 @@ int xen_destroy_irq(int irq) struct irq_info *info = info_for_irq(irq); int rc = -ENOENT; - spin_lock(&irq_mapping_update_lock); + mutex_lock(&irq_mapping_update_lock); desc = irq_to_desc(irq); if (!desc) @@ -771,7 +767,7 @@ int xen_destroy_irq(int irq) xen_free_irq(irq); out: - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); return rc; } @@ -781,10 +777,10 @@ int xen_irq_from_pirq(unsigned pirq) struct irq_info *info; - spin_lock(&irq_mapping_update_lock); + mutex_lock(&irq_mapping_update_lock); list_for_each_entry(info, &xen_irq_list_head, list) { - if (info == NULL || info->type != IRQT_PIRQ) + if (info->type != IRQT_PIRQ) continue; irq = info->irq; if (info->u.pirq.pirq == pirq) @@ -792,7 +788,7 @@ int xen_irq_from_pirq(unsigned pirq) } irq = -1; out: - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); return irq; } @@ -807,7 +803,7 @@ int bind_evtchn_to_irq(unsigned int evtchn) { int irq; - spin_lock(&irq_mapping_update_lock); + mutex_lock(&irq_mapping_update_lock); irq = evtchn_to_irq[evtchn]; @@ -823,7 +819,7 @@ int bind_evtchn_to_irq(unsigned int evtchn) } out: - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); return irq; } @@ -834,7 +830,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) struct evtchn_bind_ipi bind_ipi; int evtchn, irq; - spin_lock(&irq_mapping_update_lock); + mutex_lock(&irq_mapping_update_lock); irq = per_cpu(ipi_to_irq, cpu)[ipi]; @@ -858,7 +854,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) } out: - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); return irq; } @@ -877,13 +873,34 @@ static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, return err ? : bind_evtchn_to_irq(bind_interdomain.local_port); } +static int find_virq(unsigned int virq, unsigned int cpu) +{ + struct evtchn_status status; + int port, rc = -ENOENT; -int bind_virq_to_irq(unsigned int virq, unsigned int cpu) + memset(&status, 0, sizeof(status)); + for (port = 0; port <= NR_EVENT_CHANNELS; port++) { + status.dom = DOMID_SELF; + status.port = port; + rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status); + if (rc < 0) + continue; + if (status.status != EVTCHNSTAT_virq) + continue; + if (status.u.virq == virq && status.vcpu == cpu) { + rc = port; + break; + } + } + return rc; +} + +int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu) { struct evtchn_bind_virq bind_virq; - int evtchn, irq; + int evtchn, irq, ret; - spin_lock(&irq_mapping_update_lock); + mutex_lock(&irq_mapping_update_lock); irq = per_cpu(virq_to_irq, cpu)[virq]; @@ -892,15 +909,25 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu) if (irq == -1) goto out; - irq_set_chip_and_handler_name(irq, &xen_percpu_chip, - handle_percpu_irq, "virq"); + if (percpu) + irq_set_chip_and_handler_name(irq, &xen_percpu_chip, + handle_percpu_irq, "virq"); + else + irq_set_chip_and_handler_name(irq, &xen_dynamic_chip, + handle_edge_irq, "virq"); bind_virq.virq = virq; bind_virq.vcpu = cpu; - if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, - &bind_virq) != 0) - BUG(); - evtchn = bind_virq.port; + ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, + &bind_virq); + if (ret == 0) + evtchn = bind_virq.port; + else { + if (ret == -EEXIST) + ret = find_virq(virq, cpu); + BUG_ON(ret < 0); + evtchn = ret; + } xen_irq_info_virq_init(cpu, irq, evtchn, virq); @@ -908,7 +935,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu) } out: - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); return irq; } @@ -918,7 +945,7 @@ static void unbind_from_irq(unsigned int irq) struct evtchn_close close; int evtchn = evtchn_from_irq(irq); - spin_lock(&irq_mapping_update_lock); + mutex_lock(&irq_mapping_update_lock); if (VALID_EVTCHN(evtchn)) { close.port = evtchn; @@ -948,7 +975,7 @@ static void unbind_from_irq(unsigned int irq) xen_free_irq(irq); - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); } int bind_evtchn_to_irqhandler(unsigned int evtchn, @@ -1000,7 +1027,7 @@ int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, { int irq, retval; - irq = bind_virq_to_irq(virq, cpu); + irq = bind_virq_to_irq(virq, cpu, irqflags & IRQF_PERCPU); if (irq < 0) return irq; retval = request_irq(irq, handler, irqflags, devname, dev_id); @@ -1157,7 +1184,7 @@ static void __xen_evtchn_do_upcall(void) int cpu = get_cpu(); struct shared_info *s = HYPERVISOR_shared_info; struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); - unsigned count; + unsigned count; do { unsigned long pending_words; @@ -1295,7 +1322,7 @@ void rebind_evtchn_irq(int evtchn, int irq) will also be masked. */ disable_irq(irq); - spin_lock(&irq_mapping_update_lock); + mutex_lock(&irq_mapping_update_lock); /* After resume the irq<->evtchn mappings are all cleared out */ BUG_ON(evtchn_to_irq[evtchn] != -1); @@ -1305,7 +1332,7 @@ void rebind_evtchn_irq(int evtchn, int irq) xen_irq_info_evtchn_init(irq, evtchn); - spin_unlock(&irq_mapping_update_lock); + mutex_unlock(&irq_mapping_update_lock); /* new event channels are always bound to cpu 0 */ irq_set_affinity(irq, cpumask_of(0)); @@ -1317,8 +1344,10 @@ void rebind_evtchn_irq(int evtchn, int irq) /* Rebind an evtchn so that it gets delivered to a specific cpu */ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) { + struct shared_info *s = HYPERVISOR_shared_info; struct evtchn_bind_vcpu bind_vcpu; int evtchn = evtchn_from_irq(irq); + int masked; if (!VALID_EVTCHN(evtchn)) return -1; @@ -1335,6 +1364,12 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) bind_vcpu.vcpu = tcpu; /* + * Mask the event while changing the VCPU binding to prevent + * it being delivered on an unexpected VCPU. + */ + masked = sync_test_and_set_bit(evtchn, s->evtchn_mask); + + /* * If this fails, it usually just indicates that we're dealing with a * virq or IPI channel, which don't actually need to be rebound. Ignore * it, but don't do the xenlinux-level rebind in that case. @@ -1342,6 +1377,9 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) bind_evtchn_to_cpu(evtchn, tcpu); + if (!masked) + unmask_evtchn(evtchn); + return 0; } @@ -1686,6 +1724,7 @@ void __init xen_init_IRQ(void) evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq), GFP_KERNEL); + BUG_ON(!evtchn_to_irq); for (i = 0; i < NR_EVENT_CHANNELS; i++) evtchn_to_irq[i] = -1; @@ -1704,6 +1743,6 @@ void __init xen_init_IRQ(void) } else { irq_ctx_init(smp_processor_id()); if (xen_initial_domain()) - xen_setup_pirqs(); + pci_xen_initial_domain(); } } diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index ce3a0f5..c93d59e 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -269,6 +269,14 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port) u->name, (void *)(unsigned long)port); if (rc >= 0) rc = 0; + else { + /* bind failed, should close the port now */ + struct evtchn_close close; + close.port = port; + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) + BUG(); + set_port_user(port, NULL); + } return rc; } @@ -277,6 +285,8 @@ static void evtchn_unbind_from_user(struct per_user_data *u, int port) { int irq = irq_from_evtchn(port); + BUG_ON(irq < 0); + unbind_from_irqhandler(irq, (void *)(unsigned long)port); set_port_user(port, NULL); @@ -367,12 +377,18 @@ static long evtchn_ioctl(struct file *file, if (unbind.port >= NR_EVENT_CHANNELS) break; + spin_lock_irq(&port_user_lock); + rc = -ENOTCONN; - if (get_port_user(unbind.port) != u) + if (get_port_user(unbind.port) != u) { + spin_unlock_irq(&port_user_lock); break; + } disable_irq(irq_from_evtchn(unbind.port)); + spin_unlock_irq(&port_user_lock); + evtchn_unbind_from_user(u, unbind.port); rc = 0; @@ -472,15 +488,26 @@ static int evtchn_release(struct inode *inode, struct file *filp) int i; struct per_user_data *u = filp->private_data; + spin_lock_irq(&port_user_lock); + + free_page((unsigned long)u->ring); + for (i = 0; i < NR_EVENT_CHANNELS; i++) { if (get_port_user(i) != u) continue; disable_irq(irq_from_evtchn(i)); + } + + spin_unlock_irq(&port_user_lock); + + for (i = 0; i < NR_EVENT_CHANNELS; i++) { + if (get_port_user(i) != u) + continue; + evtchn_unbind_from_user(get_port_user(i), i); } - free_page((unsigned long)u->ring); kfree(u->name); kfree(u); diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index b4e830e..5027662 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -60,7 +60,7 @@ static int use_ptemod; struct gntdev_priv { struct list_head maps; /* lock protects maps from concurrent changes */ - spinlock_t lock; + struct mutex lock; struct mm_struct *mm; struct mmu_notifier mn; }; @@ -83,6 +83,7 @@ struct grant_map { struct ioctl_gntdev_grant_ref *grants; struct gnttab_map_grant_ref *map_ops; struct gnttab_unmap_grant_ref *unmap_ops; + struct gnttab_map_grant_ref *kmap_ops; struct page **pages; }; @@ -104,6 +105,21 @@ static void gntdev_print_maps(struct gntdev_priv *priv, #endif } +static void gntdev_free_map(struct grant_map *map) +{ + if (map == NULL) + return; + + if (map->pages) + free_xenballooned_pages(map->count, map->pages); + kfree(map->pages); + kfree(map->grants); + kfree(map->map_ops); + kfree(map->unmap_ops); + kfree(map->kmap_ops); + kfree(map); +} + static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) { struct grant_map *add; @@ -113,22 +129,25 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) if (NULL == add) return NULL; - add->grants = kzalloc(sizeof(add->grants[0]) * count, GFP_KERNEL); - add->map_ops = kzalloc(sizeof(add->map_ops[0]) * count, GFP_KERNEL); - add->unmap_ops = kzalloc(sizeof(add->unmap_ops[0]) * count, GFP_KERNEL); - add->pages = kzalloc(sizeof(add->pages[0]) * count, GFP_KERNEL); + add->grants = kcalloc(count, sizeof(add->grants[0]), GFP_KERNEL); + add->map_ops = kcalloc(count, sizeof(add->map_ops[0]), GFP_KERNEL); + add->unmap_ops = kcalloc(count, sizeof(add->unmap_ops[0]), GFP_KERNEL); + add->kmap_ops = kcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL); + add->pages = kcalloc(count, sizeof(add->pages[0]), GFP_KERNEL); if (NULL == add->grants || NULL == add->map_ops || NULL == add->unmap_ops || + NULL == add->kmap_ops || NULL == add->pages) goto err; - if (alloc_xenballooned_pages(count, add->pages)) + if (alloc_xenballooned_pages(count, add->pages, false /* lowmem */)) goto err; for (i = 0; i < count; i++) { add->map_ops[i].handle = -1; add->unmap_ops[i].handle = -1; + add->kmap_ops[i].handle = -1; } add->index = 0; @@ -138,11 +157,7 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) return add; err: - kfree(add->pages); - kfree(add->grants); - kfree(add->map_ops); - kfree(add->unmap_ops); - kfree(add); + gntdev_free_map(add); return NULL; } @@ -188,21 +203,12 @@ static void gntdev_put_map(struct grant_map *map) atomic_sub(map->count, &pages_mapped); - if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { + if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) notify_remote_via_evtchn(map->notify.event); - } - - if (map->pages) { - if (!use_ptemod) - unmap_grant_pages(map, 0, map->count); - free_xenballooned_pages(map->count, map->pages); - } - kfree(map->pages); - kfree(map->grants); - kfree(map->map_ops); - kfree(map->unmap_ops); - kfree(map); + if (map->pages && !use_ptemod) + unmap_grant_pages(map, 0, map->count); + gntdev_free_map(map); } /* ------------------------------------------------------------------ */ @@ -243,10 +249,35 @@ static int map_grant_pages(struct grant_map *map) gnttab_set_unmap_op(&map->unmap_ops[i], addr, map->flags, -1 /* handle */); } + } else { + /* + * Setup the map_ops corresponding to the pte entries pointing + * to the kernel linear addresses of the struct pages. + * These ptes are completely different from the user ptes dealt + * with find_grant_ptes. + */ + for (i = 0; i < map->count; i++) { + unsigned level; + unsigned long address = (unsigned long) + pfn_to_kaddr(page_to_pfn(map->pages[i])); + pte_t *ptep; + u64 pte_maddr = 0; + BUG_ON(PageHighMem(map->pages[i])); + + ptep = lookup_address(address, &level); + pte_maddr = arbitrary_virt_to_machine(ptep).maddr; + gnttab_set_map_op(&map->kmap_ops[i], pte_maddr, + map->flags | + GNTMAP_host_map | + GNTMAP_contains_pte, + map->grants[i].ref, + map->grants[i].domid); + } } pr_debug("map %d+%d\n", map->index, map->count); - err = gnttab_map_refs(map->map_ops, map->pages, map->count); + err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL, + map->pages, map->count); if (err) return err; @@ -364,7 +395,7 @@ static void mn_invl_range_start(struct mmu_notifier *mn, unsigned long mstart, mend; int err; - spin_lock(&priv->lock); + mutex_lock(&priv->lock); list_for_each_entry(map, &priv->maps, next) { if (!map->vma) continue; @@ -383,7 +414,7 @@ static void mn_invl_range_start(struct mmu_notifier *mn, (mend - mstart) >> PAGE_SHIFT); WARN_ON(err); } - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); } static void mn_invl_page(struct mmu_notifier *mn, @@ -400,7 +431,7 @@ static void mn_release(struct mmu_notifier *mn, struct grant_map *map; int err; - spin_lock(&priv->lock); + mutex_lock(&priv->lock); list_for_each_entry(map, &priv->maps, next) { if (!map->vma) continue; @@ -410,7 +441,7 @@ static void mn_release(struct mmu_notifier *mn, err = unmap_grant_pages(map, /* offset */ 0, map->count); WARN_ON(err); } - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); } struct mmu_notifier_ops gntdev_mmu_ops = { @@ -431,7 +462,7 @@ static int gntdev_open(struct inode *inode, struct file *flip) return -ENOMEM; INIT_LIST_HEAD(&priv->maps); - spin_lock_init(&priv->lock); + mutex_init(&priv->lock); if (use_ptemod) { priv->mm = get_task_mm(current); @@ -462,13 +493,13 @@ static int gntdev_release(struct inode *inode, struct file *flip) pr_debug("priv %p\n", priv); - spin_lock(&priv->lock); + mutex_lock(&priv->lock); while (!list_empty(&priv->maps)) { map = list_entry(priv->maps.next, struct grant_map, next); list_del(&map->next); gntdev_put_map(map); } - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); if (use_ptemod) mmu_notifier_unregister(&priv->mn, priv->mm); @@ -506,10 +537,10 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv, return err; } - spin_lock(&priv->lock); + mutex_lock(&priv->lock); gntdev_add_map(priv, map); op.index = map->index << PAGE_SHIFT; - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); if (copy_to_user(u, &op, sizeof(op)) != 0) return -EFAULT; @@ -528,14 +559,15 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv, return -EFAULT; pr_debug("priv %p, del %d+%d\n", priv, (int)op.index, (int)op.count); - spin_lock(&priv->lock); + mutex_lock(&priv->lock); map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count); if (map) { list_del(&map->next); - gntdev_put_map(map); err = 0; } - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); + if (map) + gntdev_put_map(map); return err; } @@ -578,7 +610,7 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u) if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) return -EINVAL; - spin_lock(&priv->lock); + mutex_lock(&priv->lock); list_for_each_entry(map, &priv->maps, next) { uint64_t begin = map->index << PAGE_SHIFT; @@ -601,7 +633,7 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u) map->notify.event = op.event_channel_port; rc = 0; unlock_out: - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); return rc; } @@ -646,7 +678,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) pr_debug("map %d+%d at %lx (pgoff %lx)\n", index, count, vma->vm_start, vma->vm_pgoff); - spin_lock(&priv->lock); + mutex_lock(&priv->lock); map = gntdev_find_map_index(priv, index, count); if (!map) goto unlock_out; @@ -681,7 +713,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) map->flags |= GNTMAP_readonly; } - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); if (use_ptemod) { err = apply_to_page_range(vma->vm_mm, vma->vm_start, @@ -709,11 +741,11 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) return 0; unlock_out: - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); return err; out_unlock_put: - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); out_put_map: if (use_ptemod) map->vma = NULL; diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 949af52..b657de6 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -82,7 +82,7 @@ static inline grant_ref_t *__gnttab_entry(grant_ref_t entry) static int get_free_entries(unsigned count) { unsigned long flags; - int ref, rc; + int ref, rc = 0; grant_ref_t head; spin_lock_irqsave(&gnttab_list_lock, flags); @@ -193,7 +193,7 @@ int gnttab_query_foreign_access(grant_ref_t ref) nflags = shared[ref].flags; - return (nflags & (GTF_reading|GTF_writing)); + return nflags & (GTF_reading|GTF_writing); } EXPORT_SYMBOL_GPL(gnttab_query_foreign_access); @@ -457,7 +457,8 @@ unsigned int gnttab_max_grant_frames(void) EXPORT_SYMBOL_GPL(gnttab_max_grant_frames); int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, - struct page **pages, unsigned int count) + struct gnttab_map_grant_ref *kmap_ops, + struct page **pages, unsigned int count) { int i, ret; pte_t *pte; @@ -497,8 +498,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, */ return -EOPNOTSUPP; } - ret = m2p_add_override(mfn, pages[i], - map_ops[i].flags & GNTMAP_contains_pte); + ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]); if (ret) return ret; } diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 0b5366b..caa3969 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -92,7 +93,6 @@ static int xen_suspend(void *data) if (!si->cancelled) { xen_irq_resume(); - xen_console_resume(); xen_timer_resume(); } @@ -108,16 +108,17 @@ static void do_suspend(void) shutting_down = SHUTDOWN_SUSPEND; -#ifdef CONFIG_PREEMPT - /* If the kernel is preemptible, we need to freeze all the processes - to prevent them from being in the middle of a pagetable update - during suspend. */ err = freeze_processes(); if (err) { - printk(KERN_ERR "xen suspend: freeze failed %d\n", err); + pr_err("%s: freeze processes failed %d\n", __func__, err); goto out; } -#endif + + err = freeze_kernel_threads(); + if (err) { + pr_err("%s: freeze kernel threads failed %d\n", __func__, err); + goto out_thaw; + } err = dpm_suspend_start(PMSG_FREEZE); if (err) { @@ -148,6 +149,10 @@ static void do_suspend(void) err = stop_machine(xen_suspend, &si, cpumask_of(0)); + /* Resume console as early as possible. */ + if (!si.cancelled) + xen_console_resume(); + dpm_resume_noirq(si.cancelled ? PMSG_THAW : PMSG_RESTORE); if (err) { @@ -168,10 +173,8 @@ out_resume: clock_was_set(); out_thaw: -#ifdef CONFIG_PREEMPT thaw_processes(); out: -#endif shutting_down = SHUTDOWN_INVALID; } #endif /* CONFIG_HIBERNATE_CALLBACKS */ diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c index cef4baf..b84bf0b 100644 --- a/drivers/xen/pci.c +++ b/drivers/xen/pci.c @@ -18,6 +18,7 @@ */ #include +#include #include #include #include @@ -26,26 +27,85 @@ #include #include "../pci/pci.h" +static bool __read_mostly pci_seg_supported = true; + static int xen_add_device(struct device *dev) { int r; struct pci_dev *pci_dev = to_pci_dev(dev); +#ifdef CONFIG_PCI_IOV + struct pci_dev *physfn = pci_dev->physfn; +#endif + + if (pci_seg_supported) { + struct physdev_pci_device_add add = { + .seg = pci_domain_nr(pci_dev->bus), + .bus = pci_dev->bus->number, + .devfn = pci_dev->devfn + }; +#ifdef CONFIG_ACPI + acpi_handle handle; +#endif + +#ifdef CONFIG_PCI_IOV + if (pci_dev->is_virtfn) { + add.flags = XEN_PCI_DEV_VIRTFN; + add.physfn.bus = physfn->bus->number; + add.physfn.devfn = physfn->devfn; + } else +#endif + if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) + add.flags = XEN_PCI_DEV_EXTFN; + +#ifdef CONFIG_ACPI + handle = DEVICE_ACPI_HANDLE(&pci_dev->dev); + if (!handle) + handle = DEVICE_ACPI_HANDLE(pci_dev->bus->bridge); +#ifdef CONFIG_PCI_IOV + if (!handle && pci_dev->is_virtfn) + handle = DEVICE_ACPI_HANDLE(physfn->bus->bridge); +#endif + if (handle) { + acpi_status status; + + do { + unsigned long long pxm; + + status = acpi_evaluate_integer(handle, "_PXM", + NULL, &pxm); + if (ACPI_SUCCESS(status)) { + add.optarr[0] = pxm; + add.flags |= XEN_PCI_DEV_PXM; + break; + } + status = acpi_get_parent(handle, &handle); + } while (ACPI_SUCCESS(status)); + } +#endif /* CONFIG_ACPI */ + + r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_add, &add); + if (r != -ENOSYS) + return r; + pci_seg_supported = false; + } + if (pci_domain_nr(pci_dev->bus)) + r = -ENOSYS; #ifdef CONFIG_PCI_IOV - if (pci_dev->is_virtfn) { + else if (pci_dev->is_virtfn) { struct physdev_manage_pci_ext manage_pci_ext = { .bus = pci_dev->bus->number, .devfn = pci_dev->devfn, .is_virtfn = 1, - .physfn.bus = pci_dev->physfn->bus->number, - .physfn.devfn = pci_dev->physfn->devfn, + .physfn.bus = physfn->bus->number, + .physfn.devfn = physfn->devfn, }; r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext, &manage_pci_ext); - } else + } #endif - if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) { + else if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) { struct physdev_manage_pci_ext manage_pci_ext = { .bus = pci_dev->bus->number, .devfn = pci_dev->devfn, @@ -56,7 +116,7 @@ static int xen_add_device(struct device *dev) &manage_pci_ext); } else { struct physdev_manage_pci manage_pci = { - .bus = pci_dev->bus->number, + .bus = pci_dev->bus->number, .devfn = pci_dev->devfn, }; @@ -71,13 +131,27 @@ static int xen_remove_device(struct device *dev) { int r; struct pci_dev *pci_dev = to_pci_dev(dev); - struct physdev_manage_pci manage_pci; - manage_pci.bus = pci_dev->bus->number; - manage_pci.devfn = pci_dev->devfn; + if (pci_seg_supported) { + struct physdev_pci_device device = { + .seg = pci_domain_nr(pci_dev->bus), + .bus = pci_dev->bus->number, + .devfn = pci_dev->devfn + }; - r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove, - &manage_pci); + r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_remove, + &device); + } else if (pci_domain_nr(pci_dev->bus)) + r = -ENOSYS; + else { + struct physdev_manage_pci manage_pci = { + .bus = pci_dev->bus->number, + .devfn = pci_dev->devfn + }; + + r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove, + &manage_pci); + } return r; } @@ -96,13 +170,16 @@ static int xen_pci_notifier(struct notifier_block *nb, r = xen_remove_device(dev); break; default: - break; + return NOTIFY_DONE; } - - return r; + if (r) + dev_err(dev, "Failed to %s - passthrough or MSI/MSI-X might fail!\n", + action == BUS_NOTIFY_ADD_DEVICE ? "add" : + (action == BUS_NOTIFY_DEL_DEVICE ? "delete" : "?")); + return NOTIFY_OK; } -struct notifier_block device_nb = { +static struct notifier_block device_nb = { .notifier_call = xen_pci_notifier, }; diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index fd60dff..89588e7 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -35,9 +35,11 @@ #include #include +#include #include #include #include +#include /* * Used to do a quick range check in swiotlb_tbl_unmap_single and * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this @@ -146,8 +148,10 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs) void __init xen_swiotlb_init(int verbose) { unsigned long bytes; - int rc; + int rc = -ENOMEM; unsigned long nr_tbl; + char *m = NULL; + unsigned int repeat = 3; nr_tbl = swioltb_nr_tbl(); if (nr_tbl) @@ -156,16 +160,17 @@ void __init xen_swiotlb_init(int verbose) xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT); xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE); } - +retry: bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT; /* * Get IO TLB memory from any location. */ xen_io_tlb_start = alloc_bootmem_pages(PAGE_ALIGN(bytes)); - if (!xen_io_tlb_start) - panic("Cannot allocate SWIOTLB buffer"); - + if (!xen_io_tlb_start) { + m = "Cannot allocate Xen-SWIOTLB buffer!\n"; + goto error; + } xen_io_tlb_end = xen_io_tlb_start + bytes; /* * And replace that memory with pages under 4GB. @@ -173,17 +178,28 @@ void __init xen_swiotlb_init(int verbose) rc = xen_swiotlb_fixup(xen_io_tlb_start, bytes, xen_io_tlb_nslabs); - if (rc) + if (rc) { + free_bootmem(__pa(xen_io_tlb_start), PAGE_ALIGN(bytes)); + m = "Failed to get contiguous memory for DMA from Xen!\n"\ + "You either: don't have the permissions, do not have"\ + " enough free memory under 4GB, or the hypervisor memory"\ + "is too fragmented!"; goto error; - + } start_dma_addr = xen_virt_to_bus(xen_io_tlb_start); swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose); return; error: - panic("DMA(%d): Failed to exchange pages allocated for DMA with Xen! "\ - "We either don't have the permission or you do not have enough"\ - "free memory under 4GB!\n", rc); + if (repeat--) { + xen_io_tlb_nslabs = max(1024UL, /* Min is 2MB */ + (xen_io_tlb_nslabs >> 1)); + printk(KERN_INFO "Xen-SWIOTLB: Lowering to %luMB\n", + (xen_io_tlb_nslabs << IO_TLB_SHIFT) >> 20); + goto retry; + } + xen_raw_printk("%s (rc:%d)", m, rc); + panic("%s (rc:%d)", m, rc); } void * @@ -194,6 +210,8 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, int order = get_order(size); u64 dma_mask = DMA_BIT_MASK(32); unsigned long vstart; + phys_addr_t phys; + dma_addr_t dev_addr; /* * Ignore region specifiers - the kernel's ideas of @@ -209,18 +227,26 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, vstart = __get_free_pages(flags, order); ret = (void *)vstart; + if (!ret) + return ret; + if (hwdev && hwdev->coherent_dma_mask) dma_mask = dma_alloc_coherent_mask(hwdev, flags); - if (ret) { + phys = virt_to_phys(ret); + dev_addr = xen_phys_to_bus(phys); + if (((dev_addr + size - 1 <= dma_mask)) && + !range_straddles_page_boundary(phys, size)) + *dma_handle = dev_addr; + else { if (xen_create_contiguous_region(vstart, order, fls64(dma_mask)) != 0) { free_pages(vstart, order); return NULL; } - memset(ret, 0, size); *dma_handle = virt_to_machine(ret).maddr; } + memset(ret, 0, size); return ret; } EXPORT_SYMBOL_GPL(xen_swiotlb_alloc_coherent); @@ -230,11 +256,21 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, dma_addr_t dev_addr) { int order = get_order(size); + phys_addr_t phys; + u64 dma_mask = DMA_BIT_MASK(32); if (dma_release_from_coherent(hwdev, order, vaddr)) return; - xen_destroy_contiguous_region((unsigned long)vaddr, order); + if (hwdev && hwdev->coherent_dma_mask) + dma_mask = hwdev->coherent_dma_mask; + + phys = virt_to_phys(vaddr); + + if (((dev_addr + size - 1 > dma_mask)) || + range_straddles_page_boundary(phys, size)) + xen_destroy_contiguous_region((unsigned long)vaddr, order); + free_pages((unsigned long)vaddr, order); } EXPORT_SYMBOL_GPL(xen_swiotlb_free_coherent); diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index 816a449..d369965 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c @@ -1,7 +1,7 @@ /* * Xen implementation for transcendent memory (tmem) * - * Copyright (C) 2009-2010 Oracle Corp. All rights reserved. + * Copyright (C) 2009-2011 Oracle Corp. All rights reserved. * Author: Dan Magenheimer */ @@ -9,8 +9,14 @@ #include #include #include +#include #include +/* temporary ifdef until include/linux/frontswap.h is upstream */ +#ifdef CONFIG_FRONTSWAP +#include +#endif + #include #include #include @@ -122,14 +128,8 @@ static int xen_tmem_flush_object(u32 pool_id, struct tmem_oid oid) return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0); } -static int xen_tmem_destroy_pool(u32 pool_id) -{ - struct tmem_oid oid = { { 0 } }; - - return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0); -} - -int tmem_enabled; +int tmem_enabled __read_mostly; +EXPORT_SYMBOL(tmem_enabled); static int __init enable_tmem(char *s) { @@ -139,6 +139,14 @@ static int __init enable_tmem(char *s) __setup("tmem", enable_tmem); +#ifdef CONFIG_CLEANCACHE +static int xen_tmem_destroy_pool(u32 pool_id) +{ + struct tmem_oid oid = { { 0 } }; + + return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0); +} + /* cleancache ops */ static void tmem_cleancache_put_page(int pool, struct cleancache_filekey key, @@ -240,18 +248,156 @@ static struct cleancache_ops tmem_cleancache_ops = { .init_shared_fs = tmem_cleancache_init_shared_fs, .init_fs = tmem_cleancache_init_fs }; +#endif -static int __init xen_tmem_init(void) +#ifdef CONFIG_FRONTSWAP +/* frontswap tmem operations */ + +/* a single tmem poolid is used for all frontswap "types" (swapfiles) */ +static int tmem_frontswap_poolid; + +/* + * Swizzling increases objects per swaptype, increasing tmem concurrency + * for heavy swaploads. Later, larger nr_cpus -> larger SWIZ_BITS + */ +#define SWIZ_BITS 4 +#define SWIZ_MASK ((1 << SWIZ_BITS) - 1) +#define _oswiz(_type, _ind) ((_type << SWIZ_BITS) | (_ind & SWIZ_MASK)) +#define iswiz(_ind) (_ind >> SWIZ_BITS) + +static inline struct tmem_oid oswiz(unsigned type, u32 ind) { - struct cleancache_ops old_ops; + struct tmem_oid oid = { .oid = { 0 } }; + oid.oid[0] = _oswiz(type, ind); + return oid; +} +/* returns 0 if the page was successfully put into frontswap, -1 if not */ +static int tmem_frontswap_put_page(unsigned type, pgoff_t offset, + struct page *page) +{ + u64 ind64 = (u64)offset; + u32 ind = (u32)offset; + unsigned long pfn = page_to_pfn(page); + int pool = tmem_frontswap_poolid; + int ret; + + if (pool < 0) + return -1; + if (ind64 != ind) + return -1; + mb(); /* ensure page is quiescent; tmem may address it with an alias */ + ret = xen_tmem_put_page(pool, oswiz(type, ind), iswiz(ind), pfn); + /* translate Xen tmem return values to linux semantics */ + if (ret == 1) + return 0; + else + return -1; +} + +/* + * returns 0 if the page was successfully gotten from frontswap, -1 if + * was not present (should never happen!) + */ +static int tmem_frontswap_get_page(unsigned type, pgoff_t offset, + struct page *page) +{ + u64 ind64 = (u64)offset; + u32 ind = (u32)offset; + unsigned long pfn = page_to_pfn(page); + int pool = tmem_frontswap_poolid; + int ret; + + if (pool < 0) + return -1; + if (ind64 != ind) + return -1; + ret = xen_tmem_get_page(pool, oswiz(type, ind), iswiz(ind), pfn); + /* translate Xen tmem return values to linux semantics */ + if (ret == 1) + return 0; + else + return -1; +} + +/* flush a single page from frontswap */ +static void tmem_frontswap_flush_page(unsigned type, pgoff_t offset) +{ + u64 ind64 = (u64)offset; + u32 ind = (u32)offset; + int pool = tmem_frontswap_poolid; + + if (pool < 0) + return; + if (ind64 != ind) + return; + (void) xen_tmem_flush_page(pool, oswiz(type, ind), iswiz(ind)); +} + +/* flush all pages from the passed swaptype */ +static void tmem_frontswap_flush_area(unsigned type) +{ + int pool = tmem_frontswap_poolid; + int ind; + + if (pool < 0) + return; + for (ind = SWIZ_MASK; ind >= 0; ind--) + (void)xen_tmem_flush_object(pool, oswiz(type, ind)); +} + +static void tmem_frontswap_init(unsigned ignored) +{ + struct tmem_pool_uuid private = TMEM_POOL_PRIVATE_UUID; + + /* a single tmem poolid is used for all frontswap "types" (swapfiles) */ + if (tmem_frontswap_poolid < 0) + tmem_frontswap_poolid = + xen_tmem_new_pool(private, TMEM_POOL_PERSIST, PAGE_SIZE); +} + +static int __initdata use_frontswap = 1; + +static int __init no_frontswap(char *s) +{ + use_frontswap = 0; + return 1; +} + +__setup("nofrontswap", no_frontswap); + +static struct frontswap_ops tmem_frontswap_ops = { + .put_page = tmem_frontswap_put_page, + .get_page = tmem_frontswap_get_page, + .flush_page = tmem_frontswap_flush_page, + .flush_area = tmem_frontswap_flush_area, + .init = tmem_frontswap_init +}; +#endif + +static int __init xen_tmem_init(void) +{ if (!xen_domain()) return 0; +#ifdef CONFIG_FRONTSWAP + if (tmem_enabled && use_frontswap) { + char *s = ""; + struct frontswap_ops old_ops = + frontswap_register_ops(&tmem_frontswap_ops); + + tmem_frontswap_poolid = -1; + if (old_ops.init != NULL) + s = " (WARNING: frontswap_ops overridden)"; + printk(KERN_INFO "frontswap enabled, RAM provided by " + "Xen Transcendent Memory\n"); + } +#endif #ifdef CONFIG_CLEANCACHE BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); if (tmem_enabled && use_cleancache) { char *s = ""; - old_ops = cleancache_register_ops(&tmem_cleancache_ops); + struct cleancache_ops old_ops = + cleancache_register_ops(&tmem_cleancache_ops); if (old_ops.init_fs != NULL) s = " (WARNING: cleancache_ops overridden)"; printk(KERN_INFO "cleancache enabled, RAM provided by " diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c index a4ff225..9cc2259 100644 --- a/drivers/xen/xen-balloon.c +++ b/drivers/xen/xen-balloon.c @@ -50,11 +50,6 @@ static struct sys_device balloon_sysdev; static int register_balloon(struct sys_device *sysdev); -static struct xenbus_watch target_watch = -{ - .node = "memory/target" -}; - /* React to a change in the target key */ static void watch_target(struct xenbus_watch *watch, const char **vec, unsigned int len) @@ -73,6 +68,11 @@ static void watch_target(struct xenbus_watch *watch, */ balloon_set_new_target(new_target >> (PAGE_SHIFT - 10)); } +static struct xenbus_watch target_watch = { + .node = "memory/target", + .callback = watch_target, +}; + static int balloon_init_watcher(struct notifier_block *notifier, unsigned long event, @@ -87,7 +87,9 @@ static int balloon_init_watcher(struct notifier_block *notifier, return NOTIFY_DONE; } -static struct notifier_block xenstore_notifier; +static struct notifier_block xenstore_notifier = { + .notifier_call = balloon_init_watcher, +}; static int __init balloon_init(void) { @@ -98,8 +100,7 @@ static int __init balloon_init(void) register_balloon(&balloon_sysdev); - target_watch.callback = watch_target; - xenstore_notifier.notifier_call = balloon_init_watcher; + register_xen_selfballooning(&balloon_sysdev); register_xenstore_notifier(&xenstore_notifier); diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index cdacf92..1906125 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -33,7 +33,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -435,25 +437,26 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn); int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) { struct gnttab_map_grant_ref op = { - .flags = GNTMAP_host_map, + .flags = GNTMAP_host_map | GNTMAP_contains_pte, .ref = gnt_ref, .dom = dev->otherend_id, }; struct vm_struct *area; + pte_t *pte; *vaddr = NULL; - area = xen_alloc_vm_area(PAGE_SIZE); + area = alloc_vm_area(PAGE_SIZE, &pte); if (!area) return -ENOMEM; - op.host_addr = (unsigned long)area->addr; + op.host_addr = arbitrary_virt_to_machine(pte).maddr; if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) BUG(); if (op.status != GNTST_okay) { - xen_free_vm_area(area); + free_vm_area(area); xenbus_dev_fatal(dev, op.status, "mapping in shared page %d from domain %d", gnt_ref, dev->otherend_id); @@ -526,6 +529,7 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) struct gnttab_unmap_grant_ref op = { .host_addr = (unsigned long)vaddr, }; + unsigned int level; /* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr) * method so that we don't have to muck with vmalloc internals here. @@ -547,12 +551,14 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) } op.handle = (grant_handle_t)area->phys_addr; + op.host_addr = arbitrary_virt_to_machine( + lookup_address((unsigned long)vaddr, &level)).maddr; if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) BUG(); if (op.status == GNTST_okay) - xen_free_vm_area(area); + free_vm_area(area); else xenbus_dev_error(dev, op.status, "unmapping page at handle %d error %d", diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index 090c61e..2eff7a6 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -212,7 +212,9 @@ int xb_init_comms(void) printk(KERN_WARNING "XENBUS response ring is not quiescent " "(%08x:%08x): fixing up\n", intf->rsp_cons, intf->rsp_prod); - intf->rsp_cons = intf->rsp_prod; + /* breaks kdump */ + if (!reset_devices) + intf->rsp_cons = intf->rsp_prod; } if (xenbus_irq) { diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 7397695..1b178c6 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -309,8 +310,7 @@ void xenbus_unregister_driver(struct xenbus_driver *drv) } EXPORT_SYMBOL_GPL(xenbus_unregister_driver); -struct xb_find_info -{ +struct xb_find_info { struct xenbus_device *dev; const char *nodename; }; @@ -378,26 +378,32 @@ static void xenbus_dev_release(struct device *dev) kfree(to_xenbus_device(dev)); } -static ssize_t xendev_show_nodename(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t nodename_show(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename); } -static DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL); -static ssize_t xendev_show_devtype(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t devtype_show(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype); } -static DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL); -static ssize_t xendev_show_modalias(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t modalias_show(struct device *dev, + struct device_attribute *attr, char *buf) { - return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype); + return sprintf(buf, "%s:%s\n", dev->bus->name, + to_xenbus_device(dev)->devicetype); } -static DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL); + +struct device_attribute xenbus_dev_attrs[] = { + __ATTR_RO(nodename), + __ATTR_RO(devtype), + __ATTR_RO(modalias), + __ATTR_NULL +}; +EXPORT_SYMBOL_GPL(xenbus_dev_attrs); int xenbus_probe_node(struct xen_bus_type *bus, const char *type, @@ -449,25 +455,7 @@ int xenbus_probe_node(struct xen_bus_type *bus, if (err) goto fail; - err = device_create_file(&xendev->dev, &dev_attr_nodename); - if (err) - goto fail_unregister; - - err = device_create_file(&xendev->dev, &dev_attr_devtype); - if (err) - goto fail_remove_nodename; - - err = device_create_file(&xendev->dev, &dev_attr_modalias); - if (err) - goto fail_remove_devtype; - return 0; -fail_remove_devtype: - device_remove_file(&xendev->dev, &dev_attr_devtype); -fail_remove_nodename: - device_remove_file(&xendev->dev, &dev_attr_nodename); -fail_unregister: - device_unregister(&xendev->dev); fail: kfree(xendev); return err; @@ -651,7 +639,7 @@ int xenbus_dev_cancel(struct device *dev) EXPORT_SYMBOL_GPL(xenbus_dev_cancel); /* A flag to determine if xenstored is 'ready' (i.e. has started) */ -int xenstored_ready = 0; +int xenstored_ready; int register_xenstore_notifier(struct notifier_block *nb) @@ -696,64 +684,74 @@ static int __init xenbus_probe_initcall(void) device_initcall(xenbus_probe_initcall); -static int __init xenbus_init(void) +/* Set up event channel for xenstored which is run as a local process + * (this is normally used only in dom0) + */ +static int __init xenstored_local_init(void) { int err = 0; unsigned long page = 0; + struct evtchn_alloc_unbound alloc_unbound; - DPRINTK(""); + /* Allocate Xenstore page */ + page = get_zeroed_page(GFP_KERNEL); + if (!page) + goto out_err; - err = -ENODEV; - if (!xen_domain()) - return err; + xen_store_mfn = xen_start_info->store_mfn = + pfn_to_mfn(virt_to_phys((void *)page) >> + PAGE_SHIFT); - /* - * Domain0 doesn't have a store_evtchn or store_mfn yet. - */ - if (xen_initial_domain()) { - struct evtchn_alloc_unbound alloc_unbound; + /* Next allocate a local port which xenstored can bind to */ + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = DOMID_SELF; - /* Allocate Xenstore page */ - page = get_zeroed_page(GFP_KERNEL); - if (!page) - goto out_error; + err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + if (err == -ENOSYS) + goto out_err; - xen_store_mfn = xen_start_info->store_mfn = - pfn_to_mfn(virt_to_phys((void *)page) >> - PAGE_SHIFT); + BUG_ON(err); + xen_store_evtchn = xen_start_info->store_evtchn = + alloc_unbound.port; - /* Next allocate a local port which xenstored can bind to */ - alloc_unbound.dom = DOMID_SELF; - alloc_unbound.remote_dom = 0; + return 0; - err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, - &alloc_unbound); - if (err == -ENOSYS) - goto out_error; + out_err: + if (page != 0) + free_page(page); + return err; +} - BUG_ON(err); - xen_store_evtchn = xen_start_info->store_evtchn = - alloc_unbound.port; +static int __init xenbus_init(void) +{ + int err = 0; - xen_store_interface = mfn_to_virt(xen_store_mfn); + if (!xen_domain()) + return -ENODEV; + + if (xen_hvm_domain()) { + uint64_t v = 0; + err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); + if (err) + goto out_error; + xen_store_evtchn = (int)v; + err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v); + if (err) + goto out_error; + xen_store_mfn = (unsigned long)v; + xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE); } else { - if (xen_hvm_domain()) { - uint64_t v = 0; - err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); - if (err) - goto out_error; - xen_store_evtchn = (int)v; - err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v); + xen_store_evtchn = xen_start_info->store_evtchn; + xen_store_mfn = xen_start_info->store_mfn; + if (xen_store_evtchn) + xenstored_ready = 1; + else { + err = xenstored_local_init(); if (err) goto out_error; - xen_store_mfn = (unsigned long)v; - xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE); - } else { - xen_store_evtchn = xen_start_info->store_evtchn; - xen_store_mfn = xen_start_info->store_mfn; - xen_store_interface = mfn_to_virt(xen_store_mfn); - xenstored_ready = 1; } + xen_store_interface = mfn_to_virt(xen_store_mfn); } /* Initialize the interface to xenstore. */ @@ -772,12 +770,7 @@ static int __init xenbus_init(void) proc_mkdir("xen", NULL); #endif - return 0; - - out_error: - if (page != 0) - free_page(page); - +out_error: return err; } diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h index 888b990..9b1de4e 100644 --- a/drivers/xen/xenbus/xenbus_probe.h +++ b/drivers/xen/xenbus/xenbus_probe.h @@ -36,8 +36,7 @@ #define XEN_BUS_ID_SIZE 20 -struct xen_bus_type -{ +struct xen_bus_type { char *root; unsigned int levels; int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename); @@ -48,6 +47,8 @@ struct xen_bus_type struct bus_type bus; }; +extern struct device_attribute xenbus_dev_attrs[]; + extern int xenbus_match(struct device *_dev, struct device_driver *_drv); extern int xenbus_dev_probe(struct device *_dev); extern int xenbus_dev_remove(struct device *_dev); diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c index 6cf467b..c3c7cd1 100644 --- a/drivers/xen/xenbus/xenbus_probe_backend.c +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -104,8 +105,9 @@ static int xenbus_uevent_backend(struct device *dev, xdev = to_xenbus_device(dev); bus = container_of(xdev->dev.bus, struct xen_bus_type, bus); - if (xdev == NULL) - return -ENODEV; + + if (add_uevent_var(env, "MODALIAS=xen-backend:%s", xdev->devicetype)) + return -ENOMEM; /* stuff we want to pass to /sbin/hotplug */ if (add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype)) @@ -183,10 +185,6 @@ static void frontend_changed(struct xenbus_watch *watch, xenbus_otherend_changed(watch, vec, len, 0); } -static struct device_attribute xenbus_backend_dev_attrs[] = { - __ATTR_NULL -}; - static struct xen_bus_type xenbus_backend = { .root = "backend", .levels = 3, /* backend/type// */ @@ -200,7 +198,7 @@ static struct xen_bus_type xenbus_backend = { .probe = xenbus_dev_probe, .remove = xenbus_dev_remove, .shutdown = xenbus_dev_shutdown, - .dev_attrs = xenbus_backend_dev_attrs, + .dev_attrs = xenbus_dev_attrs, }, }; diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index 560f217..2ce95c0 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -81,10 +82,6 @@ static void backend_changed(struct xenbus_watch *watch, xenbus_otherend_changed(watch, vec, len, 1); } -static struct device_attribute xenbus_frontend_dev_attrs[] = { - __ATTR_NULL -}; - static const struct dev_pm_ops xenbus_pm_ops = { .suspend = xenbus_dev_suspend, .resume = xenbus_dev_resume, @@ -106,7 +103,7 @@ static struct xen_bus_type xenbus_frontend = { .probe = xenbus_dev_probe, .remove = xenbus_dev_remove, .shutdown = xenbus_dev_shutdown, - .dev_attrs = xenbus_frontend_dev_attrs, + .dev_attrs = xenbus_dev_attrs, .pm = &xenbus_pm_ops, }, @@ -289,10 +286,131 @@ int __xenbus_register_frontend(struct xenbus_driver *drv, } EXPORT_SYMBOL_GPL(__xenbus_register_frontend); +static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq); +static int backend_state; + +static void xenbus_reset_backend_state_changed(struct xenbus_watch *w, + const char **v, unsigned int l) +{ + xenbus_scanf(XBT_NIL, v[XS_WATCH_PATH], "", "%i", &backend_state); + printk(KERN_DEBUG "XENBUS: backend %s %s\n", + v[XS_WATCH_PATH], xenbus_strstate(backend_state)); + wake_up(&backend_state_wq); +} + +static void xenbus_reset_wait_for_backend(char *be, int expected) +{ + long timeout; + timeout = wait_event_interruptible_timeout(backend_state_wq, + backend_state == expected, 5 * HZ); + if (timeout <= 0) + printk(KERN_INFO "XENBUS: backend %s timed out.\n", be); +} + +/* + * Reset frontend if it is in Connected or Closed state. + * Wait for backend to catch up. + * State Connected happens during kdump, Closed after kexec. + */ +static void xenbus_reset_frontend(char *fe, char *be, int be_state) +{ + struct xenbus_watch be_watch; + + printk(KERN_DEBUG "XENBUS: backend %s %s\n", + be, xenbus_strstate(be_state)); + + memset(&be_watch, 0, sizeof(be_watch)); + be_watch.node = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/state", be); + if (!be_watch.node) + return; + + be_watch.callback = xenbus_reset_backend_state_changed; + backend_state = XenbusStateUnknown; + + printk(KERN_INFO "XENBUS: triggering reconnect on %s\n", be); + register_xenbus_watch(&be_watch); + + /* fall through to forward backend to state XenbusStateInitialising */ + switch (be_state) { + case XenbusStateConnected: + xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosing); + xenbus_reset_wait_for_backend(be, XenbusStateClosing); + + case XenbusStateClosing: + xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosed); + xenbus_reset_wait_for_backend(be, XenbusStateClosed); + + case XenbusStateClosed: + xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateInitialising); + xenbus_reset_wait_for_backend(be, XenbusStateInitWait); + } + + unregister_xenbus_watch(&be_watch); + printk(KERN_INFO "XENBUS: reconnect done on %s\n", be); + kfree(be_watch.node); +} + +static void xenbus_check_frontend(char *class, char *dev) +{ + int be_state, fe_state, err; + char *backend, *frontend; + + frontend = kasprintf(GFP_NOIO | __GFP_HIGH, "device/%s/%s", class, dev); + if (!frontend) + return; + + err = xenbus_scanf(XBT_NIL, frontend, "state", "%i", &fe_state); + if (err != 1) + goto out; + + switch (fe_state) { + case XenbusStateConnected: + case XenbusStateClosed: + printk(KERN_DEBUG "XENBUS: frontend %s %s\n", + frontend, xenbus_strstate(fe_state)); + backend = xenbus_read(XBT_NIL, frontend, "backend", NULL); + if (!backend || IS_ERR(backend)) + goto out; + err = xenbus_scanf(XBT_NIL, backend, "state", "%i", &be_state); + if (err == 1) + xenbus_reset_frontend(frontend, backend, be_state); + kfree(backend); + break; + default: + break; + } +out: + kfree(frontend); +} + +static void xenbus_reset_state(void) +{ + char **devclass, **dev; + int devclass_n, dev_n; + int i, j; + + devclass = xenbus_directory(XBT_NIL, "device", "", &devclass_n); + if (IS_ERR(devclass)) + return; + + for (i = 0; i < devclass_n; i++) { + dev = xenbus_directory(XBT_NIL, "device", devclass[i], &dev_n); + if (IS_ERR(dev)) + continue; + for (j = 0; j < dev_n; j++) + xenbus_check_frontend(devclass[i], dev[j]); + kfree(dev); + } + kfree(devclass); +} + static int frontend_probe_and_watch(struct notifier_block *notifier, unsigned long event, void *data) { + /* reset devices in Connected or Closed state */ + if (xen_hvm_domain()) + xenbus_reset_state(); /* Enumerate devices in xenstore and watch for changes. */ xenbus_probe_devices(&xenbus_frontend); register_xenbus_watch(&fe_watch); diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index daee5db..a580b17 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "xenbus_comms.h" struct xs_stored_msg { @@ -638,8 +639,7 @@ int register_xenbus_watch(struct xenbus_watch *watch) err = xs_watch(watch->node, token); - /* Ignore errors due to multiple registration. */ - if ((err != 0) && (err != -EEXIST)) { + if (err) { spin_lock(&watches_lock); list_del(&watch->list); spin_unlock(&watches_lock); diff --git a/drivers/zorro/.gitignore b/drivers/zorro/.gitignore new file mode 100644 index 0000000..34f980b --- /dev/null +++ b/drivers/zorro/.gitignore @@ -0,0 +1,2 @@ +devlist.h +gen-devlist diff --git a/drivers/zorro/proc.c b/drivers/zorro/proc.c index e0c8472..988880d 100644 --- a/drivers/zorro/proc.c +++ b/drivers/zorro/proc.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/zorro/zorro-driver.c b/drivers/zorro/zorro-driver.c index 7ee2b6e..229624f 100644 --- a/drivers/zorro/zorro-driver.c +++ b/drivers/zorro/zorro-driver.c @@ -37,6 +37,7 @@ zorro_match_device(const struct zorro_device_id *ids, } return NULL; } +EXPORT_SYMBOL(zorro_match_device); static int zorro_device_probe(struct device *dev) @@ -91,6 +92,7 @@ int zorro_register_driver(struct zorro_driver *drv) /* register with core */ return driver_register(&drv->driver); } +EXPORT_SYMBOL(zorro_register_driver); /** @@ -107,6 +109,7 @@ void zorro_unregister_driver(struct zorro_driver *drv) { driver_unregister(&drv->driver); } +EXPORT_SYMBOL(zorro_unregister_driver); /** @@ -168,6 +171,7 @@ struct bus_type zorro_bus_type = { .probe = zorro_device_probe, .remove = zorro_device_remove, }; +EXPORT_SYMBOL(zorro_bus_type); static int __init zorro_driver_init(void) @@ -177,7 +181,3 @@ static int __init zorro_driver_init(void) postcore_initcall(zorro_driver_init); -EXPORT_SYMBOL(zorro_match_device); -EXPORT_SYMBOL(zorro_register_driver); -EXPORT_SYMBOL(zorro_unregister_driver); -EXPORT_SYMBOL(zorro_bus_type); diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 9b72dcf..c0ddfd2 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -6,5 +6,8 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ transaction.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ - export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \ - compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o + export.o tree-log.o free-space-cache.o zlib.o lzo.o \ + compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ + reada.o backref.o + +btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index f66fc99..89b156d 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -28,9 +28,7 @@ #include "btrfs_inode.h" #include "xattr.h" -#ifdef CONFIG_BTRFS_FS_POSIX_ACL - -static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) +struct posix_acl *btrfs_get_acl(struct inode *inode, int type) { int size; const char *name; @@ -61,22 +59,19 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) if (!value) return ERR_PTR(-ENOMEM); size = __btrfs_getxattr(inode, name, value, size); - if (size > 0) { - acl = posix_acl_from_xattr(value, size); - if (IS_ERR(acl)) { - kfree(value); - return acl; - } - set_cached_acl(inode, type, acl); - } - kfree(value); + } + if (size > 0) { + acl = posix_acl_from_xattr(value, size); } else if (size == -ENOENT || size == -ENODATA || size == 0) { /* FIXME, who returns -ENOENT? I think nobody */ acl = NULL; - set_cached_acl(inode, type, acl); } else { acl = ERR_PTR(-EIO); } + kfree(value); + + if (!IS_ERR(acl)) + set_cached_acl(inode, type, acl); return acl; } @@ -111,7 +106,6 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans, int ret, size = 0; const char *name; char *value = NULL; - mode_t mode; if (acl) { ret = posix_acl_valid(acl); @@ -122,13 +116,11 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans, switch (type) { case ACL_TYPE_ACCESS: - mode = inode->i_mode; name = POSIX_ACL_XATTR_ACCESS; if (acl) { - ret = posix_acl_equiv_mode(acl, &mode); + ret = posix_acl_equiv_mode(acl, &inode->i_mode); if (ret < 0) return ret; - inode->i_mode = mode; } ret = 0; break; @@ -195,28 +187,6 @@ out: return ret; } -int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags) -{ - int error = -EAGAIN; - - if (flags & IPERM_FLAG_RCU) { - if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) - error = -ECHILD; - - } else { - struct posix_acl *acl; - acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl) { - error = posix_acl_permission(inode, acl, mask); - posix_acl_release(acl); - } - } - - return error; -} - /* * btrfs_init_acl is already generally called under fs_mutex, so the locking * stuff has been fixed to work with that. If the locking stuff changes, we @@ -244,31 +214,20 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans, } if (IS_POSIXACL(dir) && acl) { - struct posix_acl *clone; - mode_t mode; - if (S_ISDIR(inode->i_mode)) { ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_DEFAULT); if (ret) goto failed; } - clone = posix_acl_clone(acl, GFP_NOFS); - ret = -ENOMEM; - if (!clone) - goto failed; - - mode = inode->i_mode; - ret = posix_acl_create_masq(clone, &mode); - if (ret >= 0) { - inode->i_mode = mode; - if (ret > 0) { - /* we need an acl */ - ret = btrfs_set_acl(trans, inode, clone, - ACL_TYPE_ACCESS); - } + ret = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode); + if (ret < 0) + return ret; + + if (ret > 0) { + /* we need an acl */ + ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS); } - posix_acl_release(clone); } failed: posix_acl_release(acl); @@ -278,7 +237,7 @@ failed: int btrfs_acl_chmod(struct inode *inode) { - struct posix_acl *acl, *clone; + struct posix_acl *acl; int ret = 0; if (S_ISLNK(inode->i_mode)) @@ -291,17 +250,11 @@ int btrfs_acl_chmod(struct inode *inode) if (IS_ERR_OR_NULL(acl)) return PTR_ERR(acl); - clone = posix_acl_clone(acl, GFP_KERNEL); + ret = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); + if (ret) + return ret; + ret = btrfs_set_acl(NULL, inode, acl, ACL_TYPE_ACCESS); posix_acl_release(acl); - if (!clone) - return -ENOMEM; - - ret = posix_acl_chmod_masq(clone, inode->i_mode); - if (!ret) - ret = btrfs_set_acl(NULL, inode, clone, ACL_TYPE_ACCESS); - - posix_acl_release(clone); - return ret; } @@ -318,18 +271,3 @@ const struct xattr_handler btrfs_xattr_acl_access_handler = { .get = btrfs_xattr_acl_get, .set = btrfs_xattr_acl_set, }; - -#else /* CONFIG_BTRFS_FS_POSIX_ACL */ - -int btrfs_acl_chmod(struct inode *inode) -{ - return 0; -} - -int btrfs_init_acl(struct btrfs_trans_handle *trans, - struct inode *inode, struct inode *dir) -{ - return 0; -} - -#endif /* CONFIG_BTRFS_FS_POSIX_ACL */ diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 8006a28..03321e5 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -64,6 +64,8 @@ struct btrfs_worker_thread { int idle; }; +static int __btrfs_start_workers(struct btrfs_workers *workers); + /* * btrfs_start_workers uses kthread_run, which can block waiting for memory * for a very long time. It will actually throttle on page writeback, @@ -88,27 +90,10 @@ static void start_new_worker_func(struct btrfs_work *work) { struct worker_start *start; start = container_of(work, struct worker_start, work); - btrfs_start_workers(start->queue, 1); + __btrfs_start_workers(start->queue); kfree(start); } -static int start_new_worker(struct btrfs_workers *queue) -{ - struct worker_start *start; - int ret; - - start = kzalloc(sizeof(*start), GFP_NOFS); - if (!start) - return -ENOMEM; - - start->work.func = start_new_worker_func; - start->queue = queue; - ret = btrfs_queue_worker(queue->atomic_worker_start, &start->work); - if (ret) - kfree(start); - return ret; -} - /* * helper function to move a thread onto the idle list after it * has finished some requests. @@ -153,12 +138,20 @@ static void check_busy_worker(struct btrfs_worker_thread *worker) static void check_pending_worker_creates(struct btrfs_worker_thread *worker) { struct btrfs_workers *workers = worker->workers; + struct worker_start *start; unsigned long flags; rmb(); if (!workers->atomic_start_pending) return; + start = kzalloc(sizeof(*start), GFP_NOFS); + if (!start) + return; + + start->work.func = start_new_worker_func; + start->queue = workers; + spin_lock_irqsave(&workers->lock, flags); if (!workers->atomic_start_pending) goto out; @@ -170,10 +163,11 @@ static void check_pending_worker_creates(struct btrfs_worker_thread *worker) workers->num_workers_starting += 1; spin_unlock_irqrestore(&workers->lock, flags); - start_new_worker(workers); + btrfs_queue_worker(workers->atomic_worker_start, &start->work); return; out: + kfree(start); spin_unlock_irqrestore(&workers->lock, flags); } @@ -338,7 +332,7 @@ again: run_ordered_completions(worker->workers, work); check_pending_worker_creates(worker); - + cond_resched(); } spin_lock_irq(&worker->lock); @@ -469,56 +463,55 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max, * starts new worker threads. This does not enforce the max worker * count in case you need to temporarily go past it. */ -static int __btrfs_start_workers(struct btrfs_workers *workers, - int num_workers) +static int __btrfs_start_workers(struct btrfs_workers *workers) { struct btrfs_worker_thread *worker; int ret = 0; - int i; - for (i = 0; i < num_workers; i++) { - worker = kzalloc(sizeof(*worker), GFP_NOFS); - if (!worker) { - ret = -ENOMEM; - goto fail; - } + worker = kzalloc(sizeof(*worker), GFP_NOFS); + if (!worker) { + ret = -ENOMEM; + goto fail; + } - INIT_LIST_HEAD(&worker->pending); - INIT_LIST_HEAD(&worker->prio_pending); - INIT_LIST_HEAD(&worker->worker_list); - spin_lock_init(&worker->lock); - - atomic_set(&worker->num_pending, 0); - atomic_set(&worker->refs, 1); - worker->workers = workers; - worker->task = kthread_run(worker_loop, worker, - "btrfs-%s-%d", workers->name, - workers->num_workers + i); - if (IS_ERR(worker->task)) { - ret = PTR_ERR(worker->task); - kfree(worker); - goto fail; - } - spin_lock_irq(&workers->lock); - list_add_tail(&worker->worker_list, &workers->idle_list); - worker->idle = 1; - workers->num_workers++; - workers->num_workers_starting--; - WARN_ON(workers->num_workers_starting < 0); - spin_unlock_irq(&workers->lock); + INIT_LIST_HEAD(&worker->pending); + INIT_LIST_HEAD(&worker->prio_pending); + INIT_LIST_HEAD(&worker->worker_list); + spin_lock_init(&worker->lock); + + atomic_set(&worker->num_pending, 0); + atomic_set(&worker->refs, 1); + worker->workers = workers; + worker->task = kthread_run(worker_loop, worker, + "btrfs-%s-%d", workers->name, + workers->num_workers + 1); + if (IS_ERR(worker->task)) { + ret = PTR_ERR(worker->task); + kfree(worker); + goto fail; } + spin_lock_irq(&workers->lock); + list_add_tail(&worker->worker_list, &workers->idle_list); + worker->idle = 1; + workers->num_workers++; + workers->num_workers_starting--; + WARN_ON(workers->num_workers_starting < 0); + spin_unlock_irq(&workers->lock); + return 0; fail: - btrfs_stop_workers(workers); + spin_lock_irq(&workers->lock); + workers->num_workers_starting--; + spin_unlock_irq(&workers->lock); return ret; } -int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) +int btrfs_start_workers(struct btrfs_workers *workers) { spin_lock_irq(&workers->lock); - workers->num_workers_starting += num_workers; + workers->num_workers_starting++; spin_unlock_irq(&workers->lock); - return __btrfs_start_workers(workers, num_workers); + return __btrfs_start_workers(workers); } /* @@ -575,9 +568,10 @@ static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers) struct btrfs_worker_thread *worker; unsigned long flags; struct list_head *fallback; + int ret; -again: spin_lock_irqsave(&workers->lock, flags); +again: worker = next_worker(workers); if (!worker) { @@ -591,7 +585,10 @@ again: workers->num_workers_starting++; spin_unlock_irqrestore(&workers->lock, flags); /* we're below the limit, start another worker */ - __btrfs_start_workers(workers, 1); + ret = __btrfs_start_workers(workers); + spin_lock_irqsave(&workers->lock, flags); + if (ret) + goto fallback; goto again; } } @@ -672,7 +669,7 @@ void btrfs_set_work_high_prio(struct btrfs_work *work) /* * places a struct btrfs_work into the pending queue of one of the kthreads */ -int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) +void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) { struct btrfs_worker_thread *worker; unsigned long flags; @@ -680,7 +677,7 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) /* don't requeue something already on a list */ if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags)) - goto out; + return; worker = find_worker(workers); if (workers->ordered) { @@ -719,7 +716,4 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) if (wake) wake_up_process(worker->task); spin_unlock_irqrestore(&worker->lock, flags); - -out: - return 0; } diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 5077746..f34cc31 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -109,8 +109,8 @@ struct btrfs_workers { char *name; }; -int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work); -int btrfs_start_workers(struct btrfs_workers *workers, int num_workers); +void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work); +int btrfs_start_workers(struct btrfs_workers *workers); int btrfs_stop_workers(struct btrfs_workers *workers); void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max, struct btrfs_workers *async_starter); diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 52d7eca..634608d 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -34,6 +34,9 @@ struct btrfs_inode { */ struct btrfs_key location; + /* Lock for counters */ + spinlock_t lock; + /* the extent_tree has caches of all the extent mappings to disk */ struct extent_map_tree extent_tree; @@ -100,11 +103,6 @@ struct btrfs_inode { */ u64 delalloc_bytes; - /* total number of bytes that may be used for this inode for - * delalloc - */ - u64 reserved_bytes; - /* * the size of the file stored in the metadata on disk. data=ordered * means the in-memory i_size might be larger than the size on disk @@ -112,9 +110,6 @@ struct btrfs_inode { */ u64 disk_i_size; - /* flags field from the on disk inode */ - u32 flags; - /* * if this is a directory then index_cnt is the counter for the index * number for new files that are created @@ -129,13 +124,22 @@ struct btrfs_inode { u64 last_unlink_trans; /* + * Number of bytes outstanding that are going to need csums. This is + * used in ENOSPC accounting. + */ + u64 csum_bytes; + + /* flags field from the on disk inode */ + u32 flags; + + /* * Counters to keep track of the number of extent item's we may use due * to delalloc and such. outstanding_extents is the number of extent * items we think we'll end up using, and reserved_extents is the number * of extent items we've reserved metadata for. */ - atomic_t outstanding_extents; - atomic_t reserved_extents; + unsigned outstanding_extents; + unsigned reserved_extents; /* * ordered_data_close is set by truncate when a file that used @@ -143,14 +147,12 @@ struct btrfs_inode { * the btrfs file release call will add this inode to the * ordered operations list so that we make sure to flush out any * new data the application may have written before commit. - * - * yes, its silly to have a single bitflag, but we might grow more - * of these. */ unsigned ordered_data_close:1; unsigned orphan_meta_reserved:1; unsigned dummy_inode:1; unsigned in_defrag:1; + unsigned delalloc_meta_reserved:1; /* * always compress this one file @@ -173,7 +175,11 @@ static inline u64 btrfs_ino(struct inode *inode) { u64 ino = BTRFS_I(inode)->location.objectid; - if (ino <= BTRFS_FIRST_FREE_OBJECTID) + /* + * !ino: btree_inode + * type == BTRFS_ROOT_ITEM_KEY: subvol dir + */ + if (!ino || BTRFS_I(inode)->location.type == BTRFS_ROOT_ITEM_KEY) ino = inode->i_ino; return ino; } @@ -184,4 +190,13 @@ static inline void btrfs_i_size_write(struct inode *inode, u64 size) BTRFS_I(inode)->disk_i_size = size; } +static inline bool btrfs_is_free_space_inode(struct btrfs_root *root, + struct inode *inode) +{ + if (root == root->fs_info->tree_root || + BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) + return true; + return false; +} + #endif diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index bfe42b0..14f1c5a 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -85,7 +85,8 @@ struct compressed_bio { static inline int compressed_bio_size(struct btrfs_root *root, unsigned long disk_size) { - u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy); + u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); + return sizeof(struct compressed_bio) + ((disk_size + root->sectorsize - 1) / root->sectorsize) * csum_size; @@ -338,6 +339,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, u64 first_byte = disk_start; struct block_device *bdev; int ret; + int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1)); cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); @@ -392,8 +394,11 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); BUG_ON(ret); - ret = btrfs_csum_one_bio(root, inode, bio, start, 1); - BUG_ON(ret); + if (!skip_sum) { + ret = btrfs_csum_one_bio(root, inode, bio, + start, 1); + BUG_ON(ret); + } ret = btrfs_map_bio(root, WRITE, bio, 0, 1); BUG_ON(ret); @@ -418,8 +423,10 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); BUG_ON(ret); - ret = btrfs_csum_one_bio(root, inode, bio, start, 1); - BUG_ON(ret); + if (!skip_sum) { + ret = btrfs_csum_one_bio(root, inode, bio, start, 1); + BUG_ON(ret); + } ret = btrfs_map_bio(root, WRITE, bio, 0, 1); BUG_ON(ret); diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 2e66786..dede441 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -54,8 +54,13 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p) { int i; for (i = 0; i < BTRFS_MAX_LEVEL; i++) { - if (p->nodes[i] && p->locks[i]) - btrfs_set_lock_blocking(p->nodes[i]); + if (!p->nodes[i] || !p->locks[i]) + continue; + btrfs_set_lock_blocking_rw(p->nodes[i], p->locks[i]); + if (p->locks[i] == BTRFS_READ_LOCK) + p->locks[i] = BTRFS_READ_LOCK_BLOCKING; + else if (p->locks[i] == BTRFS_WRITE_LOCK) + p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING; } } @@ -68,7 +73,7 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p) * for held */ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, - struct extent_buffer *held) + struct extent_buffer *held, int held_rw) { int i; @@ -79,19 +84,29 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, * really sure by forcing the path to blocking before we clear * the path blocking. */ - if (held) - btrfs_set_lock_blocking(held); + if (held) { + btrfs_set_lock_blocking_rw(held, held_rw); + if (held_rw == BTRFS_WRITE_LOCK) + held_rw = BTRFS_WRITE_LOCK_BLOCKING; + else if (held_rw == BTRFS_READ_LOCK) + held_rw = BTRFS_READ_LOCK_BLOCKING; + } btrfs_set_path_blocking(p); #endif for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) { - if (p->nodes[i] && p->locks[i]) - btrfs_clear_lock_blocking(p->nodes[i]); + if (p->nodes[i] && p->locks[i]) { + btrfs_clear_lock_blocking_rw(p->nodes[i], p->locks[i]); + if (p->locks[i] == BTRFS_WRITE_LOCK_BLOCKING) + p->locks[i] = BTRFS_WRITE_LOCK; + else if (p->locks[i] == BTRFS_READ_LOCK_BLOCKING) + p->locks[i] = BTRFS_READ_LOCK; + } } #ifdef CONFIG_DEBUG_LOCK_ALLOC if (held) - btrfs_clear_lock_blocking(held); + btrfs_clear_lock_blocking_rw(held, held_rw); #endif } @@ -119,7 +134,7 @@ noinline void btrfs_release_path(struct btrfs_path *p) if (!p->nodes[i]) continue; if (p->locks[i]) { - btrfs_tree_unlock(p->nodes[i]); + btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]); p->locks[i] = 0; } free_extent_buffer(p->nodes[i]); @@ -167,6 +182,25 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) return eb; } +/* loop around taking references on and locking the root node of the + * tree until you end up with a lock on the root. A locked buffer + * is returned, with a reference held. + */ +struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root) +{ + struct extent_buffer *eb; + + while (1) { + eb = btrfs_root_node(root); + btrfs_tree_read_lock(eb); + if (eb == root->node) + break; + btrfs_tree_read_unlock(eb); + free_extent_buffer(eb); + } + return eb; +} + /* cowonly root (everything not a reference counted cow subvolume), just get * put onto a simple dirty list. transaction.c walks this to make sure they * get properly updated on disk. @@ -480,10 +514,25 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf) { + /* ensure we can see the force_cow */ + smp_rmb(); + + /* + * We do not need to cow a block if + * 1) this block is not created or changed in this transaction; + * 2) this block does not belong to TREE_RELOC tree; + * 3) the root is not forced COW. + * + * What is forced COW: + * when we create snapshot during commiting the transaction, + * after we've finished coping src root, we must COW the shared + * block to ensure the metadata consistency. + */ if (btrfs_header_generation(buf) == trans->transid && !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) && !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID && - btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) + btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) && + !root->force_cow) return 0; return 1; } @@ -626,14 +675,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, for (i = start_slot; i < end_slot; i++) { int close = 1; - if (!parent->map_token) { - map_extent_buffer(parent, - btrfs_node_key_ptr_offset(i), - sizeof(struct btrfs_key_ptr), - &parent->map_token, &parent->kaddr, - &parent->map_start, &parent->map_len, - KM_USER1); - } btrfs_node_key(parent, &disk_key, i); if (!progress_passed && comp_keys(&disk_key, progress) < 0) continue; @@ -656,11 +697,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, last_block = blocknr; continue; } - if (parent->map_token) { - unmap_extent_buffer(parent, parent->map_token, - KM_USER1); - parent->map_token = NULL; - } cur = btrfs_find_tree_block(root, blocknr, blocksize); if (cur) @@ -701,11 +737,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, btrfs_tree_unlock(cur); free_extent_buffer(cur); } - if (parent->map_token) { - unmap_extent_buffer(parent, parent->map_token, - KM_USER1); - parent->map_token = NULL; - } return err; } @@ -746,7 +777,6 @@ static noinline int generic_bin_search(struct extent_buffer *eb, struct btrfs_disk_key *tmp = NULL; struct btrfs_disk_key unaligned; unsigned long offset; - char *map_token = NULL; char *kaddr = NULL; unsigned long map_start = 0; unsigned long map_len = 0; @@ -756,18 +786,13 @@ static noinline int generic_bin_search(struct extent_buffer *eb, mid = (low + high) / 2; offset = p + mid * item_size; - if (!map_token || offset < map_start || + if (!kaddr || offset < map_start || (offset + sizeof(struct btrfs_disk_key)) > map_start + map_len) { - if (map_token) { - unmap_extent_buffer(eb, map_token, KM_USER0); - map_token = NULL; - } err = map_private_extent_buffer(eb, offset, sizeof(struct btrfs_disk_key), - &map_token, &kaddr, - &map_start, &map_len, KM_USER0); + &kaddr, &map_start, &map_len); if (!err) { tmp = (struct btrfs_disk_key *)(kaddr + offset - @@ -790,14 +815,10 @@ static noinline int generic_bin_search(struct extent_buffer *eb, high = mid; else { *slot = mid; - if (map_token) - unmap_extent_buffer(eb, map_token, KM_USER0); return 0; } } *slot = low; - if (map_token) - unmap_extent_buffer(eb, map_token, KM_USER0); return 1; } @@ -890,14 +911,16 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, mid = path->nodes[level]; - WARN_ON(!path->locks[level]); + WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK && + path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING); WARN_ON(btrfs_header_generation(mid) != trans->transid); orig_ptr = btrfs_node_blockptr(mid, orig_slot); - if (level < BTRFS_MAX_LEVEL - 1) + if (level < BTRFS_MAX_LEVEL - 1) { parent = path->nodes[level + 1]; - pslot = path->slots[level + 1]; + pslot = path->slots[level + 1]; + } /* * deal with the case where there is only one pointer in the root @@ -1100,9 +1123,10 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, mid = path->nodes[level]; WARN_ON(btrfs_header_generation(mid) != trans->transid); - if (level < BTRFS_MAX_LEVEL - 1) + if (level < BTRFS_MAX_LEVEL - 1) { parent = path->nodes[level + 1]; - pslot = path->slots[level + 1]; + pslot = path->slots[level + 1]; + } if (!parent) return 1; @@ -1228,7 +1252,6 @@ static void reada_for_search(struct btrfs_root *root, u32 nr; u32 blocksize; u32 nscan = 0; - bool map = true; if (level != 1) return; @@ -1250,19 +1273,8 @@ static void reada_for_search(struct btrfs_root *root, nritems = btrfs_header_nritems(node); nr = slot; - if (node->map_token || path->skip_locking) - map = false; while (1) { - if (map && !node->map_token) { - unsigned long offset = btrfs_node_key_ptr_offset(nr); - map_private_extent_buffer(node, offset, - sizeof(struct btrfs_key_ptr), - &node->map_token, - &node->kaddr, - &node->map_start, - &node->map_len, KM_USER1); - } if (direction < 0) { if (nr == 0) break; @@ -1281,11 +1293,6 @@ static void reada_for_search(struct btrfs_root *root, if ((search <= target && target - search <= 65536) || (search > target && search - target <= 65536)) { gen = btrfs_node_ptr_generation(node, nr); - if (map && node->map_token) { - unmap_extent_buffer(node, node->map_token, - KM_USER1); - node->map_token = NULL; - } readahead_tree_block(root, search, blocksize, gen); nread += blocksize; } @@ -1293,10 +1300,6 @@ static void reada_for_search(struct btrfs_root *root, if ((nread > 65536 || nscan > 32)) break; } - if (map && node->map_token) { - unmap_extent_buffer(node, node->map_token, KM_USER1); - node->map_token = NULL; - } } /* @@ -1409,7 +1412,7 @@ static noinline void unlock_up(struct btrfs_path *path, int level, t = path->nodes[i]; if (i >= lowest_unlock && i > skip_level && path->locks[i]) { - btrfs_tree_unlock(t); + btrfs_tree_unlock_rw(t, path->locks[i]); path->locks[i] = 0; } } @@ -1436,7 +1439,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level) continue; if (!path->locks[i]) continue; - btrfs_tree_unlock(path->nodes[i]); + btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]); path->locks[i] = 0; } } @@ -1485,6 +1488,8 @@ read_block_for_search(struct btrfs_trans_handle *trans, * we can trust our generation number */ free_extent_buffer(tmp); + btrfs_set_path_blocking(p); + tmp = read_tree_block(root, blocknr, blocksize, gen); if (tmp && btrfs_buffer_uptodate(tmp, gen)) { *eb_ret = tmp; @@ -1540,20 +1545,27 @@ read_block_for_search(struct btrfs_trans_handle *trans, static int setup_nodes_for_search(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *p, - struct extent_buffer *b, int level, int ins_len) + struct extent_buffer *b, int level, int ins_len, + int *write_lock_level) { int ret; if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >= BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { int sret; + if (*write_lock_level < level + 1) { + *write_lock_level = level + 1; + btrfs_release_path(p); + goto again; + } + sret = reada_for_balance(root, p, level); if (sret) goto again; btrfs_set_path_blocking(p); sret = split_node(trans, root, p, level); - btrfs_clear_path_blocking(p, NULL); + btrfs_clear_path_blocking(p, NULL, 0); BUG_ON(sret > 0); if (sret) { @@ -1565,13 +1577,19 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans, BTRFS_NODEPTRS_PER_BLOCK(root) / 2) { int sret; + if (*write_lock_level < level + 1) { + *write_lock_level = level + 1; + btrfs_release_path(p); + goto again; + } + sret = reada_for_balance(root, p, level); if (sret) goto again; btrfs_set_path_blocking(p); sret = balance_level(trans, root, p, level); - btrfs_clear_path_blocking(p, NULL); + btrfs_clear_path_blocking(p, NULL, 0); if (sret) { ret = sret; @@ -1615,27 +1633,78 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root int err; int level; int lowest_unlock = 1; + int root_lock; + /* everything at write_lock_level or lower must be write locked */ + int write_lock_level = 0; u8 lowest_level = 0; lowest_level = p->lowest_level; WARN_ON(lowest_level && ins_len > 0); WARN_ON(p->nodes[0] != NULL); - if (ins_len < 0) + if (ins_len < 0) { lowest_unlock = 2; + /* when we are removing items, we might have to go up to level + * two as we update tree pointers Make sure we keep write + * for those levels as well + */ + write_lock_level = 2; + } else if (ins_len > 0) { + /* + * for inserting items, make sure we have a write lock on + * level 1 so we can update keys + */ + write_lock_level = 1; + } + + if (!cow) + write_lock_level = -1; + + if (cow && (p->keep_locks || p->lowest_level)) + write_lock_level = BTRFS_MAX_LEVEL; + again: + /* + * we try very hard to do read locks on the root + */ + root_lock = BTRFS_READ_LOCK; + level = 0; if (p->search_commit_root) { + /* + * the commit roots are read only + * so we always do read locks + */ b = root->commit_root; extent_buffer_get(b); + level = btrfs_header_level(b); if (!p->skip_locking) - btrfs_tree_lock(b); + btrfs_tree_read_lock(b); } else { - if (p->skip_locking) + if (p->skip_locking) { b = btrfs_root_node(root); - else - b = btrfs_lock_root_node(root); + level = btrfs_header_level(b); + } else { + /* we don't know the level of the root node + * until we actually have it read locked + */ + b = btrfs_read_lock_root_node(root); + level = btrfs_header_level(b); + if (level <= write_lock_level) { + /* whoops, must trade for write lock */ + btrfs_tree_read_unlock(b); + free_extent_buffer(b); + b = btrfs_lock_root_node(root); + root_lock = BTRFS_WRITE_LOCK; + + /* the level might have changed, check again */ + level = btrfs_header_level(b); + } + } } + p->nodes[level] = b; + if (!p->skip_locking) + p->locks[level] = root_lock; while (b) { level = btrfs_header_level(b); @@ -1644,10 +1713,6 @@ again: * setup the path here so we can release it under lock * contention with the cow code */ - p->nodes[level] = b; - if (!p->skip_locking) - p->locks[level] = 1; - if (cow) { /* * if we don't really need to cow this block @@ -1659,6 +1724,16 @@ again: btrfs_set_path_blocking(p); + /* + * must have write locks on this node and the + * parent + */ + if (level + 1 > write_lock_level) { + write_lock_level = level + 1; + btrfs_release_path(p); + goto again; + } + err = btrfs_cow_block(trans, root, b, p->nodes[level + 1], p->slots[level + 1], &b); @@ -1671,10 +1746,7 @@ cow_done: BUG_ON(!cow && ins_len); p->nodes[level] = b; - if (!p->skip_locking) - p->locks[level] = 1; - - btrfs_clear_path_blocking(p, NULL); + btrfs_clear_path_blocking(p, NULL, 0); /* * we have a lock on b and as long as we aren't changing @@ -1700,7 +1772,7 @@ cow_done: } p->slots[level] = slot; err = setup_nodes_for_search(trans, root, p, b, level, - ins_len); + ins_len, &write_lock_level); if (err == -EAGAIN) goto again; if (err) { @@ -1710,6 +1782,19 @@ cow_done: b = p->nodes[level]; slot = p->slots[level]; + /* + * slot 0 is special, if we change the key + * we have to update the parent pointer + * which means we must have a write lock + * on the parent + */ + if (slot == 0 && cow && + write_lock_level < level + 1) { + write_lock_level = level + 1; + btrfs_release_path(p); + goto again; + } + unlock_up(p, level, lowest_unlock); if (level == lowest_level) { @@ -1728,23 +1813,42 @@ cow_done: } if (!p->skip_locking) { - btrfs_clear_path_blocking(p, NULL); - err = btrfs_try_spin_lock(b); - - if (!err) { - btrfs_set_path_blocking(p); - btrfs_tree_lock(b); - btrfs_clear_path_blocking(p, b); + level = btrfs_header_level(b); + if (level <= write_lock_level) { + err = btrfs_try_tree_write_lock(b); + if (!err) { + btrfs_set_path_blocking(p); + btrfs_tree_lock(b); + btrfs_clear_path_blocking(p, b, + BTRFS_WRITE_LOCK); + } + p->locks[level] = BTRFS_WRITE_LOCK; + } else { + err = btrfs_try_tree_read_lock(b); + if (!err) { + btrfs_set_path_blocking(p); + btrfs_tree_read_lock(b); + btrfs_clear_path_blocking(p, b, + BTRFS_READ_LOCK); + } + p->locks[level] = BTRFS_READ_LOCK; } + p->nodes[level] = b; } } else { p->slots[level] = slot; if (ins_len > 0 && btrfs_leaf_free_space(root, b) < ins_len) { + if (write_lock_level < 1) { + write_lock_level = 1; + btrfs_release_path(p); + goto again; + } + btrfs_set_path_blocking(p); err = split_leaf(trans, root, key, p, ins_len, ret == 0); - btrfs_clear_path_blocking(p, NULL); + btrfs_clear_path_blocking(p, NULL, 0); BUG_ON(err > 0); if (err) { @@ -2025,7 +2129,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, add_root_to_dirty_list(root); extent_buffer_get(c); path->nodes[level] = c; - path->locks[level] = 1; + path->locks[level] = BTRFS_WRITE_LOCK; path->slots[level] = 0; return 0; } @@ -2253,14 +2357,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, if (path->slots[0] == i) push_space += data_size; - if (!left->map_token) { - map_extent_buffer(left, (unsigned long)item, - sizeof(struct btrfs_item), - &left->map_token, &left->kaddr, - &left->map_start, &left->map_len, - KM_USER1); - } - this_item_size = btrfs_item_size(left, item); if (this_item_size + sizeof(*item) + push_space > free_space) break; @@ -2271,10 +2367,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, break; i--; } - if (left->map_token) { - unmap_extent_buffer(left, left->map_token, KM_USER1); - left->map_token = NULL; - } if (push_items == 0) goto out_unlock; @@ -2316,21 +2408,10 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, push_space = BTRFS_LEAF_DATA_SIZE(root); for (i = 0; i < right_nritems; i++) { item = btrfs_item_nr(right, i); - if (!right->map_token) { - map_extent_buffer(right, (unsigned long)item, - sizeof(struct btrfs_item), - &right->map_token, &right->kaddr, - &right->map_start, &right->map_len, - KM_USER1); - } push_space -= btrfs_item_size(right, item); btrfs_set_item_offset(right, item, push_space); } - if (right->map_token) { - unmap_extent_buffer(right, right->map_token, KM_USER1); - right->map_token = NULL; - } left_nritems -= push_items; btrfs_set_header_nritems(left, left_nritems); @@ -2467,13 +2548,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, for (i = 0; i < nr; i++) { item = btrfs_item_nr(right, i); - if (!right->map_token) { - map_extent_buffer(right, (unsigned long)item, - sizeof(struct btrfs_item), - &right->map_token, &right->kaddr, - &right->map_start, &right->map_len, - KM_USER1); - } if (!empty && push_items > 0) { if (path->slots[0] < i) @@ -2496,11 +2570,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, push_space += this_item_size + sizeof(*item); } - if (right->map_token) { - unmap_extent_buffer(right, right->map_token, KM_USER1); - right->map_token = NULL; - } - if (push_items == 0) { ret = 1; goto out; @@ -2530,23 +2599,12 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, u32 ioff; item = btrfs_item_nr(left, i); - if (!left->map_token) { - map_extent_buffer(left, (unsigned long)item, - sizeof(struct btrfs_item), - &left->map_token, &left->kaddr, - &left->map_start, &left->map_len, - KM_USER1); - } ioff = btrfs_item_offset(left, item); btrfs_set_item_offset(left, item, ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size)); } btrfs_set_header_nritems(left, old_left_nritems + push_items); - if (left->map_token) { - unmap_extent_buffer(left, left->map_token, KM_USER1); - left->map_token = NULL; - } /* fixup right node */ if (push_items > right_nritems) { @@ -2574,21 +2632,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, for (i = 0; i < right_nritems; i++) { item = btrfs_item_nr(right, i); - if (!right->map_token) { - map_extent_buffer(right, (unsigned long)item, - sizeof(struct btrfs_item), - &right->map_token, &right->kaddr, - &right->map_start, &right->map_len, - KM_USER1); - } - push_space = push_space - btrfs_item_size(right, item); btrfs_set_item_offset(right, item, push_space); } - if (right->map_token) { - unmap_extent_buffer(right, right->map_token, KM_USER1); - right->map_token = NULL; - } btrfs_mark_buffer_dirty(left); if (right_nritems) @@ -2729,23 +2775,10 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans, struct btrfs_item *item = btrfs_item_nr(right, i); u32 ioff; - if (!right->map_token) { - map_extent_buffer(right, (unsigned long)item, - sizeof(struct btrfs_item), - &right->map_token, &right->kaddr, - &right->map_start, &right->map_len, - KM_USER1); - } - ioff = btrfs_item_offset(right, item); btrfs_set_item_offset(right, item, ioff + rt_data_off); } - if (right->map_token) { - unmap_extent_buffer(right, right->map_token, KM_USER1); - right->map_token = NULL; - } - btrfs_set_header_nritems(l, mid); ret = 0; btrfs_item_key(right, &disk_key, 0); @@ -3264,23 +3297,10 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, u32 ioff; item = btrfs_item_nr(leaf, i); - if (!leaf->map_token) { - map_extent_buffer(leaf, (unsigned long)item, - sizeof(struct btrfs_item), - &leaf->map_token, &leaf->kaddr, - &leaf->map_start, &leaf->map_len, - KM_USER1); - } - ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff + size_diff); } - if (leaf->map_token) { - unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); - leaf->map_token = NULL; - } - /* shift the data */ if (from_end) { memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + @@ -3377,22 +3397,10 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, u32 ioff; item = btrfs_item_nr(leaf, i); - if (!leaf->map_token) { - map_extent_buffer(leaf, (unsigned long)item, - sizeof(struct btrfs_item), - &leaf->map_token, &leaf->kaddr, - &leaf->map_start, &leaf->map_len, - KM_USER1); - } ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff - data_size); } - if (leaf->map_token) { - unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); - leaf->map_token = NULL; - } - /* shift the data */ memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end - data_size, btrfs_leaf_data(leaf) + @@ -3494,27 +3502,13 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans, * item0..itemN ... dataN.offset..dataN.size .. data0.size */ /* first correct the data pointers */ - WARN_ON(leaf->map_token); for (i = slot; i < nritems; i++) { u32 ioff; item = btrfs_item_nr(leaf, i); - if (!leaf->map_token) { - map_extent_buffer(leaf, (unsigned long)item, - sizeof(struct btrfs_item), - &leaf->map_token, &leaf->kaddr, - &leaf->map_start, &leaf->map_len, - KM_USER1); - } - ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff - total_data); } - if (leaf->map_token) { - unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); - leaf->map_token = NULL; - } - /* shift the items */ memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), btrfs_item_nr_offset(slot), @@ -3608,27 +3602,13 @@ int setup_items_for_insert(struct btrfs_trans_handle *trans, * item0..itemN ... dataN.offset..dataN.size .. data0.size */ /* first correct the data pointers */ - WARN_ON(leaf->map_token); for (i = slot; i < nritems; i++) { u32 ioff; item = btrfs_item_nr(leaf, i); - if (!leaf->map_token) { - map_extent_buffer(leaf, (unsigned long)item, - sizeof(struct btrfs_item), - &leaf->map_token, &leaf->kaddr, - &leaf->map_start, &leaf->map_len, - KM_USER1); - } - ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff - total_data); } - if (leaf->map_token) { - unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); - leaf->map_token = NULL; - } - /* shift the items */ memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), btrfs_item_nr_offset(slot), @@ -3840,22 +3820,10 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, u32 ioff; item = btrfs_item_nr(leaf, i); - if (!leaf->map_token) { - map_extent_buffer(leaf, (unsigned long)item, - sizeof(struct btrfs_item), - &leaf->map_token, &leaf->kaddr, - &leaf->map_start, &leaf->map_len, - KM_USER1); - } ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff + dsize); } - if (leaf->map_token) { - unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); - leaf->map_token = NULL; - } - memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot), btrfs_item_nr_offset(slot + nr), sizeof(struct btrfs_item) * @@ -4004,11 +3972,11 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, WARN_ON(!path->keep_locks); again: - cur = btrfs_lock_root_node(root); + cur = btrfs_read_lock_root_node(root); level = btrfs_header_level(cur); WARN_ON(path->nodes[level]); path->nodes[level] = cur; - path->locks[level] = 1; + path->locks[level] = BTRFS_READ_LOCK; if (btrfs_header_generation(cur) < min_trans) { ret = 1; @@ -4098,12 +4066,12 @@ find_next_key: cur = read_node_slot(root, cur, slot); BUG_ON(!cur); - btrfs_tree_lock(cur); + btrfs_tree_read_lock(cur); - path->locks[level - 1] = 1; + path->locks[level - 1] = BTRFS_READ_LOCK; path->nodes[level - 1] = cur; unlock_up(path, level, 1); - btrfs_clear_path_blocking(path, NULL); + btrfs_clear_path_blocking(path, NULL, 0); } out: if (ret == 0) @@ -4218,30 +4186,21 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) u32 nritems; int ret; int old_spinning = path->leave_spinning; - int force_blocking = 0; + int next_rw_lock = 0; nritems = btrfs_header_nritems(path->nodes[0]); if (nritems == 0) return 1; - /* - * we take the blocks in an order that upsets lockdep. Using - * blocking mode is the only way around it. - */ -#ifdef CONFIG_DEBUG_LOCK_ALLOC - force_blocking = 1; -#endif - btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); again: level = 1; next = NULL; + next_rw_lock = 0; btrfs_release_path(path); path->keep_locks = 1; - - if (!force_blocking) - path->leave_spinning = 1; + path->leave_spinning = 1; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); path->keep_locks = 0; @@ -4281,11 +4240,12 @@ again: } if (next) { - btrfs_tree_unlock(next); + btrfs_tree_unlock_rw(next, next_rw_lock); free_extent_buffer(next); } next = c; + next_rw_lock = path->locks[level]; ret = read_block_for_search(NULL, root, path, &next, level, slot, &key); if (ret == -EAGAIN) @@ -4297,15 +4257,14 @@ again: } if (!path->skip_locking) { - ret = btrfs_try_spin_lock(next); + ret = btrfs_try_tree_read_lock(next); if (!ret) { btrfs_set_path_blocking(path); - btrfs_tree_lock(next); - if (!force_blocking) - btrfs_clear_path_blocking(path, next); + btrfs_tree_read_lock(next); + btrfs_clear_path_blocking(path, next, + BTRFS_READ_LOCK); } - if (force_blocking) - btrfs_set_lock_blocking(next); + next_rw_lock = BTRFS_READ_LOCK; } break; } @@ -4314,14 +4273,13 @@ again: level--; c = path->nodes[level]; if (path->locks[level]) - btrfs_tree_unlock(c); + btrfs_tree_unlock_rw(c, path->locks[level]); free_extent_buffer(c); path->nodes[level] = next; path->slots[level] = 0; if (!path->skip_locking) - path->locks[level] = 1; - + path->locks[level] = next_rw_lock; if (!level) break; @@ -4336,16 +4294,14 @@ again: } if (!path->skip_locking) { - btrfs_assert_tree_locked(path->nodes[level]); - ret = btrfs_try_spin_lock(next); + ret = btrfs_try_tree_read_lock(next); if (!ret) { btrfs_set_path_blocking(path); - btrfs_tree_lock(next); - if (!force_blocking) - btrfs_clear_path_blocking(path, next); + btrfs_tree_read_lock(next); + btrfs_clear_path_blocking(path, next, + BTRFS_READ_LOCK); } - if (force_blocking) - btrfs_set_lock_blocking(next); + next_rw_lock = BTRFS_READ_LOCK; } } ret = 0; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 66179bc..83a871f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -30,6 +30,7 @@ #include #include #include +#include #include "extent_io.h" #include "extent_map.h" #include "async-thread.h" @@ -360,6 +361,47 @@ struct btrfs_header { #define BTRFS_LABEL_SIZE 256 /* + * just in case we somehow lose the roots and are not able to mount, + * we store an array of the roots from previous transactions + * in the super. + */ +#define BTRFS_NUM_BACKUP_ROOTS 4 +struct btrfs_root_backup { + __le64 tree_root; + __le64 tree_root_gen; + + __le64 chunk_root; + __le64 chunk_root_gen; + + __le64 extent_root; + __le64 extent_root_gen; + + __le64 fs_root; + __le64 fs_root_gen; + + __le64 dev_root; + __le64 dev_root_gen; + + __le64 csum_root; + __le64 csum_root_gen; + + __le64 total_bytes; + __le64 bytes_used; + __le64 num_devices; + /* future */ + __le64 unsed_64[4]; + + u8 tree_root_level; + u8 chunk_root_level; + u8 extent_root_level; + u8 fs_root_level; + u8 dev_root_level; + u8 csum_root_level; + /* future and to align */ + u8 unused_8[10]; +} __attribute__ ((__packed__)); + +/* * the super block basically lists the main trees of the FS * it currently lacks any block count etc etc */ @@ -405,6 +447,7 @@ struct btrfs_super_block { /* future expansion */ __le64 reserved[31]; u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; + struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS]; } __attribute__ ((__packed__)); /* @@ -755,6 +798,8 @@ struct btrfs_space_info { chunks for this space */ unsigned int chunk_alloc:1; /* set if we are allocating a chunk */ + unsigned int flush:1; /* set if we are trying to make space */ + unsigned int force_alloc; /* set if we need to force a chunk alloc for this space */ @@ -764,20 +809,14 @@ struct btrfs_space_info { struct list_head block_groups[BTRFS_NR_RAID_TYPES]; spinlock_t lock; struct rw_semaphore groups_sem; - atomic_t caching_threads; + wait_queue_head_t wait; }; struct btrfs_block_rsv { u64 size; u64 reserved; - u64 freed[2]; struct btrfs_space_info *space_info; - struct list_head list; spinlock_t lock; - atomic_t usage; - unsigned int priority:8; - unsigned int durable:1; - unsigned int refill_used:1; unsigned int full:1; }; @@ -809,7 +848,8 @@ struct btrfs_free_cluster { enum btrfs_caching_type { BTRFS_CACHE_NO = 0, BTRFS_CACHE_STARTED = 1, - BTRFS_CACHE_FINISHED = 2, + BTRFS_CACHE_FAST = 2, + BTRFS_CACHE_FINISHED = 3, }; enum btrfs_disk_cache_state { @@ -824,6 +864,7 @@ struct btrfs_caching_control { struct list_head list; struct mutex mutex; wait_queue_head_t wait; + struct btrfs_work work; struct btrfs_block_group_cache *block_group; u64 progress; atomic_t count; @@ -837,10 +878,10 @@ struct btrfs_block_group_cache { spinlock_t lock; u64 pinned; u64 reserved; - u64 reserved_pinned; u64 bytes_super; u64 flags; u64 sectorsize; + u64 cache_generation; unsigned int ro:1; unsigned int dirty:1; unsigned int iref:1; @@ -896,6 +937,10 @@ struct btrfs_fs_info { spinlock_t block_group_cache_lock; struct rb_root block_group_cache_tree; + /* keep track of unallocated space */ + spinlock_t free_chunk_lock; + u64 free_chunk_space; + struct extent_io_tree freed_extents[2]; struct extent_io_tree *pinned_extents; @@ -913,14 +958,11 @@ struct btrfs_fs_info { struct btrfs_block_rsv trans_block_rsv; /* block reservation for chunk tree */ struct btrfs_block_rsv chunk_block_rsv; + /* block reservation for delayed operations */ + struct btrfs_block_rsv delayed_block_rsv; struct btrfs_block_rsv empty_block_rsv; - /* list of block reservations that cross multiple transactions */ - struct list_head durable_block_rsv_list; - - struct mutex durable_block_rsv_mutex; - u64 generation; u64 last_trans_committed; @@ -939,8 +981,8 @@ struct btrfs_fs_info { wait_queue_head_t transaction_blocked_wait; wait_queue_head_t async_submit_wait; - struct btrfs_super_block super_copy; - struct btrfs_super_block super_for_commit; + struct btrfs_super_block *super_copy; + struct btrfs_super_block *super_for_commit; struct block_device *__bdev; struct super_block *sb; struct inode *btree_inode; @@ -1032,6 +1074,9 @@ struct btrfs_fs_info { struct btrfs_workers endio_write_workers; struct btrfs_workers endio_freespace_worker; struct btrfs_workers submit_workers; + struct btrfs_workers caching_workers; + struct btrfs_workers readahead_workers; + /* * fixup workers take dirty pages that didn't properly go through * the cow mechanism and make them safe to write. It happens @@ -1114,6 +1159,13 @@ struct btrfs_fs_info { u64 fs_state; struct btrfs_delayed_root *delayed_root; + + /* readahead tree */ + spinlock_t reada_lock; + struct radix_tree_root reada_tree; + + /* next backup root to be overwritten */ + int backup_root_index; }; /* @@ -1219,7 +1271,9 @@ struct btrfs_root { * right now this just gets used so that a root has its own devid * for stat. It may be used for more later */ - struct super_block anon_super; + dev_t anon_dev; + + int force_cow; }; struct btrfs_ioctl_defrag_range_args { @@ -1358,6 +1412,7 @@ struct btrfs_ioctl_defrag_range_args { #define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15) #define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16) #define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17) +#define BTRFS_MOUNT_RECOVERY (1 << 18) #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) @@ -1410,17 +1465,15 @@ void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ static inline u##bits btrfs_##name(struct extent_buffer *eb) \ { \ - type *p = kmap_atomic(eb->first_page, KM_USER0); \ + type *p = page_address(eb->first_page); \ u##bits res = le##bits##_to_cpu(p->member); \ - kunmap_atomic(p, KM_USER0); \ return res; \ } \ static inline void btrfs_set_##name(struct extent_buffer *eb, \ u##bits val) \ { \ - type *p = kmap_atomic(eb->first_page, KM_USER0); \ + type *p = page_address(eb->first_page); \ p->member = cpu_to_le##bits(val); \ - kunmap_atomic(p, KM_USER0); \ } #define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ @@ -1975,6 +2028,55 @@ static inline bool btrfs_root_readonly(struct btrfs_root *root) return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0; } +/* struct btrfs_root_backup */ +BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup, + tree_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_tree_root_gen, struct btrfs_root_backup, + tree_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_tree_root_level, struct btrfs_root_backup, + tree_root_level, 8); + +BTRFS_SETGET_STACK_FUNCS(backup_chunk_root, struct btrfs_root_backup, + chunk_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_gen, struct btrfs_root_backup, + chunk_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_level, struct btrfs_root_backup, + chunk_root_level, 8); + +BTRFS_SETGET_STACK_FUNCS(backup_extent_root, struct btrfs_root_backup, + extent_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_extent_root_gen, struct btrfs_root_backup, + extent_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_extent_root_level, struct btrfs_root_backup, + extent_root_level, 8); + +BTRFS_SETGET_STACK_FUNCS(backup_fs_root, struct btrfs_root_backup, + fs_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_fs_root_gen, struct btrfs_root_backup, + fs_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_fs_root_level, struct btrfs_root_backup, + fs_root_level, 8); + +BTRFS_SETGET_STACK_FUNCS(backup_dev_root, struct btrfs_root_backup, + dev_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_dev_root_gen, struct btrfs_root_backup, + dev_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_dev_root_level, struct btrfs_root_backup, + dev_root_level, 8); + +BTRFS_SETGET_STACK_FUNCS(backup_csum_root, struct btrfs_root_backup, + csum_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_csum_root_gen, struct btrfs_root_backup, + csum_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_csum_root_level, struct btrfs_root_backup, + csum_root_level, 8); +BTRFS_SETGET_STACK_FUNCS(backup_total_bytes, struct btrfs_root_backup, + total_bytes, 64); +BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup, + bytes_used, 64); +BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup, + num_devices, 64); + /* struct btrfs_super_block */ BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); @@ -2126,14 +2228,30 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info) (space_info->flags & BTRFS_BLOCK_GROUP_DATA)); } +static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping) +{ + return mapping_gfp_mask(mapping) & ~__GFP_FS; +} + /* extent-tree.c */ static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, - int num_items) + unsigned num_items) { return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * 3 * num_items; } +/* + * Doing a truncate won't result in new nodes or leaves, just what we need for + * COW. + */ +static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_root *root, + unsigned num_items) +{ + return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * + num_items; +} + void btrfs_put_block_group(struct btrfs_block_group_cache *cache); int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_root *root, unsigned long count); @@ -2143,6 +2261,9 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, u64 num_bytes, u64 *refs, u64 *flags); int btrfs_pin_extent(struct btrfs_root *root, u64 bytenr, u64 num, int reserved); +int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes); int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 offset, u64 bytenr); @@ -2193,8 +2314,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, u64 root_objectid, u64 owner, u64 offset); int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); -int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, - u64 num_bytes, int reserve, int sinfo); +int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, + u64 start, u64 len); int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, @@ -2222,9 +2343,6 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); void btrfs_clear_space_info_full(struct btrfs_fs_info *info); int btrfs_check_data_free_space(struct inode *inode, u64 bytes); void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); -int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - int num_items); void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, @@ -2240,25 +2358,26 @@ void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv); struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root); void btrfs_free_block_rsv(struct btrfs_root *root, struct btrfs_block_rsv *rsv); -void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *rsv); -int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +int btrfs_block_rsv_add(struct btrfs_root *root, struct btrfs_block_rsv *block_rsv, u64 num_bytes); -int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +int btrfs_block_rsv_add_noflush(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, + u64 num_bytes); +int btrfs_block_rsv_check(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, int min_factor); +int btrfs_block_rsv_refill(struct btrfs_root *root, struct btrfs_block_rsv *block_rsv, - u64 min_reserved, int min_factor); + u64 min_reserved); +int btrfs_block_rsv_refill_noflush(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, + u64 min_reserved); int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, struct btrfs_block_rsv *dst_rsv, u64 num_bytes); void btrfs_block_rsv_release(struct btrfs_root *root, struct btrfs_block_rsv *block_rsv, u64 num_bytes); -int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_block_rsv *rsv); int btrfs_set_block_group_ro(struct btrfs_root *root, struct btrfs_block_group_cache *cache); int btrfs_set_block_group_rw(struct btrfs_root *root, @@ -2330,7 +2449,7 @@ struct btrfs_path *btrfs_alloc_path(void); void btrfs_free_path(struct btrfs_path *p); void btrfs_set_path_blocking(struct btrfs_path *p); void btrfs_clear_path_blocking(struct btrfs_path *p, - struct extent_buffer *held); + struct extent_buffer *held, int held_rw); void btrfs_unlock_up_safe(struct btrfs_path *p, int level); int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -2365,8 +2484,8 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); -int btrfs_drop_snapshot(struct btrfs_root *root, - struct btrfs_block_rsv *block_rsv, int update_ref); +void btrfs_drop_snapshot(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, int update_ref); int btrfs_drop_subtree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *node, @@ -2379,6 +2498,18 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info) smp_mb(); return fs_info->closing; } +static inline void free_fs_info(struct btrfs_fs_info *fs_info) +{ + kfree(fs_info->delayed_root); + kfree(fs_info->extent_root); + kfree(fs_info->tree_root); + kfree(fs_info->chunk_root); + kfree(fs_info->dev_root); + kfree(fs_info->csum_root); + kfree(fs_info->super_copy); + kfree(fs_info->super_for_commit); + kfree(fs_info); +} /* root-item.c */ int btrfs_find_root_ref(struct btrfs_root *tree_root, @@ -2404,8 +2535,8 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key); int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); int btrfs_find_orphan_roots(struct btrfs_root *tree_root); -int btrfs_set_root_node(struct btrfs_root_item *item, - struct extent_buffer *node); +void btrfs_set_root_node(struct btrfs_root_item *item, + struct extent_buffer *node); void btrfs_check_and_init_root_item(struct btrfs_root_item *item); /* dir-item.c */ @@ -2510,6 +2641,9 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, struct list_head *list, int search_commit); /* inode.c */ +struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page, + size_t pg_offset, u64 start, u64 len, + int create); /* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */ #if defined(ClearPageFsMisc) && !defined(ClearPageChecked) @@ -2518,6 +2652,14 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, #define PageChecked PageFsMisc #endif +/* This forces readahead on a given range of bytes in an inode */ +static inline void btrfs_force_ra(struct address_space *mapping, + struct file_ra_state *ra, struct file *file, + pgoff_t offset, unsigned long req_size) +{ + page_cache_sync_readahead(mapping, ra, file, offset, req_size); +} + struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); int btrfs_set_inode_index(struct inode *dir, u64 *index); int btrfs_unlink_inode(struct btrfs_trans_handle *trans, @@ -2546,14 +2688,12 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, int btrfs_merge_bio_hook(struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags); -unsigned long btrfs_force_ra(struct address_space *mapping, - struct file_ra_state *ra, struct file *file, - pgoff_t offset, pgoff_t last_index); int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); int btrfs_readpage(struct file *file, struct page *page); void btrfs_evict_inode(struct inode *inode); int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); -void btrfs_dirty_inode(struct inode *inode, int flags); +int btrfs_dirty_inode(struct inode *inode); +int btrfs_update_time(struct file *file); struct inode *btrfs_alloc_inode(struct super_block *sb); void btrfs_destroy_inode(struct inode *inode); int btrfs_drop_inode(struct inode *inode); @@ -2571,11 +2711,6 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); int btrfs_orphan_cleanup(struct btrfs_root *root); -void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, - struct btrfs_pending_snapshot *pending, - u64 *bytes_to_reserve); -void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans, - struct btrfs_pending_snapshot *pending); void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size); @@ -2602,7 +2737,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, struct inode *inode); int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); -int btrfs_sync_file(struct file *file, int datasync); +int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, int skip_pinned); extern const struct file_operations btrfs_file_operations; @@ -2642,13 +2777,22 @@ do { \ /* acl.c */ #ifdef CONFIG_BTRFS_FS_POSIX_ACL -int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags); -#else -#define btrfs_check_acl NULL -#endif +struct posix_acl *btrfs_get_acl(struct inode *inode, int type); int btrfs_init_acl(struct btrfs_trans_handle *trans, struct inode *inode, struct inode *dir); int btrfs_acl_chmod(struct inode *inode); +#else +#define btrfs_get_acl NULL +static inline int btrfs_init_acl(struct btrfs_trans_handle *trans, + struct inode *inode, struct inode *dir) +{ + return 0; +} +static inline int btrfs_acl_chmod(struct inode *inode) +{ + return 0; +} +#endif /* relocation.c */ int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start); @@ -2680,4 +2824,20 @@ int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid); int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, struct btrfs_scrub_progress *progress); +/* reada.c */ +struct reada_control { + struct btrfs_root *root; /* tree to prefetch */ + struct btrfs_key key_start; + struct btrfs_key key_end; /* exclusive */ + atomic_t elems; + struct kref refcnt; + wait_queue_head_t wait; +}; +struct reada_control *btrfs_reada_add(struct btrfs_root *root, + struct btrfs_key *start, struct btrfs_key *end); +int btrfs_reada_wait(void *handle); +void btrfs_reada_detach(void *handle); +int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, + u64 start, int err); + #endif diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 98c68e6..9c1eccc 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -591,7 +591,7 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans, return 0; src_rsv = trans->block_rsv; - dst_rsv = &root->fs_info->global_block_rsv; + dst_rsv = &root->fs_info->delayed_block_rsv; num_bytes = btrfs_calc_trans_metadata_size(root, 1); ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); @@ -609,7 +609,7 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_root *root, if (!item->bytes_reserved) return; - rsv = &root->fs_info->global_block_rsv; + rsv = &root->fs_info->delayed_block_rsv; btrfs_block_rsv_release(root, rsv, item->bytes_reserved); } @@ -617,24 +617,102 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_root *root, static int btrfs_delayed_inode_reserve_metadata( struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct inode *inode, struct btrfs_delayed_node *node) { struct btrfs_block_rsv *src_rsv; struct btrfs_block_rsv *dst_rsv; u64 num_bytes; int ret; - - if (!trans->bytes_reserved) - return 0; + int release = false; src_rsv = trans->block_rsv; - dst_rsv = &root->fs_info->global_block_rsv; + dst_rsv = &root->fs_info->delayed_block_rsv; num_bytes = btrfs_calc_trans_metadata_size(root, 1); + + /* + * btrfs_dirty_inode will update the inode under btrfs_join_transaction + * which doesn't reserve space for speed. This is a problem since we + * still need to reserve space for this update, so try to reserve the + * space. + * + * Now if src_rsv == delalloc_block_rsv we'll let it just steal since + * we're accounted for. + */ + if (!src_rsv || (!trans->bytes_reserved && + src_rsv != &root->fs_info->delalloc_block_rsv)) { + ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes); + /* + * Since we're under a transaction reserve_metadata_bytes could + * try to commit the transaction which will make it return + * EAGAIN to make us stop the transaction we have, so return + * ENOSPC instead so that btrfs_dirty_inode knows what to do. + */ + if (ret == -EAGAIN) + ret = -ENOSPC; + if (!ret) + node->bytes_reserved = num_bytes; + return ret; + } else if (src_rsv == &root->fs_info->delalloc_block_rsv) { + spin_lock(&BTRFS_I(inode)->lock); + if (BTRFS_I(inode)->delalloc_meta_reserved) { + BTRFS_I(inode)->delalloc_meta_reserved = 0; + spin_unlock(&BTRFS_I(inode)->lock); + release = true; + goto migrate; + } + spin_unlock(&BTRFS_I(inode)->lock); + + /* Ok we didn't have space pre-reserved. This shouldn't happen + * too often but it can happen if we do delalloc to an existing + * inode which gets dirtied because of the time update, and then + * isn't touched again until after the transaction commits and + * then we try to write out the data. First try to be nice and + * reserve something strictly for us. If not be a pain and try + * to steal from the delalloc block rsv. + */ + ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes); + if (!ret) + goto out; + + ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); + if (!ret) + goto out; + + /* + * Ok this is a problem, let's just steal from the global rsv + * since this really shouldn't happen that often. + */ + WARN_ON(1); + ret = btrfs_block_rsv_migrate(&root->fs_info->global_block_rsv, + dst_rsv, num_bytes); + goto out; + } + +migrate: ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); + +out: + /* + * Migrate only takes a reservation, it doesn't touch the size of the + * block_rsv. This is to simplify people who don't normally have things + * migrated from their block rsv. If they go to release their + * reservation, that will decrease the size as well, so if migrate + * reduced size we'd end up with a negative size. But for the + * delalloc_meta_reserved stuff we will only know to drop 1 reservation, + * but we could in fact do this reserve/migrate dance several times + * between the time we did the original reservation and we'd clean it + * up. So to take care of this, release the space for the meta + * reservation here. I think it may be time for a documentation page on + * how block rsvs. work. + */ if (!ret) node->bytes_reserved = num_bytes; + if (release) + btrfs_block_rsv_release(root, src_rsv, num_bytes); + return ret; } @@ -646,7 +724,7 @@ static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root, if (!node->bytes_reserved) return; - rsv = &root->fs_info->global_block_rsv; + rsv = &root->fs_info->delayed_block_rsv; btrfs_block_rsv_release(root, rsv, node->bytes_reserved); node->bytes_reserved = 0; @@ -735,7 +813,7 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans, } /* reset all the locked nodes in the patch to spinning locks. */ - btrfs_clear_path_blocking(path, NULL); + btrfs_clear_path_blocking(path, NULL, 0); /* insert the keys of the items */ ret = setup_items_for_insert(trans, root, path, keys, data_size, @@ -1026,7 +1104,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans, path->leave_spinning = 1; block_rsv = trans->block_rsv; - trans->block_rsv = &root->fs_info->global_block_rsv; + trans->block_rsv = &root->fs_info->delayed_block_rsv; delayed_root = btrfs_get_delayed_root(root); @@ -1069,7 +1147,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, path->leave_spinning = 1; block_rsv = trans->block_rsv; - trans->block_rsv = &node->root->fs_info->global_block_rsv; + trans->block_rsv = &node->root->fs_info->delayed_block_rsv; ret = btrfs_insert_delayed_items(trans, path, node->root, node); if (!ret) @@ -1149,7 +1227,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work) goto free_path; block_rsv = trans->block_rsv; - trans->block_rsv = &root->fs_info->global_block_rsv; + trans->block_rsv = &root->fs_info->delayed_block_rsv; ret = btrfs_insert_delayed_items(trans, path, root, delayed_node); if (!ret) @@ -1641,7 +1719,7 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev) inode->i_gid = btrfs_stack_inode_gid(inode_item); btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item)); inode->i_mode = btrfs_stack_inode_mode(inode_item); - inode->i_nlink = btrfs_stack_inode_nlink(inode_item); + set_nlink(inode, btrfs_stack_inode_nlink(inode_item)); inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item)); BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item); BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item); @@ -1685,12 +1763,10 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, goto release_node; } - ret = btrfs_delayed_inode_reserve_metadata(trans, root, delayed_node); - /* - * we must reserve enough space when we start a new transaction, - * so reserving metadata failure is impossible - */ - BUG_ON(ret); + ret = btrfs_delayed_inode_reserve_metadata(trans, root, inode, + delayed_node); + if (ret) + goto release_node; fill_stack_inode_item(trans, &delayed_node->inode_item, inode); delayed_node->inode_dirty = 1; diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 8d27af4..7083d08 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include "ctree.h" diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 685f259..31d84e7 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -89,13 +89,8 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, data_size = sizeof(*dir_item) + name_len + data_len; dir_item = insert_with_overflow(trans, root, path, &key, data_size, name, name_len); - /* - * FIXME: at some point we should handle xattr's that are larger than - * what we can fit in our leaf. We set location to NULL b/c we arent - * pointing at anything else, that will change if we store the xattr - * data in a separate inode. - */ - BUG_ON(IS_ERR(dir_item)); + if (IS_ERR(dir_item)) + return PTR_ERR(dir_item); memset(&location, 0, sizeof(location)); leaf = path->nodes[0]; @@ -203,8 +198,6 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_key key; int ins_len = mod < 0 ? -1 : 0; int cow = mod != 0; - struct btrfs_key found_key; - struct extent_buffer *leaf; key.objectid = dir; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); @@ -214,18 +207,7 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); if (ret < 0) return ERR_PTR(ret); - if (ret > 0) { - if (path->slots[0] == 0) - return NULL; - path->slots[0]--; - } - - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - - if (found_key.objectid != dir || - btrfs_key_type(&found_key) != BTRFS_DIR_ITEM_KEY || - found_key.offset != key.offset) + if (ret > 0) return NULL; return btrfs_match_dir_item_name(root, path, name, name_len); @@ -320,8 +302,6 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, struct btrfs_key key; int ins_len = mod < 0 ? -1 : 0; int cow = mod != 0; - struct btrfs_key found_key; - struct extent_buffer *leaf; key.objectid = dir; btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); @@ -329,18 +309,7 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); if (ret < 0) return ERR_PTR(ret); - if (ret > 0) { - if (path->slots[0] == 0) - return NULL; - path->slots[0]--; - } - - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - - if (found_key.objectid != dir || - btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY || - found_key.offset != key.offset) + if (ret > 0) return NULL; return btrfs_match_dir_item_name(root, path, name, name_len); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 57106a9..b8fc473 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -100,38 +100,83 @@ struct async_submit_bio { struct btrfs_work work; }; -/* These are used to set the lockdep class on the extent buffer locks. - * The class is set by the readpage_end_io_hook after the buffer has - * passed csum validation but before the pages are unlocked. +/* + * Lockdep class keys for extent_buffer->lock's in this root. For a given + * eb, the lockdep key is determined by the btrfs_root it belongs to and + * the level the eb occupies in the tree. + * + * Different roots are used for different purposes and may nest inside each + * other and they require separate keysets. As lockdep keys should be + * static, assign keysets according to the purpose of the root as indicated + * by btrfs_root->objectid. This ensures that all special purpose roots + * have separate keysets. * - * The lockdep class is also set by btrfs_init_new_buffer on freshly - * allocated blocks. + * Lock-nesting across peer nodes is always done with the immediate parent + * node locked thus preventing deadlock. As lockdep doesn't know this, use + * subclass to avoid triggering lockdep warning in such cases. * - * The class is based on the level in the tree block, which allows lockdep - * to know that lower nodes nest inside the locks of higher nodes. + * The key is set by the readpage_end_io_hook after the buffer has passed + * csum validation but before the pages are unlocked. It is also set by + * btrfs_init_new_buffer on freshly allocated blocks. * - * We also add a check to make sure the highest level of the tree is - * the same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this - * code needs update as well. + * We also add a check to make sure the highest level of the tree is the + * same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code + * needs update as well. */ #ifdef CONFIG_DEBUG_LOCK_ALLOC # if BTRFS_MAX_LEVEL != 8 # error # endif -static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1]; -static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = { - /* leaf */ - "btrfs-extent-00", - "btrfs-extent-01", - "btrfs-extent-02", - "btrfs-extent-03", - "btrfs-extent-04", - "btrfs-extent-05", - "btrfs-extent-06", - "btrfs-extent-07", - /* highest possible level */ - "btrfs-extent-08", + +static struct btrfs_lockdep_keyset { + u64 id; /* root objectid */ + const char *name_stem; /* lock name stem */ + char names[BTRFS_MAX_LEVEL + 1][20]; + struct lock_class_key keys[BTRFS_MAX_LEVEL + 1]; +} btrfs_lockdep_keysets[] = { + { .id = BTRFS_ROOT_TREE_OBJECTID, .name_stem = "root" }, + { .id = BTRFS_EXTENT_TREE_OBJECTID, .name_stem = "extent" }, + { .id = BTRFS_CHUNK_TREE_OBJECTID, .name_stem = "chunk" }, + { .id = BTRFS_DEV_TREE_OBJECTID, .name_stem = "dev" }, + { .id = BTRFS_FS_TREE_OBJECTID, .name_stem = "fs" }, + { .id = BTRFS_CSUM_TREE_OBJECTID, .name_stem = "csum" }, + { .id = BTRFS_ORPHAN_OBJECTID, .name_stem = "orphan" }, + { .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" }, + { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" }, + { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" }, + { .id = 0, .name_stem = "tree" }, }; + +void __init btrfs_init_lockdep(void) +{ + int i, j; + + /* initialize lockdep class names */ + for (i = 0; i < ARRAY_SIZE(btrfs_lockdep_keysets); i++) { + struct btrfs_lockdep_keyset *ks = &btrfs_lockdep_keysets[i]; + + for (j = 0; j < ARRAY_SIZE(ks->names); j++) + snprintf(ks->names[j], sizeof(ks->names[j]), + "btrfs-%s-%02d", ks->name_stem, j); + } +} + +void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, + int level) +{ + struct btrfs_lockdep_keyset *ks; + + BUG_ON(level >= ARRAY_SIZE(ks->keys)); + + /* find the matching keyset, id 0 is the default entry */ + for (ks = btrfs_lockdep_keysets; ks->id; ks++) + if (ks->id == objectid) + break; + + lockdep_set_class_and_name(&eb->lock, + &ks->keys[level], ks->names[level]); +} + #endif /* @@ -211,13 +256,11 @@ void btrfs_csum_final(u32 crc, char *result) static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, int verify) { - u16 csum_size = - btrfs_super_csum_size(&root->fs_info->super_copy); + u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); char *result = NULL; unsigned long len; unsigned long cur_len; unsigned long offset = BTRFS_CSUM_SIZE; - char *map_token = NULL; char *kaddr; unsigned long map_start; unsigned long map_len; @@ -228,8 +271,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, len = buf->len - offset; while (len > 0) { err = map_private_extent_buffer(buf, offset, 32, - &map_token, &kaddr, - &map_start, &map_len, KM_USER0); + &kaddr, &map_start, &map_len); if (err) return 1; cur_len = min(len, map_len - (offset - map_start)); @@ -237,7 +279,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, crc, cur_len); len -= cur_len; offset += cur_len; - unmap_extent_buffer(buf, map_token, KM_USER0); } if (csum_size > sizeof(inline_result)) { result = kzalloc(csum_size * sizeof(char), GFP_NOFS); @@ -325,7 +366,8 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; while (1) { - ret = read_extent_buffer_pages(io_tree, eb, start, 1, + ret = read_extent_buffer_pages(io_tree, eb, start, + WAIT_COMPLETE, btree_get_extent, mirror_num); if (!ret && !verify_parent_transid(io_tree, eb, parent_transid)) @@ -494,15 +536,6 @@ static noinline int check_leaf(struct btrfs_root *root, return 0; } -#ifdef CONFIG_DEBUG_LOCK_ALLOC -void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level) -{ - lockdep_set_class_and_name(&eb->lock, - &btrfs_eb_class[level], - btrfs_eb_name[level]); -} -#endif - static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state) { @@ -553,7 +586,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, } found_level = btrfs_header_level(eb); - btrfs_set_buffer_lockdep_class(eb, found_level); + btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), + eb, found_level); ret = csum_tree_block(root, eb, 1); if (ret) { @@ -574,11 +608,48 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, end = min_t(u64, eb->len, PAGE_CACHE_SIZE); end = eb->start + end - 1; err: + if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) { + clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags); + btree_readahead_hook(root, eb, eb->start, ret); + } + free_extent_buffer(eb); out: return ret; } +static int btree_io_failed_hook(struct bio *failed_bio, + struct page *page, u64 start, u64 end, + int mirror_num, struct extent_state *state) +{ + struct extent_io_tree *tree; + unsigned long len; + struct extent_buffer *eb; + struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; + + tree = &BTRFS_I(page->mapping->host)->io_tree; + if (page->private == EXTENT_PAGE_PRIVATE) + goto out; + if (!page->private) + goto out; + + len = page->private >> 2; + WARN_ON(len == 0); + + eb = alloc_extent_buffer(tree, start, len, page); + if (eb == NULL) + goto out; + + if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) { + clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags); + btree_readahead_hook(root, eb, eb->start, -EIO); + } + free_extent_buffer(eb); + +out: + return -EIO; /* we fixed nothing */ +} + static void end_workqueue_bio(struct bio *bio, int err) { struct end_io_wq *end_io_wq = bio->bi_private; @@ -875,7 +946,7 @@ static int btree_readpage(struct file *file, struct page *page) { struct extent_io_tree *tree; tree = &BTRFS_I(page->mapping->host)->io_tree; - return extent_read_full_page(tree, page, btree_get_extent); + return extent_read_full_page(tree, page, btree_get_extent, 0); } static int btree_releasepage(struct page *page, gfp_t gfp_flags) @@ -941,11 +1012,43 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, if (!buf) return 0; read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, - buf, 0, 0, btree_get_extent, 0); + buf, 0, WAIT_NONE, btree_get_extent, 0); free_extent_buffer(buf); return ret; } +int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, + int mirror_num, struct extent_buffer **eb) +{ + struct extent_buffer *buf = NULL; + struct inode *btree_inode = root->fs_info->btree_inode; + struct extent_io_tree *io_tree = &BTRFS_I(btree_inode)->io_tree; + int ret; + + buf = btrfs_find_create_tree_block(root, bytenr, blocksize); + if (!buf) + return 0; + + set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags); + + ret = read_extent_buffer_pages(io_tree, buf, 0, WAIT_PAGE_LOCK, + btree_get_extent, mirror_num); + if (ret) { + free_extent_buffer(buf); + return ret; + } + + if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) { + free_extent_buffer(buf); + return -EIO; + } else if (extent_buffer_uptodate(io_tree, buf, NULL)) { + *eb = buf; + } else { + free_extent_buffer(buf); + } + return 0; +} + struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { @@ -1078,12 +1181,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, init_completion(&root->kobj_unregister); root->defrag_running = 0; root->root_key.objectid = objectid; - root->anon_super.s_root = NULL; - root->anon_super.s_dev = 0; - INIT_LIST_HEAD(&root->anon_super.s_list); - INIT_LIST_HEAD(&root->anon_super.s_instances); - init_rwsem(&root->anon_super.s_umount); - + root->anon_dev = 0; return 0; } @@ -1107,10 +1205,12 @@ static int find_and_setup_root(struct btrfs_root *tree_root, generation = btrfs_root_generation(&root->root_item); blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); + root->commit_root = NULL; root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), blocksize, generation); if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) { free_extent_buffer(root->node); + root->node = NULL; return -EIO; } root->commit_root = btrfs_root_node(root); @@ -1312,7 +1412,7 @@ again: spin_lock_init(&root->cache_lock); init_waitqueue_head(&root->cache_wait); - ret = set_anon_super(&root->anon_super, NULL); + ret = get_anon_bdev(&root->anon_dev); if (ret) goto fail; @@ -1549,6 +1649,235 @@ sleep: return 0; } +/* + * this will find the highest generation in the array of + * root backups. The index of the highest array is returned, + * or -1 if we can't find anything. + * + * We check to make sure the array is valid by comparing the + * generation of the latest root in the array with the generation + * in the super block. If they don't match we pitch it. + */ +static int find_newest_super_backup(struct btrfs_fs_info *info, u64 newest_gen) +{ + u64 cur; + int newest_index = -1; + struct btrfs_root_backup *root_backup; + int i; + + for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) { + root_backup = info->super_copy->super_roots + i; + cur = btrfs_backup_tree_root_gen(root_backup); + if (cur == newest_gen) + newest_index = i; + } + + /* check to see if we actually wrapped around */ + if (newest_index == BTRFS_NUM_BACKUP_ROOTS - 1) { + root_backup = info->super_copy->super_roots; + cur = btrfs_backup_tree_root_gen(root_backup); + if (cur == newest_gen) + newest_index = 0; + } + return newest_index; +} + + +/* + * find the oldest backup so we know where to store new entries + * in the backup array. This will set the backup_root_index + * field in the fs_info struct + */ +static void find_oldest_super_backup(struct btrfs_fs_info *info, + u64 newest_gen) +{ + int newest_index = -1; + + newest_index = find_newest_super_backup(info, newest_gen); + /* if there was garbage in there, just move along */ + if (newest_index == -1) { + info->backup_root_index = 0; + } else { + info->backup_root_index = (newest_index + 1) % BTRFS_NUM_BACKUP_ROOTS; + } +} + +/* + * copy all the root pointers into the super backup array. + * this will bump the backup pointer by one when it is + * done + */ +static void backup_super_roots(struct btrfs_fs_info *info) +{ + int next_backup; + struct btrfs_root_backup *root_backup; + int last_backup; + + next_backup = info->backup_root_index; + last_backup = (next_backup + BTRFS_NUM_BACKUP_ROOTS - 1) % + BTRFS_NUM_BACKUP_ROOTS; + + /* + * just overwrite the last backup if we're at the same generation + * this happens only at umount + */ + root_backup = info->super_for_commit->super_roots + last_backup; + if (btrfs_backup_tree_root_gen(root_backup) == + btrfs_header_generation(info->tree_root->node)) + next_backup = last_backup; + + root_backup = info->super_for_commit->super_roots + next_backup; + + /* + * make sure all of our padding and empty slots get zero filled + * regardless of which ones we use today + */ + memset(root_backup, 0, sizeof(*root_backup)); + + info->backup_root_index = (next_backup + 1) % BTRFS_NUM_BACKUP_ROOTS; + + btrfs_set_backup_tree_root(root_backup, info->tree_root->node->start); + btrfs_set_backup_tree_root_gen(root_backup, + btrfs_header_generation(info->tree_root->node)); + + btrfs_set_backup_tree_root_level(root_backup, + btrfs_header_level(info->tree_root->node)); + + btrfs_set_backup_chunk_root(root_backup, info->chunk_root->node->start); + btrfs_set_backup_chunk_root_gen(root_backup, + btrfs_header_generation(info->chunk_root->node)); + btrfs_set_backup_chunk_root_level(root_backup, + btrfs_header_level(info->chunk_root->node)); + + btrfs_set_backup_extent_root(root_backup, info->extent_root->node->start); + btrfs_set_backup_extent_root_gen(root_backup, + btrfs_header_generation(info->extent_root->node)); + btrfs_set_backup_extent_root_level(root_backup, + btrfs_header_level(info->extent_root->node)); + + /* + * we might commit during log recovery, which happens before we set + * the fs_root. Make sure it is valid before we fill it in. + */ + if (info->fs_root && info->fs_root->node) { + btrfs_set_backup_fs_root(root_backup, + info->fs_root->node->start); + btrfs_set_backup_fs_root_gen(root_backup, + btrfs_header_generation(info->fs_root->node)); + btrfs_set_backup_fs_root_level(root_backup, + btrfs_header_level(info->fs_root->node)); + } + + btrfs_set_backup_dev_root(root_backup, info->dev_root->node->start); + btrfs_set_backup_dev_root_gen(root_backup, + btrfs_header_generation(info->dev_root->node)); + btrfs_set_backup_dev_root_level(root_backup, + btrfs_header_level(info->dev_root->node)); + + btrfs_set_backup_csum_root(root_backup, info->csum_root->node->start); + btrfs_set_backup_csum_root_gen(root_backup, + btrfs_header_generation(info->csum_root->node)); + btrfs_set_backup_csum_root_level(root_backup, + btrfs_header_level(info->csum_root->node)); + + btrfs_set_backup_total_bytes(root_backup, + btrfs_super_total_bytes(info->super_copy)); + btrfs_set_backup_bytes_used(root_backup, + btrfs_super_bytes_used(info->super_copy)); + btrfs_set_backup_num_devices(root_backup, + btrfs_super_num_devices(info->super_copy)); + + /* + * if we don't copy this out to the super_copy, it won't get remembered + * for the next commit + */ + memcpy(&info->super_copy->super_roots, + &info->super_for_commit->super_roots, + sizeof(*root_backup) * BTRFS_NUM_BACKUP_ROOTS); +} + +/* + * this copies info out of the root backup array and back into + * the in-memory super block. It is meant to help iterate through + * the array, so you send it the number of backups you've already + * tried and the last backup index you used. + * + * this returns -1 when it has tried all the backups + */ +static noinline int next_root_backup(struct btrfs_fs_info *info, + struct btrfs_super_block *super, + int *num_backups_tried, int *backup_index) +{ + struct btrfs_root_backup *root_backup; + int newest = *backup_index; + + if (*num_backups_tried == 0) { + u64 gen = btrfs_super_generation(super); + + newest = find_newest_super_backup(info, gen); + if (newest == -1) + return -1; + + *backup_index = newest; + *num_backups_tried = 1; + } else if (*num_backups_tried == BTRFS_NUM_BACKUP_ROOTS) { + /* we've tried all the backups, all done */ + return -1; + } else { + /* jump to the next oldest backup */ + newest = (*backup_index + BTRFS_NUM_BACKUP_ROOTS - 1) % + BTRFS_NUM_BACKUP_ROOTS; + *backup_index = newest; + *num_backups_tried += 1; + } + root_backup = super->super_roots + newest; + + btrfs_set_super_generation(super, + btrfs_backup_tree_root_gen(root_backup)); + btrfs_set_super_root(super, btrfs_backup_tree_root(root_backup)); + btrfs_set_super_root_level(super, + btrfs_backup_tree_root_level(root_backup)); + btrfs_set_super_bytes_used(super, btrfs_backup_bytes_used(root_backup)); + + /* + * fixme: the total bytes and num_devices need to match or we should + * need a fsck + */ + btrfs_set_super_total_bytes(super, btrfs_backup_total_bytes(root_backup)); + btrfs_set_super_num_devices(super, btrfs_backup_num_devices(root_backup)); + return 0; +} + +/* helper to cleanup tree roots */ +static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) +{ + free_extent_buffer(info->tree_root->node); + free_extent_buffer(info->tree_root->commit_root); + free_extent_buffer(info->dev_root->node); + free_extent_buffer(info->dev_root->commit_root); + free_extent_buffer(info->extent_root->node); + free_extent_buffer(info->extent_root->commit_root); + free_extent_buffer(info->csum_root->node); + free_extent_buffer(info->csum_root->commit_root); + + info->tree_root->node = NULL; + info->tree_root->commit_root = NULL; + info->dev_root->node = NULL; + info->dev_root->commit_root = NULL; + info->extent_root->node = NULL; + info->extent_root->commit_root = NULL; + info->csum_root->node = NULL; + info->csum_root->commit_root = NULL; + + if (chunk_root) { + free_extent_buffer(info->chunk_root->node); + free_extent_buffer(info->chunk_root->commit_root); + info->chunk_root->node = NULL; + info->chunk_root->commit_root = NULL; + } +} + + struct btrfs_root *open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices, char *options) @@ -1562,29 +1891,32 @@ struct btrfs_root *open_ctree(struct super_block *sb, u64 features; struct btrfs_key location; struct buffer_head *bh; - struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root), - GFP_NOFS); - struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root), - GFP_NOFS); + struct btrfs_super_block *disk_super; struct btrfs_root *tree_root = btrfs_sb(sb); - struct btrfs_fs_info *fs_info = NULL; - struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), - GFP_NOFS); - struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), - GFP_NOFS); + struct btrfs_fs_info *fs_info = tree_root->fs_info; + struct btrfs_root *extent_root; + struct btrfs_root *csum_root; + struct btrfs_root *chunk_root; + struct btrfs_root *dev_root; struct btrfs_root *log_tree_root; - int ret; int err = -EINVAL; - - struct btrfs_super_block *disk_super; - - if (!extent_root || !tree_root || !tree_root->fs_info || - !chunk_root || !dev_root || !csum_root) { + int num_backups_tried = 0; + int backup_index = 0; + + extent_root = fs_info->extent_root = + kzalloc(sizeof(struct btrfs_root), GFP_NOFS); + csum_root = fs_info->csum_root = + kzalloc(sizeof(struct btrfs_root), GFP_NOFS); + chunk_root = fs_info->chunk_root = + kzalloc(sizeof(struct btrfs_root), GFP_NOFS); + dev_root = fs_info->dev_root = + kzalloc(sizeof(struct btrfs_root), GFP_NOFS); + + if (!extent_root || !csum_root || !chunk_root || !dev_root) { err = -ENOMEM; goto fail; } - fs_info = tree_root->fs_info; ret = init_srcu_struct(&fs_info->subvol_srcu); if (ret) { @@ -1604,7 +1936,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, goto fail_bdi; } - fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS; + mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); INIT_LIST_HEAD(&fs_info->trans_list); @@ -1620,15 +1952,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, spin_lock_init(&fs_info->fs_roots_radix_lock); spin_lock_init(&fs_info->delayed_iput_lock); spin_lock_init(&fs_info->defrag_inodes_lock); + spin_lock_init(&fs_info->free_chunk_lock); mutex_init(&fs_info->reloc_mutex); init_completion(&fs_info->kobj_unregister); - fs_info->tree_root = tree_root; - fs_info->extent_root = extent_root; - fs_info->csum_root = csum_root; - fs_info->chunk_root = chunk_root; - fs_info->dev_root = dev_root; - fs_info->fs_devices = fs_devices; INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); INIT_LIST_HEAD(&fs_info->space_info); btrfs_mapping_init(&fs_info->mapping_tree); @@ -1637,8 +1964,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_init_block_rsv(&fs_info->trans_block_rsv); btrfs_init_block_rsv(&fs_info->chunk_block_rsv); btrfs_init_block_rsv(&fs_info->empty_block_rsv); - INIT_LIST_HEAD(&fs_info->durable_block_rsv_list); - mutex_init(&fs_info->durable_block_rsv_mutex); + btrfs_init_block_rsv(&fs_info->delayed_block_rsv); atomic_set(&fs_info->nr_async_submits, 0); atomic_set(&fs_info->async_delalloc_pages, 0); atomic_set(&fs_info->async_submit_draining, 0); @@ -1649,6 +1975,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->metadata_ratio = 0; fs_info->defrag_inodes = RB_ROOT; fs_info->trans_no_join = 0; + fs_info->free_chunk_space = 0; + + /* readahead state */ + INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); + spin_lock_init(&fs_info->reada_lock); fs_info->thread_pool_size = min_t(unsigned long, num_online_cpus() + 2, 8); @@ -1677,7 +2008,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, sb->s_bdi = &fs_info->bdi; fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID; - fs_info->btree_inode->i_nlink = 1; + set_nlink(fs_info->btree_inode, 1); /* * we set the i_size on the btree inode to the max possible int. * the real end of the address space is determined by all of @@ -1738,14 +2069,14 @@ struct btrfs_root *open_ctree(struct super_block *sb, goto fail_alloc; } - memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); - memcpy(&fs_info->super_for_commit, &fs_info->super_copy, - sizeof(fs_info->super_for_commit)); + memcpy(fs_info->super_copy, bh->b_data, sizeof(*fs_info->super_copy)); + memcpy(fs_info->super_for_commit, fs_info->super_copy, + sizeof(*fs_info->super_for_commit)); brelse(bh); - memcpy(fs_info->fsid, fs_info->super_copy.fsid, BTRFS_FSID_SIZE); + memcpy(fs_info->fsid, fs_info->super_copy->fsid, BTRFS_FSID_SIZE); - disk_super = &fs_info->super_copy; + disk_super = fs_info->super_copy; if (!btrfs_super_root(disk_super)) goto fail_alloc; @@ -1755,6 +2086,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); /* + * run through our array of backup supers and setup + * our ring pointer to the oldest one + */ + generation = btrfs_super_generation(disk_super); + find_oldest_super_backup(fs_info, generation); + + /* * In the long term, we'll store the compression type in the super * block, and it'll be used for per file compression control. */ @@ -1808,6 +2146,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->thread_pool_size), &fs_info->generic_worker); + btrfs_init_workers(&fs_info->caching_workers, "cache", + 2, &fs_info->generic_worker); + /* a higher idle thresh on the submit workers makes it much more * likely that bios will be send down in a sane order to the * devices @@ -1839,6 +2180,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta", fs_info->thread_pool_size, &fs_info->generic_worker); + btrfs_init_workers(&fs_info->readahead_workers, "readahead", + fs_info->thread_pool_size, + &fs_info->generic_worker); /* * endios are largely parallel and should have a very @@ -1849,18 +2193,29 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->endio_write_workers.idle_thresh = 2; fs_info->endio_meta_write_workers.idle_thresh = 2; + fs_info->readahead_workers.idle_thresh = 2; - btrfs_start_workers(&fs_info->workers, 1); - btrfs_start_workers(&fs_info->generic_worker, 1); - btrfs_start_workers(&fs_info->submit_workers, 1); - btrfs_start_workers(&fs_info->delalloc_workers, 1); - btrfs_start_workers(&fs_info->fixup_workers, 1); - btrfs_start_workers(&fs_info->endio_workers, 1); - btrfs_start_workers(&fs_info->endio_meta_workers, 1); - btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); - btrfs_start_workers(&fs_info->endio_write_workers, 1); - btrfs_start_workers(&fs_info->endio_freespace_worker, 1); - btrfs_start_workers(&fs_info->delayed_workers, 1); + /* + * btrfs_start_workers can really only fail because of ENOMEM so just + * return -ENOMEM if any of these fail. + */ + ret = btrfs_start_workers(&fs_info->workers); + ret |= btrfs_start_workers(&fs_info->generic_worker); + ret |= btrfs_start_workers(&fs_info->submit_workers); + ret |= btrfs_start_workers(&fs_info->delalloc_workers); + ret |= btrfs_start_workers(&fs_info->fixup_workers); + ret |= btrfs_start_workers(&fs_info->endio_workers); + ret |= btrfs_start_workers(&fs_info->endio_meta_workers); + ret |= btrfs_start_workers(&fs_info->endio_meta_write_workers); + ret |= btrfs_start_workers(&fs_info->endio_write_workers); + ret |= btrfs_start_workers(&fs_info->endio_freespace_worker); + ret |= btrfs_start_workers(&fs_info->delayed_workers); + ret |= btrfs_start_workers(&fs_info->caching_workers); + ret |= btrfs_start_workers(&fs_info->readahead_workers); + if (ret) { + ret = -ENOMEM; + goto fail_sb_buffer; + } fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, @@ -1907,7 +2262,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n", sb->s_id); - goto fail_chunk_root; + goto fail_tree_roots; } btrfs_set_root_node(&chunk_root->root_item, chunk_root->node); chunk_root->commit_root = btrfs_root_node(chunk_root); @@ -1922,11 +2277,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (ret) { printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n", sb->s_id); - goto fail_chunk_root; + goto fail_tree_roots; } btrfs_close_extra_devices(fs_devices); +retry_root_backup: blocksize = btrfs_level_size(tree_root, btrfs_super_root_level(disk_super)); generation = btrfs_super_generation(disk_super); @@ -1934,32 +2290,33 @@ struct btrfs_root *open_ctree(struct super_block *sb, tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super), blocksize, generation); - if (!tree_root->node) - goto fail_chunk_root; - if (!test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) { + if (!tree_root->node || + !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) { printk(KERN_WARNING "btrfs: failed to read tree root on %s\n", sb->s_id); - goto fail_tree_root; + + goto recovery_tree_root; } + btrfs_set_root_node(&tree_root->root_item, tree_root->node); tree_root->commit_root = btrfs_root_node(tree_root); ret = find_and_setup_root(tree_root, fs_info, BTRFS_EXTENT_TREE_OBJECTID, extent_root); if (ret) - goto fail_tree_root; + goto recovery_tree_root; extent_root->track_dirty = 1; ret = find_and_setup_root(tree_root, fs_info, BTRFS_DEV_TREE_OBJECTID, dev_root); if (ret) - goto fail_extent_root; + goto recovery_tree_root; dev_root->track_dirty = 1; ret = find_and_setup_root(tree_root, fs_info, BTRFS_CSUM_TREE_OBJECTID, csum_root); if (ret) - goto fail_dev_root; + goto recovery_tree_root; csum_root->track_dirty = 1; @@ -2092,22 +2449,13 @@ fail_cleaner: fail_block_groups: btrfs_free_block_groups(fs_info); - free_extent_buffer(csum_root->node); - free_extent_buffer(csum_root->commit_root); -fail_dev_root: - free_extent_buffer(dev_root->node); - free_extent_buffer(dev_root->commit_root); -fail_extent_root: - free_extent_buffer(extent_root->node); - free_extent_buffer(extent_root->commit_root); -fail_tree_root: - free_extent_buffer(tree_root->node); - free_extent_buffer(tree_root->commit_root); -fail_chunk_root: - free_extent_buffer(chunk_root->node); - free_extent_buffer(chunk_root->commit_root); + +fail_tree_roots: + free_root_pointers(fs_info, 1); + fail_sb_buffer: btrfs_stop_workers(&fs_info->generic_worker); + btrfs_stop_workers(&fs_info->readahead_workers); btrfs_stop_workers(&fs_info->fixup_workers); btrfs_stop_workers(&fs_info->delalloc_workers); btrfs_stop_workers(&fs_info->workers); @@ -2118,26 +2466,39 @@ fail_sb_buffer: btrfs_stop_workers(&fs_info->endio_freespace_worker); btrfs_stop_workers(&fs_info->submit_workers); btrfs_stop_workers(&fs_info->delayed_workers); + btrfs_stop_workers(&fs_info->caching_workers); fail_alloc: - kfree(fs_info->delayed_root); fail_iput: + btrfs_mapping_tree_free(&fs_info->mapping_tree); + invalidate_inode_pages2(fs_info->btree_inode->i_mapping); iput(fs_info->btree_inode); - - btrfs_close_devices(fs_info->fs_devices); - btrfs_mapping_tree_free(&fs_info->mapping_tree); fail_bdi: bdi_destroy(&fs_info->bdi); fail_srcu: cleanup_srcu_struct(&fs_info->subvol_srcu); fail: - kfree(extent_root); - kfree(tree_root); - kfree(fs_info); - kfree(chunk_root); - kfree(dev_root); - kfree(csum_root); + btrfs_close_devices(fs_info->fs_devices); + free_fs_info(fs_info); return ERR_PTR(err); + +recovery_tree_root: + if (!btrfs_test_opt(tree_root, RECOVERY)) + goto fail_tree_roots; + + free_root_pointers(fs_info, 0); + + /* don't use the log in recovery mode, it won't be valid */ + btrfs_set_super_log_root(disk_super, 0); + + /* we can't trust the free space cache either */ + btrfs_set_opt(fs_info->mount_opt, CLEAR_CACHE); + + ret = next_root_backup(fs_info, fs_info->super_copy, + &num_backups_tried, &backup_index); + if (ret == -1) + goto fail_block_groups; + goto retry_root_backup; } static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) @@ -2221,22 +2582,10 @@ static int write_dev_supers(struct btrfs_device *device, int errors = 0; u32 crc; u64 bytenr; - int last_barrier = 0; if (max_mirrors == 0) max_mirrors = BTRFS_SUPER_MIRROR_MAX; - /* make sure only the last submit_bh does a barrier */ - if (do_barriers) { - for (i = 0; i < max_mirrors; i++) { - bytenr = btrfs_sb_offset(i); - if (bytenr + BTRFS_SUPER_INFO_SIZE >= - device->total_bytes) - break; - last_barrier = i; - } - } - for (i = 0; i < max_mirrors; i++) { bytenr = btrfs_sb_offset(i); if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes) @@ -2282,17 +2631,140 @@ static int write_dev_supers(struct btrfs_device *device, bh->b_end_io = btrfs_end_buffer_write_sync; } - if (i == last_barrier && do_barriers) - ret = submit_bh(WRITE_FLUSH_FUA, bh); - else - ret = submit_bh(WRITE_SYNC, bh); - + /* + * we fua the first super. The others we allow + * to go down lazy. + */ + ret = submit_bh(WRITE_FUA, bh); if (ret) errors++; } return errors < i ? 0 : -1; } +/* + * endio for the write_dev_flush, this will wake anyone waiting + * for the barrier when it is done + */ +static void btrfs_end_empty_barrier(struct bio *bio, int err) +{ + if (err) { + if (err == -EOPNOTSUPP) + set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); + clear_bit(BIO_UPTODATE, &bio->bi_flags); + } + if (bio->bi_private) + complete(bio->bi_private); + bio_put(bio); +} + +/* + * trigger flushes for one the devices. If you pass wait == 0, the flushes are + * sent down. With wait == 1, it waits for the previous flush. + * + * any device where the flush fails with eopnotsupp are flagged as not-barrier + * capable + */ +static int write_dev_flush(struct btrfs_device *device, int wait) +{ + struct bio *bio; + int ret = 0; + + if (device->nobarriers) + return 0; + + if (wait) { + bio = device->flush_bio; + if (!bio) + return 0; + + wait_for_completion(&device->flush_wait); + + if (bio_flagged(bio, BIO_EOPNOTSUPP)) { + printk("btrfs: disabling barriers on dev %s\n", + device->name); + device->nobarriers = 1; + } + if (!bio_flagged(bio, BIO_UPTODATE)) { + ret = -EIO; + } + + /* drop the reference from the wait == 0 run */ + bio_put(bio); + device->flush_bio = NULL; + + return ret; + } + + /* + * one reference for us, and we leave it for the + * caller + */ + device->flush_bio = NULL;; + bio = bio_alloc(GFP_NOFS, 0); + if (!bio) + return -ENOMEM; + + bio->bi_end_io = btrfs_end_empty_barrier; + bio->bi_bdev = device->bdev; + init_completion(&device->flush_wait); + bio->bi_private = &device->flush_wait; + device->flush_bio = bio; + + bio_get(bio); + submit_bio(WRITE_FLUSH, bio); + + return 0; +} + +/* + * send an empty flush down to each device in parallel, + * then wait for them + */ +static int barrier_all_devices(struct btrfs_fs_info *info) +{ + struct list_head *head; + struct btrfs_device *dev; + int errors = 0; + int ret; + + /* send down all the barriers */ + head = &info->fs_devices->devices; + list_for_each_entry_rcu(dev, head, dev_list) { + if (dev->missing) + continue; + if (!dev->bdev) { + errors++; + continue; + } + if (!dev->in_fs_metadata || !dev->writeable) + continue; + + ret = write_dev_flush(dev, 0); + if (ret) + errors++; + } + + /* wait for all the barriers */ + list_for_each_entry_rcu(dev, head, dev_list) { + if (dev->missing) + continue; + if (!dev->bdev) { + errors++; + continue; + } + if (!dev->in_fs_metadata || !dev->writeable) + continue; + + ret = write_dev_flush(dev, 1); + if (ret) + errors++; + } + if (errors) + return -EIO; + return 0; +} + int write_all_supers(struct btrfs_root *root, int max_mirrors) { struct list_head *head; @@ -2305,14 +2777,19 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) int total_errors = 0; u64 flags; - max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; + max_errors = btrfs_super_num_devices(root->fs_info->super_copy) - 1; do_barriers = !btrfs_test_opt(root, NOBARRIER); + backup_super_roots(root->fs_info); - sb = &root->fs_info->super_for_commit; + sb = root->fs_info->super_for_commit; dev_item = &sb->dev_item; mutex_lock(&root->fs_info->fs_devices->device_list_mutex); head = &root->fs_info->fs_devices->devices; + + if (do_barriers) + barrier_all_devices(root->fs_info); + list_for_each_entry_rcu(dev, head, dev_list) { if (!dev->bdev) { total_errors++; @@ -2394,10 +2871,8 @@ static void free_fs_root(struct btrfs_root *root) { iput(root->cache_inode); WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); - if (root->anon_super.s_dev) { - down_write(&root->anon_super.s_umount); - kill_anon_super(&root->anon_super); - } + if (root->anon_dev) + free_anon_bdev(root->anon_dev); free_extent_buffer(root->node); free_extent_buffer(root->commit_root); kfree(root->free_ino_ctl); @@ -2514,8 +2989,6 @@ int close_ctree(struct btrfs_root *root) /* clear out the rbtree of defraggable inodes */ btrfs_run_defrag_inodes(root->fs_info); - btrfs_put_block_group_cache(fs_info); - /* * Here come 2 situations when btrfs is broken to flip readonly: * @@ -2541,6 +3014,8 @@ int close_ctree(struct btrfs_root *root) printk(KERN_ERR "btrfs: commit super ret %d\n", ret); } + btrfs_put_block_group_cache(fs_info); + kthread_stop(root->fs_info->transaction_kthread); kthread_stop(root->fs_info->cleaner_kthread); @@ -2572,7 +3047,6 @@ int close_ctree(struct btrfs_root *root) del_fs_roots(fs_info); iput(fs_info->btree_inode); - kfree(fs_info->delayed_root); btrfs_stop_workers(&fs_info->generic_worker); btrfs_stop_workers(&fs_info->fixup_workers); @@ -2585,6 +3059,8 @@ int close_ctree(struct btrfs_root *root) btrfs_stop_workers(&fs_info->endio_freespace_worker); btrfs_stop_workers(&fs_info->submit_workers); btrfs_stop_workers(&fs_info->delayed_workers); + btrfs_stop_workers(&fs_info->caching_workers); + btrfs_stop_workers(&fs_info->readahead_workers); btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); @@ -2592,12 +3068,7 @@ int close_ctree(struct btrfs_root *root) bdi_destroy(&fs_info->bdi); cleanup_srcu_struct(&fs_info->subvol_srcu); - kfree(fs_info->extent_root); - kfree(fs_info->tree_root); - kfree(fs_info->chunk_root); - kfree(fs_info->dev_root); - kfree(fs_info->csum_root); - kfree(fs_info); + free_fs_info(fs_info); return 0; } @@ -2703,7 +3174,8 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) return ret; } -int btree_lock_page_hook(struct page *page) +static int btree_lock_page_hook(struct page *page, void *data, + void (*flush_fn)(void *)) { struct inode *inode = page->mapping->host; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -2720,7 +3192,10 @@ int btree_lock_page_hook(struct page *page) if (!eb) goto out; - btrfs_tree_lock(eb); + if (!btrfs_try_tree_write_lock(eb)) { + flush_fn(data); + btrfs_tree_lock(eb); + } btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { @@ -2735,7 +3210,10 @@ int btree_lock_page_hook(struct page *page) btrfs_tree_unlock(eb); free_extent_buffer(eb); out: - lock_page(page); + if (!trylock_page(page)) { + flush_fn(data); + lock_page(page); + } return 0; } @@ -3003,12 +3481,6 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root, if (ret) break; - /* opt_discard */ - if (btrfs_test_opt(root, DISCARD)) - ret = btrfs_error_discard_extent(root, start, - end + 1 - start, - NULL); - clear_extent_dirty(unpin, start, end, GFP_NOFS); btrfs_error_unpin_extent_range(root, start, end); cond_resched(); @@ -3091,6 +3563,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) static struct extent_io_ops btree_extent_io_ops = { .write_cache_pages_lock_hook = btree_lock_page_hook, .readpage_end_io_hook = btree_readpage_end_io_hook, + .readpage_io_failed_hook = btree_io_failed_hook, .submit_bio_hook = btree_submit_bio_hook, /* note we're sharing with inode.c for the merge bio hook */ .merge_bio_hook = btrfs_merge_bio_hook, diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index a0b610a..c99d0a8f 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -40,6 +40,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, u64 parent_transid); int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, u64 parent_transid); +int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, + int mirror_num, struct extent_buffer **eb); struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); int clean_tree_block(struct btrfs_trans_handle *trans, @@ -83,14 +85,16 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); int btrfs_add_log_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btree_lock_page_hook(struct page *page); - #ifdef CONFIG_DEBUG_LOCK_ALLOC -void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level); +void btrfs_init_lockdep(void); +void btrfs_set_buffer_lockdep_class(u64 objectid, + struct extent_buffer *eb, int level); #else -static inline void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, - int level) +static inline void btrfs_init_lockdep(void) +{ } +static inline void btrfs_set_buffer_lockdep_class(u64 objectid, + struct extent_buffer *eb, int level) { } #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 01220b7..da528f8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "compat.h" #include "hash.h" #include "ctree.h" @@ -52,6 +53,21 @@ enum { CHUNK_ALLOC_LIMITED = 2, }; +/* + * Control how reservations are dealt with. + * + * RESERVE_FREE - freeing a reservation. + * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for + * ENOSPC accounting + * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update + * bytes_may_use as the ENOSPC accounting is done elsewhere + */ +enum { + RESERVE_FREE = 0, + RESERVE_ALLOC = 1, + RESERVE_ALLOC_NO_ACCOUNT = 2, +}; + static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc); @@ -81,6 +97,8 @@ static int find_next_key(struct btrfs_path *path, int level, struct btrfs_key *key); static void dump_space_info(struct btrfs_space_info *info, u64 bytes, int dump_block_groups); +static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, + u64 num_bytes, int reserve); static noinline int block_group_cache_done(struct btrfs_block_group_cache *cache) @@ -104,7 +122,6 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache) if (atomic_dec_and_test(&cache->count)) { WARN_ON(cache->pinned > 0); WARN_ON(cache->reserved > 0); - WARN_ON(cache->reserved_pinned > 0); kfree(cache->free_space_ctl); kfree(cache); } @@ -320,12 +337,12 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, return total_added; } -static int caching_kthread(void *data) +static noinline void caching_thread(struct btrfs_work *work) { - struct btrfs_block_group_cache *block_group = data; - struct btrfs_fs_info *fs_info = block_group->fs_info; - struct btrfs_caching_control *caching_ctl = block_group->caching_ctl; - struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_block_group_cache *block_group; + struct btrfs_fs_info *fs_info; + struct btrfs_caching_control *caching_ctl; + struct btrfs_root *extent_root; struct btrfs_path *path; struct extent_buffer *leaf; struct btrfs_key key; @@ -334,9 +351,14 @@ static int caching_kthread(void *data) u32 nritems; int ret = 0; + caching_ctl = container_of(work, struct btrfs_caching_control, work); + block_group = caching_ctl->block_group; + fs_info = block_group->fs_info; + extent_root = fs_info->extent_root; + path = btrfs_alloc_path(); if (!path) - return -ENOMEM; + goto out; last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); @@ -433,13 +455,11 @@ err: free_excluded_extents(extent_root, block_group); mutex_unlock(&caching_ctl->mutex); +out: wake_up(&caching_ctl->wait); put_caching_control(caching_ctl); - atomic_dec(&block_group->space_info->caching_threads); btrfs_put_block_group(block_group); - - return 0; } static int cache_block_group(struct btrfs_block_group_cache *cache, @@ -447,14 +467,59 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, struct btrfs_root *root, int load_cache_only) { + DEFINE_WAIT(wait); struct btrfs_fs_info *fs_info = cache->fs_info; struct btrfs_caching_control *caching_ctl; - struct task_struct *tsk; int ret = 0; - smp_mb(); - if (cache->cached != BTRFS_CACHE_NO) + caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS); + BUG_ON(!caching_ctl); + + INIT_LIST_HEAD(&caching_ctl->list); + mutex_init(&caching_ctl->mutex); + init_waitqueue_head(&caching_ctl->wait); + caching_ctl->block_group = cache; + caching_ctl->progress = cache->key.objectid; + atomic_set(&caching_ctl->count, 1); + caching_ctl->work.func = caching_thread; + + spin_lock(&cache->lock); + /* + * This should be a rare occasion, but this could happen I think in the + * case where one thread starts to load the space cache info, and then + * some other thread starts a transaction commit which tries to do an + * allocation while the other thread is still loading the space cache + * info. The previous loop should have kept us from choosing this block + * group, but if we've moved to the state where we will wait on caching + * block groups we need to first check if we're doing a fast load here, + * so we can wait for it to finish, otherwise we could end up allocating + * from a block group who's cache gets evicted for one reason or + * another. + */ + while (cache->cached == BTRFS_CACHE_FAST) { + struct btrfs_caching_control *ctl; + + ctl = cache->caching_ctl; + atomic_inc(&ctl->count); + prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE); + spin_unlock(&cache->lock); + + schedule(); + + finish_wait(&ctl->wait, &wait); + put_caching_control(ctl); + spin_lock(&cache->lock); + } + + if (cache->cached != BTRFS_CACHE_NO) { + spin_unlock(&cache->lock); + kfree(caching_ctl); return 0; + } + WARN_ON(cache->caching_ctl); + cache->caching_ctl = caching_ctl; + cache->cached = BTRFS_CACHE_FAST; + spin_unlock(&cache->lock); /* * We can't do the read from on-disk cache during a commit since we need @@ -463,69 +528,59 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, * we likely hold important locks. */ if (trans && (!trans->transaction->in_commit) && - (root && root != root->fs_info->tree_root)) { - spin_lock(&cache->lock); - if (cache->cached != BTRFS_CACHE_NO) { - spin_unlock(&cache->lock); - return 0; - } - cache->cached = BTRFS_CACHE_STARTED; - spin_unlock(&cache->lock); - + (root && root != root->fs_info->tree_root) && + btrfs_test_opt(root, SPACE_CACHE)) { ret = load_free_space_cache(fs_info, cache); spin_lock(&cache->lock); if (ret == 1) { + cache->caching_ctl = NULL; cache->cached = BTRFS_CACHE_FINISHED; cache->last_byte_to_unpin = (u64)-1; } else { - cache->cached = BTRFS_CACHE_NO; + if (load_cache_only) { + cache->caching_ctl = NULL; + cache->cached = BTRFS_CACHE_NO; + } else { + cache->cached = BTRFS_CACHE_STARTED; + } } spin_unlock(&cache->lock); + wake_up(&caching_ctl->wait); if (ret == 1) { + put_caching_control(caching_ctl); free_excluded_extents(fs_info->extent_root, cache); return 0; } + } else { + /* + * We are not going to do the fast caching, set cached to the + * appropriate value and wakeup any waiters. + */ + spin_lock(&cache->lock); + if (load_cache_only) { + cache->caching_ctl = NULL; + cache->cached = BTRFS_CACHE_NO; + } else { + cache->cached = BTRFS_CACHE_STARTED; + } + spin_unlock(&cache->lock); + wake_up(&caching_ctl->wait); } - if (load_cache_only) - return 0; - - caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS); - BUG_ON(!caching_ctl); - - INIT_LIST_HEAD(&caching_ctl->list); - mutex_init(&caching_ctl->mutex); - init_waitqueue_head(&caching_ctl->wait); - caching_ctl->block_group = cache; - caching_ctl->progress = cache->key.objectid; - /* one for caching kthread, one for caching block group list */ - atomic_set(&caching_ctl->count, 2); - - spin_lock(&cache->lock); - if (cache->cached != BTRFS_CACHE_NO) { - spin_unlock(&cache->lock); - kfree(caching_ctl); + if (load_cache_only) { + put_caching_control(caching_ctl); return 0; } - cache->caching_ctl = caching_ctl; - cache->cached = BTRFS_CACHE_STARTED; - spin_unlock(&cache->lock); down_write(&fs_info->extent_commit_sem); + atomic_inc(&caching_ctl->count); list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); up_write(&fs_info->extent_commit_sem); - atomic_inc(&cache->space_info->caching_threads); btrfs_get_block_group(cache); - tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", - cache->key.objectid); - if (IS_ERR(tsk)) { - ret = PTR_ERR(tsk); - printk(KERN_ERR "error running thread %d\n", ret); - BUG(); - } + btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work); return ret; } @@ -667,7 +722,9 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) struct btrfs_path *path; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; + key.objectid = start; key.offset = len; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); @@ -1772,18 +1829,18 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, { int ret; u64 discarded_bytes = 0; - struct btrfs_multi_bio *multi = NULL; + struct btrfs_bio *bbio = NULL; /* Tell the block device(s) that the sectors can be discarded */ ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD, - bytenr, &num_bytes, &multi, 0); + bytenr, &num_bytes, &bbio, 0); if (!ret) { - struct btrfs_bio_stripe *stripe = multi->stripes; + struct btrfs_bio_stripe *stripe = bbio->stripes; int i; - for (i = 0; i < multi->num_stripes; i++, stripe++) { + for (i = 0; i < bbio->num_stripes; i++, stripe++) { if (!stripe->dev->can_discard) continue; @@ -1802,7 +1859,7 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, */ ret = 0; } - kfree(multi); + kfree(bbio); } if (actual_bytes) @@ -2702,6 +2759,13 @@ again: goto again; } + /* We've already setup this transaction, go ahead and exit */ + if (block_group->cache_generation == trans->transid && + i_size_read(inode)) { + dcs = BTRFS_DC_SETUP; + goto out_put; + } + /* * We want to set the generation to 0, that way if anything goes wrong * from here on out we know not to trust this cache when we load up next @@ -2751,12 +2815,15 @@ again: if (!ret) dcs = BTRFS_DC_SETUP; btrfs_free_reserved_data_space(inode, num_pages); + out_put: iput(inode); out_free: btrfs_release_path(path); out: spin_lock(&block_group->lock); + if (!ret && dcs == BTRFS_DC_SETUP) + block_group->cache_generation = trans->transid; block_group->disk_cache_state = dcs; spin_unlock(&block_group->lock); @@ -2940,9 +3007,10 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found->full = 0; found->force_alloc = CHUNK_ALLOC_NO_FORCE; found->chunk_alloc = 0; + found->flush = 0; + init_waitqueue_head(&found->wait); *space_info = found; list_add_rcu(&found->list, &info->space_info); - atomic_set(&found->caching_threads, 0); return 0; } @@ -3123,16 +3191,13 @@ commit_trans: return -ENOSPC; } data_sinfo->bytes_may_use += bytes; - BTRFS_I(inode)->reserved_bytes += bytes; spin_unlock(&data_sinfo->lock); return 0; } /* - * called when we are clearing an delalloc extent from the - * inode's io_tree or there was an error for whatever reason - * after calling btrfs_check_data_free_space + * Called if we need to clear a data reservation for this inode. */ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) { @@ -3145,7 +3210,6 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) data_sinfo = BTRFS_I(inode)->space_info; spin_lock(&data_sinfo->lock); data_sinfo->bytes_may_use -= bytes; - BTRFS_I(inode)->reserved_bytes -= bytes; spin_unlock(&data_sinfo->lock); } @@ -3166,6 +3230,7 @@ static int should_alloc_chunk(struct btrfs_root *root, struct btrfs_space_info *sinfo, u64 alloc_bytes, int force) { + struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved; u64 thresh; @@ -3174,11 +3239,18 @@ static int should_alloc_chunk(struct btrfs_root *root, return 1; /* + * We need to take into account the global rsv because for all intents + * and purposes it's used space. Don't worry about locking the + * global_rsv, it doesn't change except when the transaction commits. + */ + num_allocated += global_rsv->size; + + /* * in limited mode, we want to have some free space up to * about 1% of the FS size. */ if (force == CHUNK_ALLOC_LIMITED) { - thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); + thresh = btrfs_super_total_bytes(root->fs_info->super_copy); thresh = max_t(u64, 64 * 1024 * 1024, div_factor_fine(thresh, 1)); @@ -3200,7 +3272,7 @@ static int should_alloc_chunk(struct btrfs_root *root, if (num_allocated + alloc_bytes < div_factor(num_bytes, 8)) return 0; - thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); + thresh = btrfs_super_total_bytes(root->fs_info->super_copy); /* 256MB or 5% of the FS */ thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5)); @@ -3283,6 +3355,9 @@ again: } ret = btrfs_alloc_chunk(trans, extent_root, flags); + if (ret < 0 && ret != -ENOSPC) + goto out; + spin_lock(&space_info->lock); if (ret) space_info->full = 1; @@ -3292,6 +3367,7 @@ again: space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; space_info->chunk_alloc = 0; spin_unlock(&space_info->lock); +out: mutex_unlock(&extent_root->fs_info->chunk_mutex); return ret; } @@ -3299,42 +3375,54 @@ again: /* * shrink metadata reservation for delalloc */ -static int shrink_delalloc(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 to_reclaim, int sync) +static int shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, + bool wait_ordered) { struct btrfs_block_rsv *block_rsv; struct btrfs_space_info *space_info; + struct btrfs_trans_handle *trans; u64 reserved; u64 max_reclaim; u64 reclaimed = 0; long time_left; - int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; + unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; int loops = 0; unsigned long progress; + trans = (struct btrfs_trans_handle *)current->journal_info; block_rsv = &root->fs_info->delalloc_block_rsv; space_info = block_rsv->space_info; smp_mb(); - reserved = space_info->bytes_reserved; + reserved = space_info->bytes_may_use; progress = space_info->reservation_progress; if (reserved == 0) return 0; - max_reclaim = min(reserved, to_reclaim); + smp_mb(); + if (root->fs_info->delalloc_bytes == 0) { + if (trans) + return 0; + btrfs_wait_ordered_extents(root, 0, 0); + return 0; + } + max_reclaim = min(reserved, to_reclaim); + nr_pages = max_t(unsigned long, nr_pages, + max_reclaim >> PAGE_CACHE_SHIFT); while (loops < 1024) { /* have the flusher threads jump in and do some IO */ smp_mb(); nr_pages = min_t(unsigned long, nr_pages, root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT); - writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); + writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages, + WB_REASON_FS_FREE_SPACE); spin_lock(&space_info->lock); - if (reserved > space_info->bytes_reserved) - reclaimed += reserved - space_info->bytes_reserved; - reserved = space_info->bytes_reserved; + if (reserved > space_info->bytes_may_use) + reclaimed += reserved - space_info->bytes_may_use; + reserved = space_info->bytes_may_use; spin_unlock(&space_info->lock); loops++; @@ -3345,11 +3433,15 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, if (trans && trans->transaction->blocked) return -EAGAIN; - time_left = schedule_timeout_interruptible(1); + if (wait_ordered && !trans) { + btrfs_wait_ordered_extents(root, 0, 0); + } else { + time_left = schedule_timeout_interruptible(1); - /* We were interrupted, exit */ - if (time_left) - break; + /* We were interrupted, exit */ + if (time_left) + break; + } /* we've kicked the IO a few times, if anything has been freed, * exit. There is no sense in looping here for a long time @@ -3364,42 +3456,121 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, } } + return reclaimed >= to_reclaim; } -/* - * Retries tells us how many times we've called reserve_metadata_bytes. The - * idea is if this is the first call (retries == 0) then we will add to our - * reserved count if we can't make the allocation in order to hold our place - * while we go and try and free up space. That way for retries > 1 we don't try - * and add space, we just check to see if the amount of unused space is >= the - * total space, meaning that our reservation is valid. +/** + * maybe_commit_transaction - possibly commit the transaction if its ok to + * @root - the root we're allocating for + * @bytes - the number of bytes we want to reserve + * @force - force the commit * - * However if we don't intend to retry this reservation, pass -1 as retries so - * that it short circuits this logic. + * This will check to make sure that committing the transaction will actually + * get us somewhere and then commit the transaction if it does. Otherwise it + * will return -ENOSPC. */ -static int reserve_metadata_bytes(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int may_commit_transaction(struct btrfs_root *root, + struct btrfs_space_info *space_info, + u64 bytes, int force) +{ + struct btrfs_block_rsv *delayed_rsv = &root->fs_info->delayed_block_rsv; + struct btrfs_trans_handle *trans; + + trans = (struct btrfs_trans_handle *)current->journal_info; + if (trans) + return -EAGAIN; + + if (force) + goto commit; + + /* See if there is enough pinned space to make this reservation */ + spin_lock(&space_info->lock); + if (space_info->bytes_pinned >= bytes) { + spin_unlock(&space_info->lock); + goto commit; + } + spin_unlock(&space_info->lock); + + /* + * See if there is some space in the delayed insertion reservation for + * this reservation. + */ + if (space_info != delayed_rsv->space_info) + return -ENOSPC; + + spin_lock(&delayed_rsv->lock); + if (delayed_rsv->size < bytes) { + spin_unlock(&delayed_rsv->lock); + return -ENOSPC; + } + spin_unlock(&delayed_rsv->lock); + +commit: + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) + return -ENOSPC; + + return btrfs_commit_transaction(trans, root); +} + +/** + * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space + * @root - the root we're allocating for + * @block_rsv - the block_rsv we're allocating for + * @orig_bytes - the number of bytes we want + * @flush - wether or not we can flush to make our reservation + * + * This will reserve orgi_bytes number of bytes from the space info associated + * with the block_rsv. If there is not enough space it will make an attempt to + * flush out space to make room. It will do this by flushing delalloc if + * possible or committing the transaction. If flush is 0 then no attempts to + * regain reservations will be made and this will fail if there is not enough + * space already. + */ +static int reserve_metadata_bytes(struct btrfs_root *root, struct btrfs_block_rsv *block_rsv, u64 orig_bytes, int flush) { struct btrfs_space_info *space_info = block_rsv->space_info; - u64 unused; + u64 used; u64 num_bytes = orig_bytes; int retries = 0; int ret = 0; - bool reserved = false; bool committed = false; + bool flushing = false; + bool wait_ordered = false; again: - ret = -ENOSPC; - if (reserved) - num_bytes = 0; - + ret = 0; spin_lock(&space_info->lock); - unused = space_info->bytes_used + space_info->bytes_reserved + - space_info->bytes_pinned + space_info->bytes_readonly + - space_info->bytes_may_use; + /* + * We only want to wait if somebody other than us is flushing and we are + * actually alloed to flush. + */ + while (flush && !flushing && space_info->flush) { + spin_unlock(&space_info->lock); + /* + * If we have a trans handle we can't wait because the flusher + * may have to commit the transaction, which would mean we would + * deadlock since we are waiting for the flusher to finish, but + * hold the current transaction open. + */ + if (current->journal_info) + return -EAGAIN; + ret = wait_event_interruptible(space_info->wait, + !space_info->flush); + /* Must have been interrupted, return */ + if (ret) + return -EINTR; + + spin_lock(&space_info->lock); + } + + ret = -ENOSPC; + used = space_info->bytes_used + space_info->bytes_reserved + + space_info->bytes_pinned + space_info->bytes_readonly + + space_info->bytes_may_use; /* * The idea here is that we've not already over-reserved the block group @@ -3408,11 +3579,9 @@ again: * lets start flushing stuff first and then come back and try to make * our reservation. */ - if (unused <= space_info->total_bytes) { - unused = space_info->total_bytes - unused; - if (unused >= num_bytes) { - if (!reserved) - space_info->bytes_reserved += orig_bytes; + if (used <= space_info->total_bytes) { + if (used + orig_bytes <= space_info->total_bytes) { + space_info->bytes_may_use += orig_bytes; ret = 0; } else { /* @@ -3428,91 +3597,129 @@ again: * amount plus the amount of bytes that we need for this * reservation. */ - num_bytes = unused - space_info->total_bytes + + wait_ordered = true; + num_bytes = used - space_info->total_bytes + (orig_bytes * (retries + 1)); } + if (ret) { + u64 profile = btrfs_get_alloc_profile(root, 0); + u64 avail; + + /* + * If we have a lot of space that's pinned, don't bother doing + * the overcommit dance yet and just commit the transaction. + */ + avail = (space_info->total_bytes - space_info->bytes_used) * 8; + do_div(avail, 10); + if (space_info->bytes_pinned >= avail && flush && !committed) { + space_info->flush = 1; + flushing = true; + spin_unlock(&space_info->lock); + ret = may_commit_transaction(root, space_info, + orig_bytes, 1); + if (ret) + goto out; + committed = true; + goto again; + } + + spin_lock(&root->fs_info->free_chunk_lock); + avail = root->fs_info->free_chunk_space; + + /* + * If we have dup, raid1 or raid10 then only half of the free + * space is actually useable. + */ + if (profile & (BTRFS_BLOCK_GROUP_DUP | + BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID10)) + avail >>= 1; + + /* + * If we aren't flushing don't let us overcommit too much, say + * 1/8th of the space. If we can flush, let it overcommit up to + * 1/2 of the space. + */ + if (flush) + avail >>= 3; + else + avail >>= 1; + spin_unlock(&root->fs_info->free_chunk_lock); + + if (used + num_bytes < space_info->total_bytes + avail) { + space_info->bytes_may_use += orig_bytes; + ret = 0; + } else { + wait_ordered = true; + } + } + /* * Couldn't make our reservation, save our place so while we're trying * to reclaim space we can actually use it instead of somebody else * stealing it from us. */ - if (ret && !reserved) { - space_info->bytes_reserved += orig_bytes; - reserved = true; + if (ret && flush) { + flushing = true; + space_info->flush = 1; } spin_unlock(&space_info->lock); - if (!ret) - return 0; - - if (!flush) + if (!ret || !flush) goto out; /* * We do synchronous shrinking since we don't actually unreserve * metadata until after the IO is completed. */ - ret = shrink_delalloc(trans, root, num_bytes, 1); - if (ret > 0) - return 0; - else if (ret < 0) + ret = shrink_delalloc(root, num_bytes, wait_ordered); + if (ret < 0) goto out; + ret = 0; + /* * So if we were overcommitted it's possible that somebody else flushed * out enough space and we simply didn't have enough space to reclaim, * so go back around and try again. */ if (retries < 2) { + wait_ordered = true; retries++; goto again; } - spin_lock(&space_info->lock); - /* - * Not enough space to be reclaimed, don't bother committing the - * transaction. - */ - if (space_info->bytes_pinned < orig_bytes) - ret = -ENOSPC; - spin_unlock(&space_info->lock); - if (ret) - goto out; - - ret = -EAGAIN; - if (trans || committed) - goto out; - ret = -ENOSPC; - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) + if (committed) goto out; - ret = btrfs_commit_transaction(trans, root); + + ret = may_commit_transaction(root, space_info, orig_bytes, 0); if (!ret) { - trans = NULL; committed = true; goto again; } out: - if (reserved) { + if (flushing) { spin_lock(&space_info->lock); - space_info->bytes_reserved -= orig_bytes; + space_info->flush = 0; + wake_up_all(&space_info->wait); spin_unlock(&space_info->lock); } - return ret; } static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - struct btrfs_block_rsv *block_rsv; - if (root->ref_cows) + struct btrfs_block_rsv *block_rsv = NULL; + + if (root->ref_cows || root == root->fs_info->csum_root) block_rsv = trans->block_rsv; - else + + if (!block_rsv) block_rsv = root->block_rsv; if (!block_rsv) @@ -3583,7 +3790,7 @@ static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, } if (num_bytes) { spin_lock(&space_info->lock); - space_info->bytes_reserved -= num_bytes; + space_info->bytes_may_use -= num_bytes; space_info->reservation_progress++; spin_unlock(&space_info->lock); } @@ -3607,9 +3814,6 @@ void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv) { memset(rsv, 0, sizeof(*rsv)); spin_lock_init(&rsv->lock); - atomic_set(&rsv->usage, 1); - rsv->priority = 6; - INIT_LIST_HEAD(&rsv->list); } struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) @@ -3630,38 +3834,20 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) void btrfs_free_block_rsv(struct btrfs_root *root, struct btrfs_block_rsv *rsv) { - if (rsv && atomic_dec_and_test(&rsv->usage)) { - btrfs_block_rsv_release(root, rsv, (u64)-1); - if (!rsv->durable) - kfree(rsv); - } -} - -/* - * make the block_rsv struct be able to capture freed space. - * the captured space will re-add to the the block_rsv struct - * after transaction commit - */ -void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info, - struct btrfs_block_rsv *block_rsv) -{ - block_rsv->durable = 1; - mutex_lock(&fs_info->durable_block_rsv_mutex); - list_add_tail(&block_rsv->list, &fs_info->durable_block_rsv_list); - mutex_unlock(&fs_info->durable_block_rsv_mutex); + btrfs_block_rsv_release(root, rsv, (u64)-1); + kfree(rsv); } -int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_block_rsv *block_rsv, - u64 num_bytes) +static inline int __block_rsv_add(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, + u64 num_bytes, int flush) { int ret; if (num_bytes == 0) return 0; - ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1); + ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); if (!ret) { block_rsv_add_bytes(block_rsv, num_bytes, 1); return 0; @@ -3670,56 +3856,80 @@ int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, return ret; } -int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_block_rsv *block_rsv, - u64 min_reserved, int min_factor) +int btrfs_block_rsv_add(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, + u64 num_bytes) +{ + return __block_rsv_add(root, block_rsv, num_bytes, 1); +} + +int btrfs_block_rsv_add_noflush(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, + u64 num_bytes) +{ + return __block_rsv_add(root, block_rsv, num_bytes, 0); +} + +int btrfs_block_rsv_check(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, int min_factor) { u64 num_bytes = 0; - int commit_trans = 0; int ret = -ENOSPC; if (!block_rsv) return 0; spin_lock(&block_rsv->lock); - if (min_factor > 0) - num_bytes = div_factor(block_rsv->size, min_factor); - if (min_reserved > num_bytes) - num_bytes = min_reserved; + num_bytes = div_factor(block_rsv->size, min_factor); + if (block_rsv->reserved >= num_bytes) + ret = 0; + spin_unlock(&block_rsv->lock); - if (block_rsv->reserved >= num_bytes) { + return ret; +} + +static inline int __btrfs_block_rsv_refill(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, + u64 min_reserved, int flush) +{ + u64 num_bytes = 0; + int ret = -ENOSPC; + + if (!block_rsv) + return 0; + + spin_lock(&block_rsv->lock); + num_bytes = min_reserved; + if (block_rsv->reserved >= num_bytes) ret = 0; - } else { + else num_bytes -= block_rsv->reserved; - if (block_rsv->durable && - block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes) - commit_trans = 1; - } spin_unlock(&block_rsv->lock); + if (!ret) return 0; - if (block_rsv->refill_used) { - ret = reserve_metadata_bytes(trans, root, block_rsv, - num_bytes, 0); - if (!ret) { - block_rsv_add_bytes(block_rsv, num_bytes, 0); - return 0; - } + ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); + if (!ret) { + block_rsv_add_bytes(block_rsv, num_bytes, 0); + return 0; } - if (commit_trans) { - if (trans) - return -EAGAIN; + return ret; +} - trans = btrfs_join_transaction(root); - BUG_ON(IS_ERR(trans)); - ret = btrfs_commit_transaction(trans, root); - return 0; - } +int btrfs_block_rsv_refill(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, + u64 min_reserved) +{ + return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 1); +} - return -ENOSPC; +int btrfs_block_rsv_refill_noflush(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, + u64 min_reserved) +{ + return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 0); } int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, @@ -3751,7 +3961,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) u64 num_bytes; u64 meta_used; u64 data_used; - int csum_size = btrfs_super_csum_size(&fs_info->super_copy); + int csum_size = btrfs_super_csum_size(fs_info->super_copy); sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); spin_lock(&sinfo->lock); @@ -3795,12 +4005,12 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) if (sinfo->total_bytes > num_bytes) { num_bytes = sinfo->total_bytes - num_bytes; block_rsv->reserved += num_bytes; - sinfo->bytes_reserved += num_bytes; + sinfo->bytes_may_use += num_bytes; } if (block_rsv->reserved >= block_rsv->size) { num_bytes = block_rsv->reserved - block_rsv->size; - sinfo->bytes_reserved -= num_bytes; + sinfo->bytes_may_use -= num_bytes; sinfo->reservation_progress++; block_rsv->reserved = block_rsv->size; block_rsv->full = 1; @@ -3816,16 +4026,13 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info) space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); fs_info->chunk_block_rsv.space_info = space_info; - fs_info->chunk_block_rsv.priority = 10; space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); fs_info->global_block_rsv.space_info = space_info; - fs_info->global_block_rsv.priority = 10; - fs_info->global_block_rsv.refill_used = 1; fs_info->delalloc_block_rsv.space_info = space_info; fs_info->trans_block_rsv.space_info = space_info; fs_info->empty_block_rsv.space_info = space_info; - fs_info->empty_block_rsv.priority = 10; + fs_info->delayed_block_rsv.space_info = space_info; fs_info->extent_root->block_rsv = &fs_info->global_block_rsv; fs_info->csum_root->block_rsv = &fs_info->global_block_rsv; @@ -3833,10 +4040,6 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info) fs_info->tree_root->block_rsv = &fs_info->global_block_rsv; fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; - btrfs_add_durable_block_rsv(fs_info, &fs_info->global_block_rsv); - - btrfs_add_durable_block_rsv(fs_info, &fs_info->delalloc_block_rsv); - update_global_block_rsv(fs_info); } @@ -3849,57 +4052,8 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info) WARN_ON(fs_info->trans_block_rsv.reserved > 0); WARN_ON(fs_info->chunk_block_rsv.size > 0); WARN_ON(fs_info->chunk_block_rsv.reserved > 0); -} - -int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_block_rsv *rsv) -{ - struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv; - u64 num_bytes; - int ret; - - /* - * Truncate should be freeing data, but give us 2 items just in case it - * needs to use some space. We may want to be smarter about this in the - * future. - */ - num_bytes = btrfs_calc_trans_metadata_size(root, 2); - - /* We already have enough bytes, just return */ - if (rsv->reserved >= num_bytes) - return 0; - - num_bytes -= rsv->reserved; - - /* - * You should have reserved enough space before hand to do this, so this - * should not fail. - */ - ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes); - BUG_ON(ret); - - return 0; -} - -int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - int num_items) -{ - u64 num_bytes; - int ret; - - if (num_items == 0 || root->fs_info->chunk_root == root) - return 0; - - num_bytes = btrfs_calc_trans_metadata_size(root, num_items); - ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, - num_bytes); - if (!ret) { - trans->bytes_reserved += num_bytes; - trans->block_rsv = &root->fs_info->trans_block_rsv; - } - return ret; + WARN_ON(fs_info->delayed_block_rsv.size > 0); + WARN_ON(fs_info->delayed_block_rsv.reserved > 0); } void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, @@ -3908,9 +4062,7 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, if (!trans->bytes_reserved) return; - BUG_ON(trans->block_rsv != &root->fs_info->trans_block_rsv); - btrfs_block_rsv_release(root, trans->block_rsv, - trans->bytes_reserved); + btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved); trans->bytes_reserved = 0; } @@ -3952,90 +4104,229 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); } -static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) +/** + * drop_outstanding_extent - drop an outstanding extent + * @inode: the inode we're dropping the extent for + * + * This is called when we are freeing up an outstanding extent, either called + * after an error or after an extent is written. This will return the number of + * reserved extents that need to be freed. This must be called with + * BTRFS_I(inode)->lock held. + */ +static unsigned drop_outstanding_extent(struct inode *inode) { - return num_bytes >>= 3; + unsigned drop_inode_space = 0; + unsigned dropped_extents = 0; + + BUG_ON(!BTRFS_I(inode)->outstanding_extents); + BTRFS_I(inode)->outstanding_extents--; + + if (BTRFS_I(inode)->outstanding_extents == 0 && + BTRFS_I(inode)->delalloc_meta_reserved) { + drop_inode_space = 1; + BTRFS_I(inode)->delalloc_meta_reserved = 0; + } + + /* + * If we have more or the same amount of outsanding extents than we have + * reserved then we need to leave the reserved extents count alone. + */ + if (BTRFS_I(inode)->outstanding_extents >= + BTRFS_I(inode)->reserved_extents) + return drop_inode_space; + + dropped_extents = BTRFS_I(inode)->reserved_extents - + BTRFS_I(inode)->outstanding_extents; + BTRFS_I(inode)->reserved_extents -= dropped_extents; + return dropped_extents + drop_inode_space; +} + +/** + * calc_csum_metadata_size - return the amount of metada space that must be + * reserved/free'd for the given bytes. + * @inode: the inode we're manipulating + * @num_bytes: the number of bytes in question + * @reserve: 1 if we are reserving space, 0 if we are freeing space + * + * This adjusts the number of csum_bytes in the inode and then returns the + * correct amount of metadata that must either be reserved or freed. We + * calculate how many checksums we can fit into one leaf and then divide the + * number of bytes that will need to be checksumed by this value to figure out + * how many checksums will be required. If we are adding bytes then the number + * may go up and we will return the number of additional bytes that must be + * reserved. If it is going down we will return the number of bytes that must + * be freed. + * + * This must be called with BTRFS_I(inode)->lock held. + */ +static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes, + int reserve) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 csum_size; + int num_csums_per_leaf; + int num_csums; + int old_csums; + + if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM && + BTRFS_I(inode)->csum_bytes == 0) + return 0; + + old_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize); + if (reserve) + BTRFS_I(inode)->csum_bytes += num_bytes; + else + BTRFS_I(inode)->csum_bytes -= num_bytes; + csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item); + num_csums_per_leaf = (int)div64_u64(csum_size, + sizeof(struct btrfs_csum_item) + + sizeof(struct btrfs_disk_key)); + num_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize); + num_csums = num_csums + num_csums_per_leaf - 1; + num_csums = num_csums / num_csums_per_leaf; + + old_csums = old_csums + num_csums_per_leaf - 1; + old_csums = old_csums / num_csums_per_leaf; + + /* No change, no need to reserve more */ + if (old_csums == num_csums) + return 0; + + if (reserve) + return btrfs_calc_trans_metadata_size(root, + num_csums - old_csums); + + return btrfs_calc_trans_metadata_size(root, old_csums - num_csums); } int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; - u64 to_reserve; - int nr_extents; - int reserved_extents; + u64 to_reserve = 0; + u64 csum_bytes; + unsigned nr_extents = 0; + int extra_reserve = 0; + int flush = 1; int ret; - if (btrfs_transaction_in_commit(root->fs_info)) + /* Need to be holding the i_mutex here if we aren't free space cache */ + if (btrfs_is_free_space_inode(root, inode)) + flush = 0; + else + WARN_ON(!mutex_is_locked(&inode->i_mutex)); + + if (flush && btrfs_transaction_in_commit(root->fs_info)) schedule_timeout(1); num_bytes = ALIGN(num_bytes, root->sectorsize); - nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; - reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); + spin_lock(&BTRFS_I(inode)->lock); + BTRFS_I(inode)->outstanding_extents++; - if (nr_extents > reserved_extents) { - nr_extents -= reserved_extents; - to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); - } else { - nr_extents = 0; - to_reserve = 0; + if (BTRFS_I(inode)->outstanding_extents > + BTRFS_I(inode)->reserved_extents) + nr_extents = BTRFS_I(inode)->outstanding_extents - + BTRFS_I(inode)->reserved_extents; + + /* + * Add an item to reserve for updating the inode when we complete the + * delalloc io. + */ + if (!BTRFS_I(inode)->delalloc_meta_reserved) { + nr_extents++; + extra_reserve = 1; } - to_reserve += calc_csum_metadata_size(inode, num_bytes); - ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); - if (ret) + to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); + to_reserve += calc_csum_metadata_size(inode, num_bytes, 1); + csum_bytes = BTRFS_I(inode)->csum_bytes; + spin_unlock(&BTRFS_I(inode)->lock); + + ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); + if (ret) { + u64 to_free = 0; + unsigned dropped; + + spin_lock(&BTRFS_I(inode)->lock); + dropped = drop_outstanding_extent(inode); + /* + * If the inodes csum_bytes is the same as the original + * csum_bytes then we know we haven't raced with any free()ers + * so we can just reduce our inodes csum bytes and carry on. + * Otherwise we have to do the normal free thing to account for + * the case that the free side didn't free up its reserve + * because of this outstanding reservation. + */ + if (BTRFS_I(inode)->csum_bytes == csum_bytes) + calc_csum_metadata_size(inode, num_bytes, 0); + else + to_free = calc_csum_metadata_size(inode, num_bytes, 0); + spin_unlock(&BTRFS_I(inode)->lock); + if (dropped) + to_free += btrfs_calc_trans_metadata_size(root, dropped); + + if (to_free) + btrfs_block_rsv_release(root, block_rsv, to_free); return ret; + } - atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents); - atomic_inc(&BTRFS_I(inode)->outstanding_extents); + spin_lock(&BTRFS_I(inode)->lock); + if (extra_reserve) { + BTRFS_I(inode)->delalloc_meta_reserved = 1; + nr_extents--; + } + BTRFS_I(inode)->reserved_extents += nr_extents; + spin_unlock(&BTRFS_I(inode)->lock); block_rsv_add_bytes(block_rsv, to_reserve, 1); - if (block_rsv->size > 512 * 1024 * 1024) - shrink_delalloc(NULL, root, to_reserve, 0); - return 0; } +/** + * btrfs_delalloc_release_metadata - release a metadata reservation for an inode + * @inode: the inode to release the reservation for + * @num_bytes: the number of bytes we're releasing + * + * This will release the metadata reservation for an inode. This can be called + * once we complete IO for a given set of bytes to release their metadata + * reservations. + */ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) { struct btrfs_root *root = BTRFS_I(inode)->root; - u64 to_free; - int nr_extents; - int reserved_extents; + u64 to_free = 0; + unsigned dropped; num_bytes = ALIGN(num_bytes, root->sectorsize); - atomic_dec(&BTRFS_I(inode)->outstanding_extents); - WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0); - - reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); - do { - int old, new; - - nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); - if (nr_extents >= reserved_extents) { - nr_extents = 0; - break; - } - old = reserved_extents; - nr_extents = reserved_extents - nr_extents; - new = reserved_extents - nr_extents; - old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents, - reserved_extents, new); - if (likely(old == reserved_extents)) - break; - reserved_extents = old; - } while (1); + spin_lock(&BTRFS_I(inode)->lock); + dropped = drop_outstanding_extent(inode); - to_free = calc_csum_metadata_size(inode, num_bytes); - if (nr_extents > 0) - to_free += btrfs_calc_trans_metadata_size(root, nr_extents); + to_free = calc_csum_metadata_size(inode, num_bytes, 0); + spin_unlock(&BTRFS_I(inode)->lock); + if (dropped > 0) + to_free += btrfs_calc_trans_metadata_size(root, dropped); btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, to_free); } +/** + * btrfs_delalloc_reserve_space - reserve data and metadata space for delalloc + * @inode: inode we're writing to + * @num_bytes: the number of bytes we want to allocate + * + * This will do the following things + * + * o reserve space in the data space info for num_bytes + * o reserve space in the metadata space info based on number of outstanding + * extents and how much csums will be needed + * o add to the inodes ->delalloc_bytes + * o add it to the fs_info's delalloc inodes list. + * + * This will return 0 for success and -ENOSPC if there is no space left. + */ int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes) { int ret; @@ -4053,6 +4344,19 @@ int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes) return 0; } +/** + * btrfs_delalloc_release_space - release data and metadata space for delalloc + * @inode: inode we're releasing space for + * @num_bytes: the number of bytes we want to free up + * + * This must be matched with a call to btrfs_delalloc_reserve_space. This is + * called in the case that we don't need the metadata AND data reservations + * anymore. So if there is an error or we insert an inline extent. + * + * This function will release the metadata space that was not used and will + * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes + * list if there are no delalloc bytes left. + */ void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes) { btrfs_delalloc_release_metadata(inode, num_bytes); @@ -4072,12 +4376,12 @@ static int update_block_group(struct btrfs_trans_handle *trans, /* block accounting for super block */ spin_lock(&info->delalloc_lock); - old_val = btrfs_super_bytes_used(&info->super_copy); + old_val = btrfs_super_bytes_used(info->super_copy); if (alloc) old_val += num_bytes; else old_val -= num_bytes; - btrfs_set_super_bytes_used(&info->super_copy, old_val); + btrfs_set_super_bytes_used(info->super_copy, old_val); spin_unlock(&info->delalloc_lock); while (total) { @@ -4105,7 +4409,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, spin_lock(&cache->space_info->lock); spin_lock(&cache->lock); - if (btrfs_super_cache_generation(&info->super_copy) != 0 && + if (btrfs_test_opt(root, SPACE_CACHE) && cache->disk_cache_state < BTRFS_DC_CLEAR) cache->disk_cache_state = BTRFS_DC_CLEAR; @@ -4117,7 +4421,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, btrfs_set_block_group_used(&cache->item, old_val); cache->reserved -= num_bytes; cache->space_info->bytes_reserved -= num_bytes; - cache->space_info->reservation_progress++; cache->space_info->bytes_used += num_bytes; cache->space_info->disk_used += num_bytes * factor; spin_unlock(&cache->lock); @@ -4169,7 +4472,6 @@ static int pin_down_extent(struct btrfs_root *root, if (reserved) { cache->reserved -= num_bytes; cache->space_info->bytes_reserved -= num_bytes; - cache->space_info->reservation_progress++; } spin_unlock(&cache->lock); spin_unlock(&cache->space_info->lock); @@ -4197,45 +4499,82 @@ int btrfs_pin_extent(struct btrfs_root *root, } /* - * update size of reserved extents. this function may return -EAGAIN - * if 'reserve' is true or 'sinfo' is false. + * this function must be called within transaction */ -int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, - u64 num_bytes, int reserve, int sinfo) +int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes) { + struct btrfs_block_group_cache *cache; + + cache = btrfs_lookup_block_group(root->fs_info, bytenr); + BUG_ON(!cache); + + /* + * pull in the free space cache (if any) so that our pin + * removes the free space from the cache. We have load_only set + * to one because the slow code to read in the free extents does check + * the pinned extents. + */ + cache_block_group(cache, trans, root, 1); + + pin_down_extent(root, cache, bytenr, num_bytes, 0); + + /* remove us from the free space cache (if we're there at all) */ + btrfs_remove_free_space(cache, bytenr, num_bytes); + btrfs_put_block_group(cache); + return 0; +} + +/** + * btrfs_update_reserved_bytes - update the block_group and space info counters + * @cache: The cache we are manipulating + * @num_bytes: The number of bytes in question + * @reserve: One of the reservation enums + * + * This is called by the allocator when it reserves space, or by somebody who is + * freeing space that was never actually used on disk. For example if you + * reserve some space for a new leaf in transaction A and before transaction A + * commits you free that leaf, you call this with reserve set to 0 in order to + * clear the reservation. + * + * Metadata reservations should be called with RESERVE_ALLOC so we do the proper + * ENOSPC accounting. For data we handle the reservation through clearing the + * delalloc bits in the io_tree. We have to do this since we could end up + * allocating less disk space for the amount of data we have reserved in the + * case of compression. + * + * If this is a reservation and the block group has become read only we cannot + * make the reservation and return -EAGAIN, otherwise this function always + * succeeds. + */ +static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, + u64 num_bytes, int reserve) +{ + struct btrfs_space_info *space_info = cache->space_info; int ret = 0; - if (sinfo) { - struct btrfs_space_info *space_info = cache->space_info; - spin_lock(&space_info->lock); - spin_lock(&cache->lock); - if (reserve) { - if (cache->ro) { - ret = -EAGAIN; - } else { - cache->reserved += num_bytes; - space_info->bytes_reserved += num_bytes; - } - } else { - if (cache->ro) - space_info->bytes_readonly += num_bytes; - cache->reserved -= num_bytes; - space_info->bytes_reserved -= num_bytes; - space_info->reservation_progress++; - } - spin_unlock(&cache->lock); - spin_unlock(&space_info->lock); - } else { - spin_lock(&cache->lock); + spin_lock(&space_info->lock); + spin_lock(&cache->lock); + if (reserve != RESERVE_FREE) { if (cache->ro) { ret = -EAGAIN; } else { - if (reserve) - cache->reserved += num_bytes; - else - cache->reserved -= num_bytes; + cache->reserved += num_bytes; + space_info->bytes_reserved += num_bytes; + if (reserve == RESERVE_ALLOC) { + BUG_ON(space_info->bytes_may_use < num_bytes); + space_info->bytes_may_use -= num_bytes; + } } - spin_unlock(&cache->lock); + } else { + if (cache->ro) + space_info->bytes_readonly += num_bytes; + cache->reserved -= num_bytes; + space_info->bytes_reserved -= num_bytes; + space_info->reservation_progress++; } + spin_unlock(&cache->lock); + spin_unlock(&space_info->lock); return ret; } @@ -4272,7 +4611,8 @@ int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, return 0; } -static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) +static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end, + const bool return_free_space) { struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_block_group_cache *cache = NULL; @@ -4292,7 +4632,8 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) if (start < cache->last_byte_to_unpin) { len = min(len, cache->last_byte_to_unpin - start); - btrfs_add_free_space(cache, start, len); + if (return_free_space) + btrfs_add_free_space(cache, start, len); } start += len; @@ -4301,13 +4642,8 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) spin_lock(&cache->lock); cache->pinned -= len; cache->space_info->bytes_pinned -= len; - if (cache->ro) { + if (cache->ro) cache->space_info->bytes_readonly += len; - } else if (cache->reserved_pinned > 0) { - len = min(len, cache->reserved_pinned); - cache->reserved_pinned -= len; - cache->space_info->bytes_reserved += len; - } spin_unlock(&cache->lock); spin_unlock(&cache->space_info->lock); } @@ -4322,11 +4658,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, { struct btrfs_fs_info *fs_info = root->fs_info; struct extent_io_tree *unpin; - struct btrfs_block_rsv *block_rsv; - struct btrfs_block_rsv *next_rsv; u64 start; u64 end; - int idx; int ret; if (fs_info->pinned_extents == &fs_info->freed_extents[0]) @@ -4345,34 +4678,10 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, end + 1 - start, NULL); clear_extent_dirty(unpin, start, end, GFP_NOFS); - unpin_extent_range(root, start, end); + unpin_extent_range(root, start, end, true); cond_resched(); } - mutex_lock(&fs_info->durable_block_rsv_mutex); - list_for_each_entry_safe(block_rsv, next_rsv, - &fs_info->durable_block_rsv_list, list) { - - idx = trans->transid & 0x1; - if (block_rsv->freed[idx] > 0) { - block_rsv_add_bytes(block_rsv, - block_rsv->freed[idx], 0); - block_rsv->freed[idx] = 0; - } - if (atomic_read(&block_rsv->usage) == 0) { - btrfs_block_rsv_release(root, block_rsv, (u64)-1); - - if (block_rsv->freed[0] == 0 && - block_rsv->freed[1] == 0) { - list_del_init(&block_rsv->list); - kfree(block_rsv); - } - } else { - btrfs_block_rsv_release(root, block_rsv, 0); - } - } - mutex_unlock(&fs_info->durable_block_rsv_mutex); - return 0; } @@ -4452,7 +4761,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, printk(KERN_ERR "umm, got %d back from search" ", was looking for %llu\n", ret, (unsigned long long)bytenr); - btrfs_print_leaf(extent_root, path->nodes[0]); + if (ret > 0) + btrfs_print_leaf(extent_root, + path->nodes[0]); } BUG_ON(ret); extent_slot = path->slots[0]; @@ -4648,7 +4959,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, struct extent_buffer *buf, u64 parent, int last_ref) { - struct btrfs_block_rsv *block_rsv; struct btrfs_block_group_cache *cache = NULL; int ret; @@ -4663,64 +4973,24 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, if (!last_ref) return; - block_rsv = get_block_rsv(trans, root); cache = btrfs_lookup_block_group(root->fs_info, buf->start); - if (block_rsv->space_info != cache->space_info) - goto out; if (btrfs_header_generation(buf) == trans->transid) { if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { ret = check_ref_cleanup(trans, root, buf->start); if (!ret) - goto pin; + goto out; } if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { pin_down_extent(root, cache, buf->start, buf->len, 1); - goto pin; + goto out; } WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); btrfs_add_free_space(cache, buf->start, buf->len); - ret = btrfs_update_reserved_bytes(cache, buf->len, 0, 0); - if (ret == -EAGAIN) { - /* block group became read-only */ - btrfs_update_reserved_bytes(cache, buf->len, 0, 1); - goto out; - } - - ret = 1; - spin_lock(&block_rsv->lock); - if (block_rsv->reserved < block_rsv->size) { - block_rsv->reserved += buf->len; - ret = 0; - } - spin_unlock(&block_rsv->lock); - - if (ret) { - spin_lock(&cache->space_info->lock); - cache->space_info->bytes_reserved -= buf->len; - cache->space_info->reservation_progress++; - spin_unlock(&cache->space_info->lock); - } - goto out; - } -pin: - if (block_rsv->durable && !cache->ro) { - ret = 0; - spin_lock(&cache->lock); - if (!cache->ro) { - cache->reserved_pinned += buf->len; - ret = 1; - } - spin_unlock(&cache->lock); - - if (ret) { - spin_lock(&block_rsv->lock); - block_rsv->freed[trans->transid & 0x1] += buf->len; - spin_unlock(&block_rsv->lock); - } + btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE); } out: /* @@ -4856,17 +5126,20 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root = orig_root->fs_info->extent_root; struct btrfs_free_cluster *last_ptr = NULL; struct btrfs_block_group_cache *block_group = NULL; + struct btrfs_block_group_cache *used_block_group; int empty_cluster = 2 * 1024 * 1024; int allowed_chunk_alloc = 0; int done_chunk_alloc = 0; struct btrfs_space_info *space_info; - int last_ptr_loop = 0; int loop = 0; int index = 0; + int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ? + RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC; bool found_uncached_bg = false; bool failed_cluster_refill = false; bool failed_alloc = false; bool use_cluster = true; + bool have_caching_bg = false; u64 ideal_cache_percent = 0; u64 ideal_cache_offset = 0; @@ -4919,6 +5192,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, ideal_cache: block_group = btrfs_lookup_block_group(root->fs_info, search_start); + used_block_group = block_group; /* * we don't want to use the block group if it doesn't match our * allocation bits, or if its not cached. @@ -4949,12 +5223,14 @@ ideal_cache: } } search: + have_caching_bg = false; down_read(&space_info->groups_sem); list_for_each_entry(block_group, &space_info->block_groups[index], list) { u64 offset; int cached; + used_block_group = block_group; btrfs_get_block_group(block_group); search_start = block_group->key.objectid; @@ -4978,13 +5254,15 @@ search: } have_block_group: - if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { + cached = block_group_cache_done(block_group); + if (unlikely(!cached)) { u64 free_percent; + found_uncached_bg = true; ret = cache_block_group(block_group, trans, orig_root, 1); if (block_group->cached == BTRFS_CACHE_FINISHED) - goto have_block_group; + goto alloc; free_percent = btrfs_block_group_used(&block_group->item); free_percent *= 100; @@ -4998,19 +5276,14 @@ have_block_group: } /* - * We only want to start kthread caching if we are at - * the point where we will wait for caching to make - * progress, or if our ideal search is over and we've - * found somebody to start caching. + * The caching workers are limited to 2 threads, so we + * can queue as much work as we care to. */ - if (loop > LOOP_CACHING_NOWAIT || - (loop > LOOP_FIND_IDEAL && - atomic_read(&space_info->caching_threads) < 2)) { + if (loop > LOOP_FIND_IDEAL) { ret = cache_block_group(block_group, trans, orig_root, 0); BUG_ON(ret); } - found_uncached_bg = true; /* * If loop is set for cached only, try the next block @@ -5020,92 +5293,80 @@ have_block_group: goto loop; } - cached = block_group_cache_done(block_group); - if (unlikely(!cached)) - found_uncached_bg = true; - +alloc: if (unlikely(block_group->ro)) goto loop; spin_lock(&block_group->free_space_ctl->tree_lock); if (cached && block_group->free_space_ctl->free_space < - num_bytes + empty_size) { + num_bytes + empty_cluster + empty_size) { spin_unlock(&block_group->free_space_ctl->tree_lock); goto loop; } spin_unlock(&block_group->free_space_ctl->tree_lock); /* - * Ok we want to try and use the cluster allocator, so lets look - * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will - * have tried the cluster allocator plenty of times at this - * point and not have found anything, so we are likely way too - * fragmented for the clustering stuff to find anything, so lets - * just skip it and let the allocator find whatever block it can - * find + * Ok we want to try and use the cluster allocator, so + * lets look there */ - if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) { + if (last_ptr) { /* * the refill lock keeps out other * people trying to start a new cluster */ spin_lock(&last_ptr->refill_lock); - if (last_ptr->block_group && - (last_ptr->block_group->ro || - !block_group_bits(last_ptr->block_group, data))) { - offset = 0; + used_block_group = last_ptr->block_group; + if (used_block_group != block_group && + (!used_block_group || + used_block_group->ro || + !block_group_bits(used_block_group, data))) { + used_block_group = block_group; goto refill_cluster; } - offset = btrfs_alloc_from_cluster(block_group, last_ptr, - num_bytes, search_start); + if (used_block_group != block_group) + btrfs_get_block_group(used_block_group); + + offset = btrfs_alloc_from_cluster(used_block_group, + last_ptr, num_bytes, used_block_group->key.objectid); if (offset) { /* we have a block, we're done */ spin_unlock(&last_ptr->refill_lock); goto checks; } - spin_lock(&last_ptr->lock); - /* - * whoops, this cluster doesn't actually point to - * this block group. Get a ref on the block - * group is does point to and try again - */ - if (!last_ptr_loop && last_ptr->block_group && - last_ptr->block_group != block_group) { - - btrfs_put_block_group(block_group); - block_group = last_ptr->block_group; - btrfs_get_block_group(block_group); - spin_unlock(&last_ptr->lock); - spin_unlock(&last_ptr->refill_lock); - - last_ptr_loop = 1; - search_start = block_group->key.objectid; - /* - * we know this block group is properly - * in the list because - * btrfs_remove_block_group, drops the - * cluster before it removes the block - * group from the list - */ - goto have_block_group; + WARN_ON(last_ptr->block_group != used_block_group); + if (used_block_group != block_group) { + btrfs_put_block_group(used_block_group); + used_block_group = block_group; } - spin_unlock(&last_ptr->lock); refill_cluster: + BUG_ON(used_block_group != block_group); + /* If we are on LOOP_NO_EMPTY_SIZE, we can't + * set up a new clusters, so lets just skip it + * and let the allocator find whatever block + * it can find. If we reach this point, we + * will have tried the cluster allocator + * plenty of times and not have found + * anything, so we are likely way too + * fragmented for the clustering stuff to find + * anything. */ + if (loop >= LOOP_NO_EMPTY_SIZE) { + spin_unlock(&last_ptr->refill_lock); + goto unclustered_alloc; + } + /* * this cluster didn't work out, free it and * start over */ btrfs_return_cluster_to_free_space(NULL, last_ptr); - last_ptr_loop = 0; - /* allocate a cluster in this block group */ ret = btrfs_find_space_cluster(trans, root, block_group, last_ptr, - offset, num_bytes, + search_start, num_bytes, empty_cluster + empty_size); if (ret == 0) { /* @@ -5141,6 +5402,7 @@ refill_cluster: goto loop; } +unclustered_alloc: offset = btrfs_find_space_for_alloc(block_group, search_start, num_bytes, empty_size); /* @@ -5159,20 +5421,22 @@ refill_cluster: failed_alloc = true; goto have_block_group; } else if (!offset) { + if (!cached) + have_caching_bg = true; goto loop; } checks: search_start = stripe_align(root, offset); /* move on to the next group */ if (search_start + num_bytes >= search_end) { - btrfs_add_free_space(block_group, offset, num_bytes); + btrfs_add_free_space(used_block_group, offset, num_bytes); goto loop; } /* move on to the next group */ if (search_start + num_bytes > - block_group->key.objectid + block_group->key.offset) { - btrfs_add_free_space(block_group, offset, num_bytes); + used_block_group->key.objectid + used_block_group->key.offset) { + btrfs_add_free_space(used_block_group, offset, num_bytes); goto loop; } @@ -5180,14 +5444,14 @@ checks: ins->offset = num_bytes; if (offset < search_start) - btrfs_add_free_space(block_group, offset, + btrfs_add_free_space(used_block_group, offset, search_start - offset); BUG_ON(offset > search_start); - ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1, - (data & BTRFS_BLOCK_GROUP_DATA)); + ret = btrfs_update_reserved_bytes(used_block_group, num_bytes, + alloc_type); if (ret == -EAGAIN) { - btrfs_add_free_space(block_group, offset, num_bytes); + btrfs_add_free_space(used_block_group, offset, num_bytes); goto loop; } @@ -5196,19 +5460,26 @@ checks: ins->offset = num_bytes; if (offset < search_start) - btrfs_add_free_space(block_group, offset, + btrfs_add_free_space(used_block_group, offset, search_start - offset); BUG_ON(offset > search_start); + if (used_block_group != block_group) + btrfs_put_block_group(used_block_group); btrfs_put_block_group(block_group); break; loop: failed_cluster_refill = false; failed_alloc = false; BUG_ON(index != get_block_group_index(block_group)); + if (used_block_group != block_group) + btrfs_put_block_group(used_block_group); btrfs_put_block_group(block_group); } up_read(&space_info->groups_sem); + if (!ins->objectid && loop >= LOOP_CACHING_WAIT && have_caching_bg) + goto search; + if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES) goto search; @@ -5227,8 +5498,7 @@ loop: if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { found_uncached_bg = false; loop++; - if (!ideal_cache_percent && - atomic_read(&space_info->caching_threads)) + if (!ideal_cache_percent) goto search; /* @@ -5308,7 +5578,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, int index = 0; spin_lock(&info->lock); - printk(KERN_INFO "space_info has %llu free, is %sfull\n", + printk(KERN_INFO "space_info %llu has %llu free, is %sfull\n", + (unsigned long long)info->flags, (unsigned long long)(info->total_bytes - info->bytes_used - info->bytes_pinned - info->bytes_reserved - info->bytes_readonly), @@ -5394,7 +5665,8 @@ again: return ret; } -int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) +static int __btrfs_free_reserved_extent(struct btrfs_root *root, + u64 start, u64 len, int pin) { struct btrfs_block_group_cache *cache; int ret = 0; @@ -5406,11 +5678,14 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) return -ENOSPC; } - if (btrfs_test_opt(root, DISCARD)) - ret = btrfs_discard_extent(root, start, len, NULL); - - btrfs_add_free_space(cache, start, len); - btrfs_update_reserved_bytes(cache, len, 0, 1); + if (pin) + pin_down_extent(root, cache, start, len, 1); + else { + if (btrfs_test_opt(root, DISCARD)) + ret = btrfs_discard_extent(root, start, len, NULL); + btrfs_add_free_space(cache, start, len); + btrfs_update_reserved_bytes(cache, len, RESERVE_FREE); + } btrfs_put_block_group(cache); trace_btrfs_reserved_extent_free(root, start, len); @@ -5418,6 +5693,18 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) return ret; } +int btrfs_free_reserved_extent(struct btrfs_root *root, + u64 start, u64 len) +{ + return __btrfs_free_reserved_extent(root, start, len, 0); +} + +int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root, + u64 start, u64 len) +{ + return __btrfs_free_reserved_extent(root, start, len, 1); +} + static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 parent, u64 root_objectid, @@ -5502,7 +5789,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; path->leave_spinning = 1; ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, @@ -5612,7 +5900,8 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, put_caching_control(caching_ctl); } - ret = btrfs_update_reserved_bytes(block_group, ins->offset, 1, 1); + ret = btrfs_update_reserved_bytes(block_group, ins->offset, + RESERVE_ALLOC_NO_ACCOUNT); BUG_ON(ret); btrfs_put_block_group(block_group); ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, @@ -5631,7 +5920,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, if (!buf) return ERR_PTR(-ENOMEM); btrfs_set_header_generation(buf, trans->transid); - btrfs_set_buffer_lockdep_class(buf, level); + btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); btrfs_tree_lock(buf); clean_tree_block(trans, root, buf); @@ -5669,8 +5958,7 @@ use_block_rsv(struct btrfs_trans_handle *trans, block_rsv = get_block_rsv(trans, root); if (block_rsv->size == 0) { - ret = reserve_metadata_bytes(trans, root, block_rsv, - blocksize, 0); + ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0); /* * If we couldn't reserve metadata bytes try and use some from * the global reserve. @@ -5690,13 +5978,15 @@ use_block_rsv(struct btrfs_trans_handle *trans, if (!ret) return block_rsv; if (ret) { - WARN_ON(1); - ret = reserve_metadata_bytes(trans, root, block_rsv, blocksize, - 0); + static DEFINE_RATELIMIT_STATE(_rs, + DEFAULT_RATELIMIT_INTERVAL, + /*DEFAULT_RATELIMIT_BURST*/ 2); + if (__ratelimit(&_rs)) { + printk(KERN_DEBUG "btrfs: block rsv returned %d\n", ret); + WARN_ON(1); + } + ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0); if (!ret) { - spin_lock(&block_rsv->lock); - block_rsv->size += blocksize; - spin_unlock(&block_rsv->lock); return block_rsv; } else if (ret && block_rsv != global_rsv) { ret = block_rsv_use_bytes(global_rsv, blocksize); @@ -5918,7 +6208,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, return 1; if (path->locks[level] && !wc->keep_locks) { - btrfs_tree_unlock(eb); + btrfs_tree_unlock_rw(eb, path->locks[level]); path->locks[level] = 0; } return 0; @@ -5942,7 +6232,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, * keep the tree lock */ if (path->locks[level] && level > 0) { - btrfs_tree_unlock(eb); + btrfs_tree_unlock_rw(eb, path->locks[level]); path->locks[level] = 0; } return 0; @@ -6055,7 +6345,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, BUG_ON(level != btrfs_header_level(next)); path->nodes[level] = next; path->slots[level] = 0; - path->locks[level] = 1; + path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; wc->level = level; if (wc->level == 1) wc->reada_slot = 0; @@ -6126,7 +6416,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, BUG_ON(level == 0); btrfs_tree_lock(eb); btrfs_set_lock_blocking(eb); - path->locks[level] = 1; + path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; ret = btrfs_lookup_extent_info(trans, root, eb->start, eb->len, @@ -6135,8 +6425,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, BUG_ON(ret); BUG_ON(wc->refs[level] == 0); if (wc->refs[level] == 1) { - btrfs_tree_unlock(eb); - path->locks[level] = 0; + btrfs_tree_unlock_rw(eb, path->locks[level]); return 1; } } @@ -6158,7 +6447,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, btrfs_header_generation(eb) == trans->transid) { btrfs_tree_lock(eb); btrfs_set_lock_blocking(eb); - path->locks[level] = 1; + path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; } clean_tree_block(trans, root, eb); } @@ -6237,7 +6526,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, return 0; if (path->locks[level]) { - btrfs_tree_unlock(path->nodes[level]); + btrfs_tree_unlock_rw(path->nodes[level], + path->locks[level]); path->locks[level] = 0; } free_extent_buffer(path->nodes[level]); @@ -6259,8 +6549,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, * also make sure backrefs for the shared block and all lower level * blocks are properly updated. */ -int btrfs_drop_snapshot(struct btrfs_root *root, - struct btrfs_block_rsv *block_rsv, int update_ref) +void btrfs_drop_snapshot(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, int update_ref) { struct btrfs_path *path; struct btrfs_trans_handle *trans; @@ -6271,12 +6561,20 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int err = 0; int ret; int level; + bool root_dropped = false; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) { + err = -ENOMEM; + goto out; + } wc = kzalloc(sizeof(*wc), GFP_NOFS); - BUG_ON(!wc); + if (!wc) { + btrfs_free_path(path); + err = -ENOMEM; + goto out; + } trans = btrfs_start_transaction(tree_root, 0); BUG_ON(IS_ERR(trans)); @@ -6289,7 +6587,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, path->nodes[level] = btrfs_lock_root_node(root); btrfs_set_lock_blocking(path->nodes[level]); path->slots[level] = 0; - path->locks[level] = 1; + path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; memset(&wc->update_progress, 0, sizeof(wc->update_progress)); } else { @@ -6304,7 +6602,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, path->lowest_level = 0; if (ret < 0) { err = ret; - goto out; + goto out_free; } WARN_ON(ret > 0); @@ -6318,6 +6616,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, while (1) { btrfs_tree_lock(path->nodes[level]); btrfs_set_lock_blocking(path->nodes[level]); + path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; ret = btrfs_lookup_extent_info(trans, root, path->nodes[level]->start, @@ -6331,6 +6630,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, break; btrfs_tree_unlock(path->nodes[level]); + path->locks[level] = 0; WARN_ON(wc->refs[level] != 1); level--; } @@ -6411,11 +6711,24 @@ int btrfs_drop_snapshot(struct btrfs_root *root, free_extent_buffer(root->commit_root); kfree(root); } -out: + root_dropped = true; +out_free: btrfs_end_transaction_throttle(trans, tree_root); kfree(wc); btrfs_free_path(path); - return err; +out: + /* + * So if we need to stop dropping the snapshot for whatever reason we + * need to make sure to add it back to the dead root list so that we + * keep trying to do the work later. This also cleans up roots if we + * don't have it in the radix (like when we recover after a power fail + * or unmount) so we don't leak memory. + */ + if (root_dropped == false) + btrfs_add_dead_root(root); + if (err && err != -EAGAIN) + btrfs_std_error(root->fs_info, err); + return; } /* @@ -6457,7 +6770,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, level = btrfs_header_level(node); path->nodes[level] = node; path->slots[level] = 0; - path->locks[level] = 1; + path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; wc->refs[parent_level] = 1; wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; @@ -6532,30 +6845,45 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) return flags; } -static int set_block_group_ro(struct btrfs_block_group_cache *cache) +static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force) { struct btrfs_space_info *sinfo = cache->space_info; u64 num_bytes; + u64 min_allocable_bytes; int ret = -ENOSPC; - if (cache->ro) - return 0; + + /* + * We need some metadata space and system metadata space for + * allocating chunks in some corner cases until we force to set + * it to be readonly. + */ + if ((sinfo->flags & + (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) && + !force) + min_allocable_bytes = 1 * 1024 * 1024; + else + min_allocable_bytes = 0; spin_lock(&sinfo->lock); spin_lock(&cache->lock); + + if (cache->ro) { + ret = 0; + goto out; + } + num_bytes = cache->key.offset - cache->reserved - cache->pinned - cache->bytes_super - btrfs_block_group_used(&cache->item); if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + - sinfo->bytes_may_use + sinfo->bytes_readonly + - cache->reserved_pinned + num_bytes <= sinfo->total_bytes) { + sinfo->bytes_may_use + sinfo->bytes_readonly + num_bytes + + min_allocable_bytes <= sinfo->total_bytes) { sinfo->bytes_readonly += num_bytes; - sinfo->bytes_reserved += cache->reserved_pinned; - cache->reserved_pinned = 0; cache->ro = 1; ret = 0; } - +out: spin_unlock(&cache->lock); spin_unlock(&sinfo->lock); return ret; @@ -6579,7 +6907,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, CHUNK_ALLOC_FORCE); - ret = set_block_group_ro(cache); + ret = set_block_group_ro(cache, 0); if (!ret) goto out; alloc_flags = get_alloc_profile(root, cache->space_info->flags); @@ -6587,7 +6915,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, CHUNK_ALLOC_FORCE); if (ret < 0) goto out; - ret = set_block_group_ro(cache); + ret = set_block_group_ro(cache, 0); out: btrfs_end_transaction(trans, root); return ret; @@ -6688,6 +7016,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) struct btrfs_space_info *space_info; struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; struct btrfs_device *device; + u64 min_free; + u64 dev_min = 1; + u64 dev_nr = 0; + int index; int full = 0; int ret = 0; @@ -6697,8 +7029,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) if (!block_group) return -1; + min_free = btrfs_block_group_used(&block_group->item); + /* no bytes used, we're good */ - if (!btrfs_block_group_used(&block_group->item)) + if (!min_free) goto out; space_info = block_group->space_info; @@ -6714,10 +7048,9 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) * all of the extents from this block group. If we can, we're good */ if ((space_info->total_bytes != block_group->key.offset) && - (space_info->bytes_used + space_info->bytes_reserved + - space_info->bytes_pinned + space_info->bytes_readonly + - btrfs_block_group_used(&block_group->item) < - space_info->total_bytes)) { + (space_info->bytes_used + space_info->bytes_reserved + + space_info->bytes_pinned + space_info->bytes_readonly + + min_free < space_info->total_bytes)) { spin_unlock(&space_info->lock); goto out; } @@ -6734,9 +7067,31 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) if (full) goto out; + /* + * index: + * 0: raid10 + * 1: raid1 + * 2: dup + * 3: raid0 + * 4: single + */ + index = get_block_group_index(block_group); + if (index == 0) { + dev_min = 4; + /* Divide by 2 */ + min_free >>= 1; + } else if (index == 1) { + dev_min = 2; + } else if (index == 2) { + /* Multiply by 2 */ + min_free <<= 1; + } else if (index == 3) { + dev_min = fs_devices->rw_devices; + do_div(min_free, dev_min); + } + mutex_lock(&root->fs_info->chunk_mutex); list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { - u64 min_free = btrfs_block_group_used(&block_group->item); u64 dev_offset; /* @@ -6747,7 +7102,11 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) ret = find_free_dev_extent(NULL, device, min_free, &dev_offset, NULL); if (!ret) + dev_nr++; + + if (dev_nr >= dev_min) break; + ret = -1; } } @@ -6887,7 +7246,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) struct btrfs_space_info, list); if (space_info->bytes_pinned > 0 || - space_info->bytes_reserved > 0) { + space_info->bytes_reserved > 0 || + space_info->bytes_may_use > 0) { WARN_ON(1); dump_space_info(space_info, 0, 0); } @@ -6929,14 +7289,12 @@ int btrfs_read_block_groups(struct btrfs_root *root) return -ENOMEM; path->reada = 1; - cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy); - if (cache_gen != 0 && - btrfs_super_generation(&root->fs_info->super_copy) != cache_gen) + cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy); + if (btrfs_test_opt(root, SPACE_CACHE) && + btrfs_super_generation(root->fs_info->super_copy) != cache_gen) need_clear = 1; if (btrfs_test_opt(root, CLEAR_CACHE)) need_clear = 1; - if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen) - printk(KERN_INFO "btrfs: disk space caching is enabled\n"); while (1) { ret = find_first_block_group(root, path, &key); @@ -7024,7 +7382,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) set_avail_alloc_bits(root->fs_info, cache->flags); if (btrfs_chunk_readonly(root, cache->key.objectid)) - set_block_group_ro(cache); + set_block_group_ro(cache, 1); } list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { @@ -7038,9 +7396,9 @@ int btrfs_read_block_groups(struct btrfs_root *root) * mirrored block groups. */ list_for_each_entry(cache, &space_info->block_groups[3], list) - set_block_group_ro(cache); + set_block_group_ro(cache, 1); list_for_each_entry(cache, &space_info->block_groups[4], list) - set_block_group_ro(cache); + set_block_group_ro(cache, 1); } init_global_block_rsv(info); @@ -7170,11 +7528,15 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, spin_unlock(&cluster->refill_lock); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) { + ret = -ENOMEM; + goto out; + } - inode = lookup_free_space_inode(root, block_group, path); + inode = lookup_free_space_inode(tree_root, block_group, path); if (!IS_ERR(inode)) { - btrfs_orphan_add(trans, inode); + ret = btrfs_orphan_add(trans, inode); + BUG_ON(ret); clear_nlink(inode); /* One for the block groups ref */ spin_lock(&block_group->lock); @@ -7187,7 +7549,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, spin_unlock(&block_group->lock); } /* One for our lookup ref */ - iput(inode); + btrfs_add_delayed_iput(inode); } key.objectid = BTRFS_FREE_SPACE_OBJECTID; @@ -7258,7 +7620,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info) int mixed = 0; int ret; - disk_super = &fs_info->super_copy; + disk_super = fs_info->super_copy; if (!btrfs_super_root(disk_super)) return 1; @@ -7289,7 +7651,7 @@ out: int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) { - return unpin_extent_range(root, start, end); + return unpin_extent_range(root, start, end, false); } int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 7055d11..9a837a8 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -17,6 +17,7 @@ #include "compat.h" #include "ctree.h" #include "btrfs_inode.h" +#include "volumes.h" static struct kmem_cache *extent_state_cache; static struct kmem_cache *extent_buffer_cache; @@ -254,14 +255,14 @@ static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, * * This should be called with the tree lock held. */ -static int merge_state(struct extent_io_tree *tree, - struct extent_state *state) +static void merge_state(struct extent_io_tree *tree, + struct extent_state *state) { struct extent_state *other; struct rb_node *other_node; if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) - return 0; + return; other_node = rb_prev(&state->rb_node); if (other_node) { @@ -281,26 +282,19 @@ static int merge_state(struct extent_io_tree *tree, if (other->start == state->end + 1 && other->state == state->state) { merge_cb(tree, state, other); - other->start = state->start; - state->tree = NULL; - rb_erase(&state->rb_node, &tree->state); - free_extent_state(state); - state = NULL; + state->end = other->end; + other->tree = NULL; + rb_erase(&other->rb_node, &tree->state); + free_extent_state(other); } } - - return 0; } -static int set_state_cb(struct extent_io_tree *tree, +static void set_state_cb(struct extent_io_tree *tree, struct extent_state *state, int *bits) { - if (tree->ops && tree->ops->set_bit_hook) { - return tree->ops->set_bit_hook(tree->mapping->host, - state, bits); - } - - return 0; + if (tree->ops && tree->ops->set_bit_hook) + tree->ops->set_bit_hook(tree->mapping->host, state, bits); } static void clear_state_cb(struct extent_io_tree *tree, @@ -310,6 +304,9 @@ static void clear_state_cb(struct extent_io_tree *tree, tree->ops->clear_bit_hook(tree->mapping->host, state, bits); } +static void set_state_bits(struct extent_io_tree *tree, + struct extent_state *state, int *bits); + /* * insert an extent_state struct into the tree. 'bits' are set on the * struct before it is inserted. @@ -325,8 +322,6 @@ static int insert_state(struct extent_io_tree *tree, int *bits) { struct rb_node *node; - int bits_to_set = *bits & ~EXTENT_CTLBITS; - int ret; if (end < start) { printk(KERN_ERR "btrfs end < start %llu %llu\n", @@ -336,13 +331,9 @@ static int insert_state(struct extent_io_tree *tree, } state->start = start; state->end = end; - ret = set_state_cb(tree, state, bits); - if (ret) - return ret; - if (bits_to_set & EXTENT_DIRTY) - tree->dirty_bytes += end - start + 1; - state->state |= bits_to_set; + set_state_bits(tree, state, bits); + node = tree_insert(&tree->state, end, &state->rb_node); if (node) { struct extent_state *found; @@ -351,7 +342,6 @@ static int insert_state(struct extent_io_tree *tree, "%llu %llu\n", (unsigned long long)found->start, (unsigned long long)found->end, (unsigned long long)start, (unsigned long long)end); - free_extent_state(state); return -EEXIST; } state->tree = tree; @@ -359,13 +349,11 @@ static int insert_state(struct extent_io_tree *tree, return 0; } -static int split_cb(struct extent_io_tree *tree, struct extent_state *orig, +static void split_cb(struct extent_io_tree *tree, struct extent_state *orig, u64 split) { if (tree->ops && tree->ops->split_extent_hook) - return tree->ops->split_extent_hook(tree->mapping->host, - orig, split); - return 0; + tree->ops->split_extent_hook(tree->mapping->host, orig, split); } /* @@ -500,7 +488,8 @@ again: cached_state = NULL; } - if (cached && cached->tree && cached->start == start) { + if (cached && cached->tree && cached->start <= start && + cached->end > start) { if (clear) atomic_dec(&cached->refs); state = cached; @@ -660,34 +649,25 @@ again: if (start > end) break; - if (need_resched()) { - spin_unlock(&tree->lock); - cond_resched(); - spin_lock(&tree->lock); - } + cond_resched_lock(&tree->lock); } out: spin_unlock(&tree->lock); return 0; } -static int set_state_bits(struct extent_io_tree *tree, +static void set_state_bits(struct extent_io_tree *tree, struct extent_state *state, int *bits) { - int ret; int bits_to_set = *bits & ~EXTENT_CTLBITS; - ret = set_state_cb(tree, state, bits); - if (ret) - return ret; + set_state_cb(tree, state, bits); if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { u64 range = state->end - state->start + 1; tree->dirty_bytes += range; } state->state |= bits_to_set; - - return 0; } static void cache_state(struct extent_state *state, @@ -742,7 +722,8 @@ again: spin_lock(&tree->lock); if (cached_state && *cached_state) { state = *cached_state; - if (state->start == start && state->tree) { + if (state->start <= start && state->end > start && + state->tree) { node = &state->rb_node; goto hit_next; } @@ -779,17 +760,15 @@ hit_next: goto out; } - err = set_state_bits(tree, state, &bits); - if (err) - goto out; + set_state_bits(tree, state, &bits); - next_node = rb_next(node); cache_state(state, cached_state); merge_state(tree, state); if (last_end == (u64)-1) goto out; start = last_end + 1; + next_node = rb_next(&state->rb_node); if (next_node && start < end && prealloc && !need_resched()) { state = rb_entry(next_node, struct extent_state, rb_node); @@ -830,9 +809,7 @@ hit_next: if (err) goto out; if (state->end <= end) { - err = set_state_bits(tree, state, &bits); - if (err) - goto out; + set_state_bits(tree, state, &bits); cache_state(state, cached_state); merge_state(tree, state); if (last_end == (u64)-1) @@ -862,7 +839,6 @@ hit_next: * Avoid to free 'prealloc' if it can be merged with * the later extent. */ - atomic_inc(&prealloc->refs); err = insert_state(tree, prealloc, start, this_end, &bits); BUG_ON(err == -EEXIST); @@ -872,7 +848,6 @@ hit_next: goto out; } cache_state(prealloc, cached_state); - free_extent_state(prealloc); prealloc = NULL; start = this_end + 1; goto search_again; @@ -895,12 +870,204 @@ hit_next: err = split_state(tree, state, prealloc, end + 1); BUG_ON(err == -EEXIST); - err = set_state_bits(tree, prealloc, &bits); + set_state_bits(tree, prealloc, &bits); + cache_state(prealloc, cached_state); + merge_state(tree, prealloc); + prealloc = NULL; + goto out; + } + + goto search_again; + +out: + spin_unlock(&tree->lock); + if (prealloc) + free_extent_state(prealloc); + + return err; + +search_again: + if (start > end) + goto out; + spin_unlock(&tree->lock); + if (mask & __GFP_WAIT) + cond_resched(); + goto again; +} + +/** + * convert_extent - convert all bits in a given range from one bit to another + * @tree: the io tree to search + * @start: the start offset in bytes + * @end: the end offset in bytes (inclusive) + * @bits: the bits to set in this range + * @clear_bits: the bits to clear in this range + * @mask: the allocation mask + * + * This will go through and set bits for the given range. If any states exist + * already in this range they are set with the given bit and cleared of the + * clear_bits. This is only meant to be used by things that are mergeable, ie + * converting from say DELALLOC to DIRTY. This is not meant to be used with + * boundary bits like LOCK. + */ +int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int clear_bits, gfp_t mask) +{ + struct extent_state *state; + struct extent_state *prealloc = NULL; + struct rb_node *node; + int err = 0; + u64 last_start; + u64 last_end; + +again: + if (!prealloc && (mask & __GFP_WAIT)) { + prealloc = alloc_extent_state(mask); + if (!prealloc) + return -ENOMEM; + } + + spin_lock(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(tree, start); + if (!node) { + prealloc = alloc_extent_state_atomic(prealloc); + if (!prealloc) { + err = -ENOMEM; + goto out; + } + err = insert_state(tree, prealloc, start, end, &bits); + prealloc = NULL; + BUG_ON(err == -EEXIST); + goto out; + } + state = rb_entry(node, struct extent_state, rb_node); +hit_next: + last_start = state->start; + last_end = state->end; + + /* + * | ---- desired range ---- | + * | state | + * + * Just lock what we found and keep going + */ + if (state->start == start && state->end <= end) { + struct rb_node *next_node; + + set_state_bits(tree, state, &bits); + clear_state_bit(tree, state, &clear_bits, 0); + + merge_state(tree, state); + if (last_end == (u64)-1) + goto out; + + start = last_end + 1; + next_node = rb_next(&state->rb_node); + if (next_node && start < end && prealloc && !need_resched()) { + state = rb_entry(next_node, struct extent_state, + rb_node); + if (state->start == start) + goto hit_next; + } + goto search_again; + } + + /* + * | ---- desired range ---- | + * | state | + * or + * | ------------- state -------------- | + * + * We need to split the extent we found, and may flip bits on + * second half. + * + * If the extent we found extends past our + * range, we just split and search again. It'll get split + * again the next time though. + * + * If the extent we found is inside our range, we set the + * desired bit on it. + */ + if (state->start < start) { + prealloc = alloc_extent_state_atomic(prealloc); + if (!prealloc) { + err = -ENOMEM; + goto out; + } + err = split_state(tree, state, prealloc, start); + BUG_ON(err == -EEXIST); + prealloc = NULL; + if (err) + goto out; + if (state->end <= end) { + set_state_bits(tree, state, &bits); + clear_state_bit(tree, state, &clear_bits, 0); + merge_state(tree, state); + if (last_end == (u64)-1) + goto out; + start = last_end + 1; + } + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | or | state | + * + * There's a hole, we need to insert something in it and + * ignore the extent we found. + */ + if (state->start > start) { + u64 this_end; + if (end < last_start) + this_end = end; + else + this_end = last_start - 1; + + prealloc = alloc_extent_state_atomic(prealloc); + if (!prealloc) { + err = -ENOMEM; + goto out; + } + + /* + * Avoid to free 'prealloc' if it can be merged with + * the later extent. + */ + err = insert_state(tree, prealloc, start, this_end, + &bits); + BUG_ON(err == -EEXIST); if (err) { + free_extent_state(prealloc); prealloc = NULL; goto out; } - cache_state(prealloc, cached_state); + prealloc = NULL; + start = this_end + 1; + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | + * We need to split the extent, and set the bit + * on the first half + */ + if (state->start <= end && state->end > end) { + prealloc = alloc_extent_state_atomic(prealloc); + if (!prealloc) { + err = -ENOMEM; + goto out; + } + + err = split_state(tree, state, prealloc, end + 1); + BUG_ON(err == -EEXIST); + + set_state_bits(tree, prealloc, &bits); + clear_state_bit(tree, prealloc, &clear_bits, 0); + merge_state(tree, prealloc); prealloc = NULL; goto out; @@ -949,7 +1116,7 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state **cached_state, gfp_t mask) { return set_extent_bit(tree, start, end, - EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE, + EXTENT_DELALLOC | EXTENT_UPTODATE, 0, NULL, cached_state, mask); } @@ -1042,19 +1209,33 @@ int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) mask); } -/* - * helper function to set both pages and extents in the tree writeback - */ -static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) +int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end) { unsigned long index = start >> PAGE_CACHE_SHIFT; unsigned long end_index = end >> PAGE_CACHE_SHIFT; struct page *page; while (index <= end_index) { - page = find_get_page(tree->mapping, index); - BUG_ON(!page); - set_page_writeback(page); + page = find_get_page(inode->i_mapping, index); + BUG_ON(!page); /* Pages should be in the extent_io_tree */ + clear_page_dirty_for_io(page); + page_cache_release(page); + index++; + } + return 0; +} + +int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(inode->i_mapping, index); + BUG_ON(!page); /* Pages should be in the extent_io_tree */ + account_page_redirty(page); + __set_page_dirty_nobuffers(page); page_cache_release(page); index++; } @@ -1062,43 +1243,22 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) } /* - * find the first offset in the io tree with 'bits' set. zero is - * returned if we find something, and *start_ret and *end_ret are - * set to reflect the state struct that was found. - * - * If nothing was found, 1 is returned, < 0 on error + * helper function to set both pages and extents in the tree writeback */ -int find_first_extent_bit(struct extent_io_tree *tree, u64 start, - u64 *start_ret, u64 *end_ret, int bits) +static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) { - struct rb_node *node; - struct extent_state *state; - int ret = 1; - - spin_lock(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(tree, start); - if (!node) - goto out; + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; - while (1) { - state = rb_entry(node, struct extent_state, rb_node); - if (state->end >= start && (state->state & bits)) { - *start_ret = state->start; - *end_ret = state->end; - ret = 0; - break; - } - node = rb_next(node); - if (!node) - break; + while (index <= end_index) { + page = find_get_page(tree->mapping, index); + BUG_ON(!page); + set_page_writeback(page); + page_cache_release(page); + index++; } -out: - spin_unlock(&tree->lock); - return ret; + return 0; } /* find the first state struct with 'bits' set after 'start', and @@ -1133,6 +1293,30 @@ out: } /* + * find the first offset in the io tree with 'bits' set. zero is + * returned if we find something, and *start_ret and *end_ret are + * set to reflect the state struct that was found. + * + * If nothing was found, 1 is returned, < 0 on error + */ +int find_first_extent_bit(struct extent_io_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, int bits) +{ + struct extent_state *state; + int ret = 1; + + spin_lock(&tree->lock); + state = find_first_extent_bit_state(tree, start, bits); + if (state) { + *start_ret = state->start; + *end_ret = state->end; + ret = 0; + } + spin_unlock(&tree->lock); + return ret; +} + +/* * find a contiguous range of bytes in the file marked as delalloc, not * more than 'max_bytes'. start and end are used to return the range, * @@ -1339,6 +1523,7 @@ again: * shortening the size of the delalloc range we're searching */ free_extent_state(cached_state); + cached_state = NULL; if (!loops) { unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1); max_bytes = PAGE_CACHE_SIZE - offset; @@ -1564,7 +1749,8 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, int bitset = 0; spin_lock(&tree->lock); - if (cached && cached->tree && cached->start == start) + if (cached && cached->tree && cached->start <= start && + cached->end > start) node = &cached->rb_node; else node = tree_search(tree, start); @@ -1644,6 +1830,368 @@ static int check_page_writeback(struct extent_io_tree *tree, return 0; } +/* + * When IO fails, either with EIO or csum verification fails, we + * try other mirrors that might have a good copy of the data. This + * io_failure_record is used to record state as we go through all the + * mirrors. If another mirror has good data, the page is set up to date + * and things continue. If a good mirror can't be found, the original + * bio end_io callback is called to indicate things have failed. + */ +struct io_failure_record { + struct page *page; + u64 start; + u64 len; + u64 logical; + unsigned long bio_flags; + int this_mirror; + int failed_mirror; + int in_validation; +}; + +static int free_io_failure(struct inode *inode, struct io_failure_record *rec, + int did_repair) +{ + int ret; + int err = 0; + struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; + + set_state_private(failure_tree, rec->start, 0); + ret = clear_extent_bits(failure_tree, rec->start, + rec->start + rec->len - 1, + EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS); + if (ret) + err = ret; + + if (did_repair) { + ret = clear_extent_bits(&BTRFS_I(inode)->io_tree, rec->start, + rec->start + rec->len - 1, + EXTENT_DAMAGED, GFP_NOFS); + if (ret && !err) + err = ret; + } + + kfree(rec); + return err; +} + +static void repair_io_failure_callback(struct bio *bio, int err) +{ + complete(bio->bi_private); +} + +/* + * this bypasses the standard btrfs submit functions deliberately, as + * the standard behavior is to write all copies in a raid setup. here we only + * want to write the one bad copy. so we do the mapping for ourselves and issue + * submit_bio directly. + * to avoid any synchonization issues, wait for the data after writing, which + * actually prevents the read that triggered the error from finishing. + * currently, there can be no more than two copies of every data bit. thus, + * exactly one rewrite is required. + */ +int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, + u64 length, u64 logical, struct page *page, + int mirror_num) +{ + struct bio *bio; + struct btrfs_device *dev; + DECLARE_COMPLETION_ONSTACK(compl); + u64 map_length = 0; + u64 sector; + struct btrfs_bio *bbio = NULL; + int ret; + + BUG_ON(!mirror_num); + + bio = bio_alloc(GFP_NOFS, 1); + if (!bio) + return -EIO; + bio->bi_private = &compl; + bio->bi_end_io = repair_io_failure_callback; + bio->bi_size = 0; + map_length = length; + + ret = btrfs_map_block(map_tree, WRITE, logical, + &map_length, &bbio, mirror_num); + if (ret) { + bio_put(bio); + return -EIO; + } + BUG_ON(mirror_num != bbio->mirror_num); + sector = bbio->stripes[mirror_num-1].physical >> 9; + bio->bi_sector = sector; + dev = bbio->stripes[mirror_num-1].dev; + kfree(bbio); + if (!dev || !dev->bdev || !dev->writeable) { + bio_put(bio); + return -EIO; + } + bio->bi_bdev = dev->bdev; + bio_add_page(bio, page, length, start-page_offset(page)); + submit_bio(WRITE_SYNC, bio); + wait_for_completion(&compl); + + if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { + /* try to remap that extent elsewhere? */ + bio_put(bio); + return -EIO; + } + + printk(KERN_INFO "btrfs read error corrected: ino %lu off %llu (dev %s " + "sector %llu)\n", page->mapping->host->i_ino, start, + dev->name, sector); + + bio_put(bio); + return 0; +} + +/* + * each time an IO finishes, we do a fast check in the IO failure tree + * to see if we need to process or clean up an io_failure_record + */ +static int clean_io_failure(u64 start, struct page *page) +{ + u64 private; + u64 private_failure; + struct io_failure_record *failrec; + struct btrfs_mapping_tree *map_tree; + struct extent_state *state; + int num_copies; + int did_repair = 0; + int ret; + struct inode *inode = page->mapping->host; + + private = 0; + ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, + (u64)-1, 1, EXTENT_DIRTY, 0); + if (!ret) + return 0; + + ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, start, + &private_failure); + if (ret) + return 0; + + failrec = (struct io_failure_record *)(unsigned long) private_failure; + BUG_ON(!failrec->this_mirror); + + if (failrec->in_validation) { + /* there was no real error, just free the record */ + pr_debug("clean_io_failure: freeing dummy error at %llu\n", + failrec->start); + did_repair = 1; + goto out; + } + + spin_lock(&BTRFS_I(inode)->io_tree.lock); + state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree, + failrec->start, + EXTENT_LOCKED); + spin_unlock(&BTRFS_I(inode)->io_tree.lock); + + if (state && state->start == failrec->start) { + map_tree = &BTRFS_I(inode)->root->fs_info->mapping_tree; + num_copies = btrfs_num_copies(map_tree, failrec->logical, + failrec->len); + if (num_copies > 1) { + ret = repair_io_failure(map_tree, start, failrec->len, + failrec->logical, page, + failrec->failed_mirror); + did_repair = !ret; + } + } + +out: + if (!ret) + ret = free_io_failure(inode, failrec, did_repair); + + return ret; +} + +/* + * this is a generic handler for readpage errors (default + * readpage_io_failed_hook). if other copies exist, read those and write back + * good data to the failed position. does not investigate in remapping the + * failed extent elsewhere, hoping the device will be smart enough to do this as + * needed + */ + +static int bio_readpage_error(struct bio *failed_bio, struct page *page, + u64 start, u64 end, int failed_mirror, + struct extent_state *state) +{ + struct io_failure_record *failrec = NULL; + u64 private; + struct extent_map *em; + struct inode *inode = page->mapping->host; + struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; + struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct bio *bio; + int num_copies; + int ret; + int read_mode; + u64 logical; + + BUG_ON(failed_bio->bi_rw & REQ_WRITE); + + ret = get_state_private(failure_tree, start, &private); + if (ret) { + failrec = kzalloc(sizeof(*failrec), GFP_NOFS); + if (!failrec) + return -ENOMEM; + failrec->start = start; + failrec->len = end - start + 1; + failrec->this_mirror = 0; + failrec->bio_flags = 0; + failrec->in_validation = 0; + + read_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, start, failrec->len); + if (!em) { + read_unlock(&em_tree->lock); + kfree(failrec); + return -EIO; + } + + if (em->start > start || em->start + em->len < start) { + free_extent_map(em); + em = NULL; + } + read_unlock(&em_tree->lock); + + if (!em || IS_ERR(em)) { + kfree(failrec); + return -EIO; + } + logical = start - em->start; + logical = em->block_start + logical; + if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { + logical = em->block_start; + failrec->bio_flags = EXTENT_BIO_COMPRESSED; + extent_set_compress_type(&failrec->bio_flags, + em->compress_type); + } + pr_debug("bio_readpage_error: (new) logical=%llu, start=%llu, " + "len=%llu\n", logical, start, failrec->len); + failrec->logical = logical; + free_extent_map(em); + + /* set the bits in the private failure tree */ + ret = set_extent_bits(failure_tree, start, end, + EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS); + if (ret >= 0) + ret = set_state_private(failure_tree, start, + (u64)(unsigned long)failrec); + /* set the bits in the inode's tree */ + if (ret >= 0) + ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED, + GFP_NOFS); + if (ret < 0) { + kfree(failrec); + return ret; + } + } else { + failrec = (struct io_failure_record *)(unsigned long)private; + pr_debug("bio_readpage_error: (found) logical=%llu, " + "start=%llu, len=%llu, validation=%d\n", + failrec->logical, failrec->start, failrec->len, + failrec->in_validation); + /* + * when data can be on disk more than twice, add to failrec here + * (e.g. with a list for failed_mirror) to make + * clean_io_failure() clean all those errors at once. + */ + } + num_copies = btrfs_num_copies( + &BTRFS_I(inode)->root->fs_info->mapping_tree, + failrec->logical, failrec->len); + if (num_copies == 1) { + /* + * we only have a single copy of the data, so don't bother with + * all the retry and error correction code that follows. no + * matter what the error is, it is very likely to persist. + */ + pr_debug("bio_readpage_error: cannot repair, num_copies == 1. " + "state=%p, num_copies=%d, next_mirror %d, " + "failed_mirror %d\n", state, num_copies, + failrec->this_mirror, failed_mirror); + free_io_failure(inode, failrec, 0); + return -EIO; + } + + if (!state) { + spin_lock(&tree->lock); + state = find_first_extent_bit_state(tree, failrec->start, + EXTENT_LOCKED); + if (state && state->start != failrec->start) + state = NULL; + spin_unlock(&tree->lock); + } + + /* + * there are two premises: + * a) deliver good data to the caller + * b) correct the bad sectors on disk + */ + if (failed_bio->bi_vcnt > 1) { + /* + * to fulfill b), we need to know the exact failing sectors, as + * we don't want to rewrite any more than the failed ones. thus, + * we need separate read requests for the failed bio + * + * if the following BUG_ON triggers, our validation request got + * merged. we need separate requests for our algorithm to work. + */ + BUG_ON(failrec->in_validation); + failrec->in_validation = 1; + failrec->this_mirror = failed_mirror; + read_mode = READ_SYNC | REQ_FAILFAST_DEV; + } else { + /* + * we're ready to fulfill a) and b) alongside. get a good copy + * of the failed sector and if we succeed, we have setup + * everything for repair_io_failure to do the rest for us. + */ + if (failrec->in_validation) { + BUG_ON(failrec->this_mirror != failed_mirror); + failrec->in_validation = 0; + failrec->this_mirror = 0; + } + failrec->failed_mirror = failed_mirror; + failrec->this_mirror++; + if (failrec->this_mirror == failed_mirror) + failrec->this_mirror++; + read_mode = READ_SYNC; + } + + if (!state || failrec->this_mirror > num_copies) { + pr_debug("bio_readpage_error: (fail) state=%p, num_copies=%d, " + "next_mirror %d, failed_mirror %d\n", state, + num_copies, failrec->this_mirror, failed_mirror); + free_io_failure(inode, failrec, 0); + return -EIO; + } + + bio = bio_alloc(GFP_NOFS, 1); + bio->bi_private = state; + bio->bi_end_io = failed_bio->bi_end_io; + bio->bi_sector = failrec->logical >> 9; + bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; + bio->bi_size = 0; + + bio_add_page(bio, page, failrec->len, start - page_offset(page)); + + pr_debug("bio_readpage_error: submitting new read[%#x] to " + "this_mirror=%d, num_copies=%d, in_validation=%d\n", read_mode, + failrec->this_mirror, num_copies, failrec->in_validation); + + tree->ops->submit_bio_hook(inode, read_mode, bio, failrec->this_mirror, + failrec->bio_flags, 0); + return 0; +} + /* lots and lots of room for performance fixes in the end_bio funcs */ /* @@ -1742,6 +2290,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err) struct extent_state *cached = NULL; struct extent_state *state; + pr_debug("end_bio_extent_readpage: bi_vcnt=%d, idx=%d, err=%d, " + "mirror=%ld\n", bio->bi_vcnt, bio->bi_idx, err, + (long int)bio->bi_bdev); tree = &BTRFS_I(page->mapping->host)->io_tree; start = ((u64)page->index << PAGE_CACHE_SHIFT) + @@ -1772,12 +2323,26 @@ static void end_bio_extent_readpage(struct bio *bio, int err) state); if (ret) uptodate = 0; + else + clean_io_failure(start, page); } - if (!uptodate && tree->ops && - tree->ops->readpage_io_failed_hook) { - ret = tree->ops->readpage_io_failed_hook(bio, page, - start, end, NULL); + if (!uptodate) { + int failed_mirror; + failed_mirror = (int)(unsigned long)bio->bi_bdev; + /* + * The generic bio_readpage_error handles errors the + * following way: If possible, new read requests are + * created and submitted and will end up in + * end_bio_extent_readpage as well (if we're lucky, not + * in the !uptodate case). In that case it returns 0 and + * we just go on with the next page in our bio. If it + * can't handle the error it will return -EIO and we + * remain responsible for that page. + */ + ret = bio_readpage_error(bio, page, start, end, + failed_mirror, NULL); if (ret == 0) { +error_handled: uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); if (err) @@ -1785,6 +2350,13 @@ static void end_bio_extent_readpage(struct bio *bio, int err) uncache_state(&cached); continue; } + if (tree->ops && tree->ops->readpage_io_failed_hook) { + ret = tree->ops->readpage_io_failed_hook( + bio, page, start, end, + failed_mirror, state); + if (ret == 0) + goto error_handled; + } } if (uptodate) { @@ -1856,6 +2428,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num, mirror_num, bio_flags, start); else submit_bio(rw, bio); + if (bio_flagged(bio, BIO_EOPNOTSUPP)) ret = -EOPNOTSUPP; bio_put(bio); @@ -1871,7 +2444,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, bio_end_io_t end_io_func, int mirror_num, unsigned long prev_bio_flags, - unsigned long bio_flags) + unsigned long bio_flags, + bool force_bio_submit) { int ret = 0; struct bio *bio; @@ -1890,6 +2464,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, sector; if (prev_bio_flags != bio_flags || !contig || + force_bio_submit || (tree->ops && tree->ops->merge_bio_hook && tree->ops->merge_bio_hook(page, offset, page_size, bio, bio_flags)) || @@ -1946,7 +2521,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent, struct bio **bio, int mirror_num, - unsigned long *bio_flags) + unsigned long *bio_flags, + u64 *prev_em_start) { struct inode *inode = page->mapping->host; u64 start = (u64)page->index << PAGE_CACHE_SHIFT; @@ -2002,6 +2578,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree, } } while (cur <= end) { + bool force_bio_submit = false; + if (cur >= last_byte) { char *userpage; struct extent_state *cached = NULL; @@ -2048,6 +2626,49 @@ static int __extent_read_full_page(struct extent_io_tree *tree, block_start = em->block_start; if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) block_start = EXTENT_MAP_HOLE; + + /* + * If we have a file range that points to a compressed extent + * and it's followed by a consecutive file range that points to + * to the same compressed extent (possibly with a different + * offset and/or length, so it either points to the whole extent + * or only part of it), we must make sure we do not submit a + * single bio to populate the pages for the 2 ranges because + * this makes the compressed extent read zero out the pages + * belonging to the 2nd range. Imagine the following scenario: + * + * File layout + * [0 - 8K] [8K - 24K] + * | | + * | | + * points to extent X, points to extent X, + * offset 4K, length of 8K offset 0, length 16K + * + * [extent X, compressed length = 4K uncompressed length = 16K] + * + * If the bio to read the compressed extent covers both ranges, + * it will decompress extent X into the pages belonging to the + * first range and then it will stop, zeroing out the remaining + * pages that belong to the other range that points to extent X. + * So here we make sure we submit 2 bios, one for the first + * range and another one for the third range. Both will target + * the same physical extent from disk, but we can't currently + * make the compressed bio endio callback populate the pages + * for both ranges because each compressed bio is tightly + * coupled with a single extent map, and each range can have + * an extent map with a different offset value relative to the + * uncompressed data of our extent and different lengths. This + * is a corner case so we prioritize correctness over + * non-optimal behavior (submitting 2 bios for the same extent). + */ + if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) && + prev_em_start && *prev_em_start != (u64)-1 && + *prev_em_start != em->orig_start) + force_bio_submit = true; + + if (prev_em_start) + *prev_em_start = em->orig_start; + free_extent_map(em); em = NULL; @@ -2102,7 +2723,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree, bdev, bio, pnr, end_bio_extent_readpage, mirror_num, *bio_flags, - this_bio_flag); + this_bio_flag, + force_bio_submit); nr++; *bio_flags = this_bio_flag; } @@ -2121,16 +2743,16 @@ out: } int extent_read_full_page(struct extent_io_tree *tree, struct page *page, - get_extent_t *get_extent) + get_extent_t *get_extent, int mirror_num) { struct bio *bio = NULL; unsigned long bio_flags = 0; int ret; - ret = __extent_read_full_page(tree, page, get_extent, &bio, 0, - &bio_flags); + ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num, + &bio_flags, NULL); if (bio) - ret = submit_one_bio(READ, bio, 0, bio_flags); + ret = submit_one_bio(READ, bio, mirror_num, bio_flags); return ret; } @@ -2181,6 +2803,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, int compressed; int write_flags; unsigned long nr_written = 0; + bool fill_delalloc = true; if (wbc->sync_mode == WB_SYNC_ALL) write_flags = WRITE_SYNC; @@ -2190,6 +2813,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, trace___extent_writepage(page, inode, wbc); WARN_ON(!PageLocked(page)); + + ClearPageError(page); + pg_offset = i_size & (PAGE_CACHE_SIZE - 1); if (page->index > end_index || (page->index == end_index && !pg_offset)) { @@ -2211,10 +2837,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, set_page_extent_mapped(page); + if (!tree->ops || !tree->ops->fill_delalloc) + fill_delalloc = false; + delalloc_start = start; delalloc_end = 0; page_started = 0; - if (!epd->extent_locked) { + if (!epd->extent_locked && fill_delalloc) { u64 delalloc_to_write = 0; /* * make sure the wbc mapping index is at least updated @@ -2380,7 +3009,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, sector, iosize, pg_offset, bdev, &epd->bio, max_nr, end_bio_extent_writepage, - 0, 0, 0); + 0, 0, 0, false); if (ret) SetPageError(page); } @@ -2432,6 +3061,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, pgoff_t index; pgoff_t end; /* Inclusive */ int scanned = 0; + int tag; pagevec_init(&pvec, 0); if (wbc->range_cyclic) { @@ -2442,11 +3072,16 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, end = wbc->range_end >> PAGE_CACHE_SHIFT; scanned = 1; } + if (wbc->sync_mode == WB_SYNC_ALL) + tag = PAGECACHE_TAG_TOWRITE; + else + tag = PAGECACHE_TAG_DIRTY; retry: + if (wbc->sync_mode == WB_SYNC_ALL) + tag_pages_for_writeback(mapping, index, end); while (!done && !nr_to_write_done && (index <= end) && - (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_DIRTY, min(end - index, - (pgoff_t)PAGEVEC_SIZE-1) + 1))) { + (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { unsigned i; scanned = 1; @@ -2460,10 +3095,16 @@ retry: * swizzled back from swapper_space to tmpfs file * mapping */ - if (tree->ops && tree->ops->write_cache_pages_lock_hook) - tree->ops->write_cache_pages_lock_hook(page); - else - lock_page(page); + if (tree->ops && + tree->ops->write_cache_pages_lock_hook) { + tree->ops->write_cache_pages_lock_hook(page, + data, flush_fn); + } else { + if (!trylock_page(page)) { + flush_fn(data); + lock_page(page); + } + } if (unlikely(page->mapping != mapping)) { unlock_page(page); @@ -2541,7 +3182,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, struct writeback_control *wbc) { int ret; - struct address_space *mapping = page->mapping; struct extent_page_data epd = { .bio = NULL, .tree = tree, @@ -2549,18 +3189,9 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, .extent_locked = 0, .sync_io = wbc->sync_mode == WB_SYNC_ALL, }; - struct writeback_control wbc_writepages = { - .sync_mode = wbc->sync_mode, - .older_than_this = NULL, - .nr_to_write = 64, - .range_start = page_offset(page) + PAGE_CACHE_SIZE, - .range_end = (loff_t)-1, - }; ret = __extent_writepage(page, wbc, &epd); - extent_write_cache_pages(tree, mapping, &wbc_writepages, - __extent_writepage, &epd, flush_write_bio); flush_epd_write_bio(&epd); return ret; } @@ -2584,7 +3215,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, }; struct writeback_control wbc_writepages = { .sync_mode = mode, - .older_than_this = NULL, .nr_to_write = nr_pages * 2, .range_start = start, .range_end = end + 1, @@ -2638,6 +3268,7 @@ int extent_readpages(struct extent_io_tree *tree, struct bio *bio = NULL; unsigned page_idx; unsigned long bio_flags = 0; + u64 prev_em_start = (u64)-1; for (page_idx = 0; page_idx < nr_pages; page_idx++) { struct page *page = list_entry(pages->prev, struct page, lru); @@ -2647,7 +3278,8 @@ int extent_readpages(struct extent_io_tree *tree, if (!add_to_page_cache_lru(page, mapping, page->index, GFP_NOFS)) { __extent_read_full_page(tree, page, get_extent, - &bio, 0, &bio_flags); + &bio, 0, &bio_flags, + &prev_em_start); } page_cache_release(page); } @@ -2840,6 +3472,9 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, return -ENOMEM; path->leave_spinning = 1; + start = ALIGN(start, BTRFS_I(inode)->root->sectorsize); + len = ALIGN(len, BTRFS_I(inode)->root->sectorsize); + /* * lookup the last file extent. We're not using i_size here * because there might be preallocation past i_size @@ -2887,7 +3522,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, &cached_state, GFP_NOFS); - em = get_extent_skip_holes(inode, off, last_for_get_extent, + em = get_extent_skip_holes(inode, start, last_for_get_extent, get_extent); if (!em) goto out; @@ -2976,7 +3611,7 @@ out: return ret; } -static inline struct page *extent_buffer_page(struct extent_buffer *eb, +inline struct page *extent_buffer_page(struct extent_buffer *eb, unsigned long i) { struct page *p; @@ -3001,7 +3636,7 @@ static inline struct page *extent_buffer_page(struct extent_buffer *eb, return p; } -static inline unsigned long num_extent_pages(u64 start, u64 len) +inline unsigned long num_extent_pages(u64 start, u64 len) { return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - (start >> PAGE_CACHE_SHIFT); @@ -3022,8 +3657,15 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, return NULL; eb->start = start; eb->len = len; - spin_lock_init(&eb->lock); - init_waitqueue_head(&eb->lock_wq); + rwlock_init(&eb->lock); + atomic_set(&eb->write_locks, 0); + atomic_set(&eb->read_locks, 0); + atomic_set(&eb->blocking_readers, 0); + atomic_set(&eb->blocking_writers, 0); + atomic_set(&eb->spinning_readers, 0); + atomic_set(&eb->spinning_writers, 0); + init_waitqueue_head(&eb->write_lock_wq); + init_waitqueue_head(&eb->read_lock_wq); #if LEAK_DEBUG spin_lock_irqsave(&leak_lock, flags); @@ -3119,7 +3761,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, i = 0; } for (; i < num_pages; i++, index++) { - p = find_or_create_page(mapping, index, GFP_NOFS | __GFP_HIGHMEM); + p = find_or_create_page(mapping, index, GFP_NOFS); if (!p) { WARN_ON(1); goto free_eb; @@ -3247,6 +3889,7 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, PAGECACHE_TAG_DIRTY); } spin_unlock_irq(&page->mapping->tree_lock); + ClearPageError(page); unlock_page(page); } return 0; @@ -3266,6 +3909,22 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree, return was_dirty; } +static int __eb_straddles_pages(u64 start, u64 len) +{ + if (len < PAGE_CACHE_SIZE) + return 1; + if (start & (PAGE_CACHE_SIZE - 1)) + return 1; + if ((start + len) & (PAGE_CACHE_SIZE - 1)) + return 1; + return 0; +} + +static int eb_straddles_pages(struct extent_buffer *eb) +{ + return __eb_straddles_pages(eb->start, eb->len); +} + int clear_extent_buffer_uptodate(struct extent_io_tree *tree, struct extent_buffer *eb, struct extent_state **cached_state) @@ -3277,8 +3936,10 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree, num_pages = num_extent_pages(eb->start, eb->len); clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); - clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, - cached_state, GFP_NOFS); + if (eb_straddles_pages(eb)) { + clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, + cached_state, GFP_NOFS); + } for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); if (page) @@ -3296,8 +3957,10 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree, num_pages = num_extent_pages(eb->start, eb->len); - set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, - NULL, GFP_NOFS); + if (eb_straddles_pages(eb)) { + set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, + NULL, GFP_NOFS); + } for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || @@ -3320,9 +3983,12 @@ int extent_range_uptodate(struct extent_io_tree *tree, int uptodate; unsigned long index; - ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL); - if (ret) - return 1; + if (__eb_straddles_pages(start, end - start + 1)) { + ret = test_range_bit(tree, start, end, + EXTENT_UPTODATE, 1, NULL); + if (ret) + return 1; + } while (start <= end) { index = start >> PAGE_CACHE_SHIFT; page = find_get_page(tree->mapping, index); @@ -3350,10 +4016,12 @@ int extent_buffer_uptodate(struct extent_io_tree *tree, if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) return 1; - ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, - EXTENT_UPTODATE, 1, cached_state); - if (ret) - return ret; + if (eb_straddles_pages(eb)) { + ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, + EXTENT_UPTODATE, 1, cached_state); + if (ret) + return ret; + } num_pages = num_extent_pages(eb->start, eb->len); for (i = 0; i < num_pages; i++) { @@ -3367,8 +4035,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree, } int read_extent_buffer_pages(struct extent_io_tree *tree, - struct extent_buffer *eb, - u64 start, int wait, + struct extent_buffer *eb, u64 start, int wait, get_extent_t *get_extent, int mirror_num) { unsigned long i; @@ -3382,13 +4049,16 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, unsigned long num_pages; struct bio *bio = NULL; unsigned long bio_flags = 0; + u64 prev_em_start = (u64)-1; if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) return 0; - if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, - EXTENT_UPTODATE, 1, NULL)) { - return 0; + if (eb_straddles_pages(eb)) { + if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, + EXTENT_UPTODATE, 1, NULL)) { + return 0; + } } if (start) { @@ -3402,7 +4072,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, num_pages = num_extent_pages(eb->start, eb->len); for (i = start_i; i < num_pages; i++) { page = extent_buffer_page(eb, i); - if (!wait) { + if (wait == WAIT_NONE) { if (!trylock_page(page)) goto unlock_exit; } else { @@ -3435,7 +4105,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, ClearPageError(page); err = __extent_read_full_page(tree, page, get_extent, &bio, - mirror_num, &bio_flags); + mirror_num, &bio_flags, + &prev_em_start); if (err) ret = err; } else { @@ -3446,7 +4117,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, if (bio) submit_one_bio(READ, bio, mirror_num, bio_flags); - if (ret || !wait) + if (ret || wait != WAIT_COMPLETE) return ret; for (i = start_i; i < num_pages; i++) { @@ -3492,9 +4163,8 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, page = extent_buffer_page(eb, i); cur = min(len, (PAGE_CACHE_SIZE - offset)); - kaddr = kmap_atomic(page, KM_USER1); + kaddr = page_address(page); memcpy(dst, kaddr + offset, cur); - kunmap_atomic(kaddr, KM_USER1); dst += cur; len -= cur; @@ -3504,9 +4174,9 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, } int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, - unsigned long min_len, char **token, char **map, + unsigned long min_len, char **map, unsigned long *map_start, - unsigned long *map_len, int km) + unsigned long *map_len) { size_t offset = start & (PAGE_CACHE_SIZE - 1); char *kaddr; @@ -3536,42 +4206,12 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, } p = extent_buffer_page(eb, i); - kaddr = kmap_atomic(p, km); - *token = kaddr; + kaddr = page_address(p); *map = kaddr + offset; *map_len = PAGE_CACHE_SIZE - offset; return 0; } -int map_extent_buffer(struct extent_buffer *eb, unsigned long start, - unsigned long min_len, - char **token, char **map, - unsigned long *map_start, - unsigned long *map_len, int km) -{ - int err; - int save = 0; - if (eb->map_token) { - unmap_extent_buffer(eb, eb->map_token, km); - eb->map_token = NULL; - save = 1; - } - err = map_private_extent_buffer(eb, start, min_len, token, map, - map_start, map_len, km); - if (!err && save) { - eb->map_token = *token; - eb->kaddr = *map; - eb->map_start = *map_start; - eb->map_len = *map_len; - } - return err; -} - -void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) -{ - kunmap_atomic(token, km); -} - int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, unsigned long start, unsigned long len) @@ -3595,9 +4235,8 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, cur = min(len, (PAGE_CACHE_SIZE - offset)); - kaddr = kmap_atomic(page, KM_USER0); + kaddr = page_address(page); ret = memcmp(ptr, kaddr + offset, cur); - kunmap_atomic(kaddr, KM_USER0); if (ret) break; @@ -3630,9 +4269,8 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv, WARN_ON(!PageUptodate(page)); cur = min(len, PAGE_CACHE_SIZE - offset); - kaddr = kmap_atomic(page, KM_USER1); + kaddr = page_address(page); memcpy(kaddr + offset, src, cur); - kunmap_atomic(kaddr, KM_USER1); src += cur; len -= cur; @@ -3661,9 +4299,8 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, WARN_ON(!PageUptodate(page)); cur = min(len, PAGE_CACHE_SIZE - offset); - kaddr = kmap_atomic(page, KM_USER0); + kaddr = page_address(page); memset(kaddr + offset, c, cur); - kunmap_atomic(kaddr, KM_USER0); len -= cur; offset = 0; @@ -3694,9 +4331,8 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); - kaddr = kmap_atomic(page, KM_USER0); + kaddr = page_address(page); read_extent_buffer(src, kaddr + offset, src_offset, cur); - kunmap_atomic(kaddr, KM_USER0); src_offset += cur; len -= cur; @@ -3709,20 +4345,17 @@ static void move_pages(struct page *dst_page, struct page *src_page, unsigned long dst_off, unsigned long src_off, unsigned long len) { - char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); + char *dst_kaddr = page_address(dst_page); if (dst_page == src_page) { memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); } else { - char *src_kaddr = kmap_atomic(src_page, KM_USER1); + char *src_kaddr = page_address(src_page); char *p = dst_kaddr + dst_off + len; char *s = src_kaddr + src_off + len; while (len--) *--p = *--s; - - kunmap_atomic(src_kaddr, KM_USER1); } - kunmap_atomic(dst_kaddr, KM_USER0); } static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len) @@ -3735,20 +4368,17 @@ static void copy_pages(struct page *dst_page, struct page *src_page, unsigned long dst_off, unsigned long src_off, unsigned long len) { - char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); + char *dst_kaddr = page_address(dst_page); char *src_kaddr; if (dst_page != src_page) { - src_kaddr = kmap_atomic(src_page, KM_USER1); + src_kaddr = page_address(src_page); } else { src_kaddr = dst_kaddr; BUG_ON(areas_overlap(src_off, dst_off, len)); } memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); - kunmap_atomic(dst_kaddr, KM_USER0); - if (dst_page != src_page) - kunmap_atomic(src_kaddr, KM_USER1); } void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index a11a92e..2e32510 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -17,6 +17,8 @@ #define EXTENT_NODATASUM (1 << 10) #define EXTENT_DO_ACCOUNTING (1 << 11) #define EXTENT_FIRST_DELALLOC (1 << 12) +#define EXTENT_NEED_WAIT (1 << 13) +#define EXTENT_DAMAGED (1 << 14) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) @@ -32,6 +34,7 @@ #define EXTENT_BUFFER_BLOCKING 1 #define EXTENT_BUFFER_DIRTY 2 #define EXTENT_BUFFER_CORRUPT 3 +#define EXTENT_BUFFER_READAHEAD 4 /* this got triggered by readahead */ /* these are flags for extent_clear_unlock_delalloc */ #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 @@ -67,7 +70,7 @@ struct extent_io_ops { unsigned long bio_flags); int (*readpage_io_hook)(struct page *page, u64 start, u64 end); int (*readpage_io_failed_hook)(struct bio *bio, struct page *page, - u64 start, u64 end, + u64 start, u64 end, int failed_mirror, struct extent_state *state); int (*writepage_io_failed_hook)(struct bio *bio, struct page *page, u64 start, u64 end, @@ -76,16 +79,17 @@ struct extent_io_ops { struct extent_state *state); int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state, int uptodate); - int (*set_bit_hook)(struct inode *inode, struct extent_state *state, - int *bits); - int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, - int *bits); - int (*merge_extent_hook)(struct inode *inode, - struct extent_state *new, - struct extent_state *other); - int (*split_extent_hook)(struct inode *inode, - struct extent_state *orig, u64 split); - int (*write_cache_pages_lock_hook)(struct page *page); + void (*set_bit_hook)(struct inode *inode, struct extent_state *state, + int *bits); + void (*clear_bit_hook)(struct inode *inode, struct extent_state *state, + int *bits); + void (*merge_extent_hook)(struct inode *inode, + struct extent_state *new, + struct extent_state *other); + void (*split_extent_hook)(struct inode *inode, + struct extent_state *orig, u64 split); + int (*write_cache_pages_lock_hook)(struct page *page, void *data, + void (*flush_fn)(void *)); }; struct extent_io_tree { @@ -108,8 +112,6 @@ struct extent_state { wait_queue_head_t wq; atomic_t refs; unsigned long state; - u64 split_start; - u64 split_end; /* for use by the FS */ u64 private; @@ -120,8 +122,6 @@ struct extent_state { struct extent_buffer { u64 start; unsigned long len; - char *map_token; - char *kaddr; unsigned long map_start; unsigned long map_len; struct page *first_page; @@ -130,14 +130,26 @@ struct extent_buffer { struct rcu_head rcu_head; atomic_t refs; - /* the spinlock is used to protect most operations */ - spinlock_t lock; + /* count of read lock holders on the extent buffer */ + atomic_t write_locks; + atomic_t read_locks; + atomic_t blocking_writers; + atomic_t blocking_readers; + atomic_t spinning_readers; + atomic_t spinning_writers; + + /* protects write locks */ + rwlock_t lock; + + /* readers use lock_wq while they wait for the write + * lock holders to unlock + */ + wait_queue_head_t write_lock_wq; - /* - * when we keep the lock held while blocking, waiters go onto - * the wq + /* writers use read_lock_wq while they wait for readers + * to unlock */ - wait_queue_head_t lock_wq; + wait_queue_head_t read_lock_wq; }; static inline void extent_set_compress_type(unsigned long *bio_flags, @@ -177,7 +189,7 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int extent_read_full_page(struct extent_io_tree *tree, struct page *page, - get_extent_t *get_extent); + get_extent_t *get_extent, int mirror_num); int __init extent_io_init(void); void extent_io_exit(void); @@ -206,6 +218,8 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); +int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int clear_bits, gfp_t mask); int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state **cached_state, gfp_t mask); int find_first_extent_bit(struct extent_io_tree *tree, u64 start, @@ -240,9 +254,14 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, u64 start, unsigned long len); void free_extent_buffer(struct extent_buffer *eb); +#define WAIT_NONE 0 +#define WAIT_COMPLETE 1 +#define WAIT_PAGE_LOCK 2 int read_extent_buffer_pages(struct extent_io_tree *tree, struct extent_buffer *eb, u64 start, int wait, get_extent_t *get_extent, int mirror_num); +unsigned long num_extent_pages(u64 start, u64 len); +struct page *extent_buffer_page(struct extent_buffer *eb, unsigned long i); static inline void extent_buffer_get(struct extent_buffer *eb) { @@ -279,17 +298,14 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree, int extent_buffer_uptodate(struct extent_io_tree *tree, struct extent_buffer *eb, struct extent_state *cached_state); -int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, - unsigned long min_len, char **token, char **map, - unsigned long *map_start, - unsigned long *map_len, int km); int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, - unsigned long min_len, char **token, char **map, + unsigned long min_len, char **map, unsigned long *map_start, - unsigned long *map_len, int km); -void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); + unsigned long *map_len); int extent_range_uptodate(struct extent_io_tree *tree, u64 start, u64 end); +int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); +int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); int extent_clear_unlock_delalloc(struct inode *inode, struct extent_io_tree *tree, u64 start, u64 end, struct page *locked_page, @@ -297,4 +313,10 @@ int extent_clear_unlock_delalloc(struct inode *inode, struct bio * btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, gfp_t gfp_flags); + +struct btrfs_mapping_tree; + +int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, + u64 length, u64 logical, struct page *page, + int mirror_num); #endif diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 2d04103..7c97b33 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -183,22 +183,10 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next) return 0; } -int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) +static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) { - int ret = 0; struct extent_map *merge = NULL; struct rb_node *rb; - struct extent_map *em; - - write_lock(&tree->lock); - em = lookup_extent_mapping(tree, start, len); - - WARN_ON(!em || em->start != start); - - if (!em) - goto out; - - clear_bit(EXTENT_FLAG_PINNED, &em->flags); if (em->start != 0) { rb = rb_prev(&em->rb_node); @@ -225,6 +213,24 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) merge->in_tree = 0; free_extent_map(merge); } +} + +int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) +{ + int ret = 0; + struct extent_map *em; + + write_lock(&tree->lock); + em = lookup_extent_mapping(tree, start, len); + + WARN_ON(!em || em->start != start); + + if (!em) + goto out; + + clear_bit(EXTENT_FLAG_PINNED, &em->flags); + + try_merge_map(tree, em); free_extent_map(em); out: @@ -247,7 +253,6 @@ int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) { int ret = 0; - struct extent_map *merge = NULL; struct rb_node *rb; struct extent_map *exist; @@ -263,30 +268,8 @@ int add_extent_mapping(struct extent_map_tree *tree, goto out; } atomic_inc(&em->refs); - if (em->start != 0) { - rb = rb_prev(&em->rb_node); - if (rb) - merge = rb_entry(rb, struct extent_map, rb_node); - if (rb && mergable_maps(merge, em)) { - em->start = merge->start; - em->len += merge->len; - em->block_len += merge->block_len; - em->block_start = merge->block_start; - merge->in_tree = 0; - rb_erase(&merge->rb_node, &tree->map); - free_extent_map(merge); - } - } - rb = rb_next(&em->rb_node); - if (rb) - merge = rb_entry(rb, struct extent_map, rb_node); - if (rb && mergable_maps(em, merge)) { - em->len += merge->len; - em->block_len += merge->len; - rb_erase(&merge->rb_node, &tree->map); - merge->in_tree = 0; - free_extent_map(merge); - } + + try_merge_map(tree, em); out: return ret; } @@ -299,19 +282,8 @@ static u64 range_end(u64 start, u64 len) return start + len; } -/** - * lookup_extent_mapping - lookup extent_map - * @tree: tree to lookup in - * @start: byte offset to start the search - * @len: length of the lookup range - * - * Find and return the first extent_map struct in @tree that intersects the - * [start, len] range. There may be additional objects in the tree that - * intersect, so check the object returned carefully to make sure that no - * additional lookups are needed. - */ -struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, - u64 start, u64 len) +struct extent_map *__lookup_extent_mapping(struct extent_map_tree *tree, + u64 start, u64 len, int strict) { struct extent_map *em; struct rb_node *rb_node; @@ -320,38 +292,42 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, u64 end = range_end(start, len); rb_node = __tree_search(&tree->map, start, &prev, &next); - if (!rb_node && prev) { - em = rb_entry(prev, struct extent_map, rb_node); - if (end > em->start && start < extent_map_end(em)) - goto found; - } - if (!rb_node && next) { - em = rb_entry(next, struct extent_map, rb_node); - if (end > em->start && start < extent_map_end(em)) - goto found; - } if (!rb_node) { - em = NULL; - goto out; - } - if (IS_ERR(rb_node)) { - em = ERR_CAST(rb_node); - goto out; + if (prev) + rb_node = prev; + else if (next) + rb_node = next; + else + return NULL; } + em = rb_entry(rb_node, struct extent_map, rb_node); - if (end > em->start && start < extent_map_end(em)) - goto found; - em = NULL; - goto out; + if (strict && !(end > em->start && start < extent_map_end(em))) + return NULL; -found: atomic_inc(&em->refs); -out: return em; } /** + * lookup_extent_mapping - lookup extent_map + * @tree: tree to lookup in + * @start: byte offset to start the search + * @len: length of the lookup range + * + * Find and return the first extent_map struct in @tree that intersects the + * [start, len] range. There may be additional objects in the tree that + * intersect, so check the object returned carefully to make sure that no + * additional lookups are needed. + */ +struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, + u64 start, u64 len) +{ + return __lookup_extent_mapping(tree, start, len, 1); +} + +/** * search_extent_mapping - find a nearby extent map * @tree: tree to lookup in * @start: byte offset to start the search @@ -365,38 +341,7 @@ out: struct extent_map *search_extent_mapping(struct extent_map_tree *tree, u64 start, u64 len) { - struct extent_map *em; - struct rb_node *rb_node; - struct rb_node *prev = NULL; - struct rb_node *next = NULL; - - rb_node = __tree_search(&tree->map, start, &prev, &next); - if (!rb_node && prev) { - em = rb_entry(prev, struct extent_map, rb_node); - goto found; - } - if (!rb_node && next) { - em = rb_entry(next, struct extent_map, rb_node); - goto found; - } - if (!rb_node) { - em = NULL; - goto out; - } - if (IS_ERR(rb_node)) { - em = ERR_CAST(rb_node); - goto out; - } - em = rb_entry(rb_node, struct extent_map, rb_node); - goto found; - - em = NULL; - goto out; - -found: - atomic_inc(&em->refs); -out: - return em; + return __lookup_extent_mapping(tree, start, len, 0); } /** diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 90d4ee5..d7c9b68 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -91,8 +91,7 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, struct btrfs_csum_item *item; struct extent_buffer *leaf; u64 csum_offset = 0; - u16 csum_size = - btrfs_super_csum_size(&root->fs_info->super_copy); + u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); int csums_in_item; file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; @@ -162,8 +161,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, u64 item_last_offset = 0; u64 disk_bytenr; u32 diff; - u16 csum_size = - btrfs_super_csum_size(&root->fs_info->super_copy); + u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); int ret; struct btrfs_path *path; struct btrfs_csum_item *item = NULL; @@ -177,6 +175,17 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, WARN_ON(bio->bi_vcnt <= 0); + /* + * the free space stuff is only read when it hasn't been + * updated in the current transaction. So, we can safely + * read from the commit root and sidestep a nasty deadlock + * between reading the free space cache and updating the csum tree. + */ + if (btrfs_is_free_space_inode(root, inode)) { + path->search_commit_root = 1; + path->skip_locking = 1; + } + disk_bytenr = (u64)bio->bi_sector << 9; if (dio) offset = logical_offset; @@ -279,10 +288,11 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, int ret; size_t size; u64 csum_end; - u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy); + u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; if (search_commit) { path->skip_locking = 1; @@ -480,8 +490,7 @@ static noinline int truncate_one_csum(struct btrfs_trans_handle *trans, u64 bytenr, u64 len) { struct extent_buffer *leaf; - u16 csum_size = - btrfs_super_csum_size(&root->fs_info->super_copy); + u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); u64 csum_end; u64 end_byte = bytenr + len; u32 blocksize_bits = root->fs_info->sb->s_blocksize_bits; @@ -537,8 +546,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, u64 csum_end; struct extent_buffer *leaf; int ret; - u16 csum_size = - btrfs_super_csum_size(&root->fs_info->super_copy); + u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); int blocksize_bits = root->fs_info->sb->s_blocksize_bits; root = root->fs_info->csum_root; @@ -664,15 +672,12 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_sector_sum *sector_sum; u32 nritems; u32 ins_size; - char *eb_map; - char *eb_token; - unsigned long map_len; - unsigned long map_start; - u16 csum_size = - btrfs_super_csum_size(&root->fs_info->super_copy); + u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; + sector_sum = sums->sums; again: next_offset = (u64)-1; @@ -712,7 +717,7 @@ again: found_next = 1; if (ret != 0) goto insert; - slot = 0; + slot = path->slots[0]; } btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot); if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || @@ -814,30 +819,9 @@ found: item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + btrfs_item_size_nr(leaf, path->slots[0])); - eb_token = NULL; next_sector: - if (!eb_token || - (unsigned long)item + csum_size >= map_start + map_len) { - int err; - - if (eb_token) - unmap_extent_buffer(leaf, eb_token, KM_USER1); - eb_token = NULL; - err = map_private_extent_buffer(leaf, (unsigned long)item, - csum_size, - &eb_token, &eb_map, - &map_start, &map_len, KM_USER1); - if (err) - eb_token = NULL; - } - if (eb_token) { - memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)), - §or_sum->sum, csum_size); - } else { - write_extent_buffer(leaf, §or_sum->sum, - (unsigned long)item, csum_size); - } + write_extent_buffer(leaf, §or_sum->sum, (unsigned long)item, csum_size); total_bytes += root->sectorsize; sector_sum++; @@ -850,10 +834,7 @@ next_sector: goto next_sector; } } - if (eb_token) { - unmap_extent_buffer(leaf, eb_token, KM_USER1); - eb_token = NULL; - } + btrfs_mark_buffer_dirty(path->nodes[0]); if (total_bytes < sums->len) { btrfs_release_path(path); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index fa4ef18..97fbe93 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -74,7 +74,7 @@ struct inode_defrag { * If an existing record is found the defrag item you * pass in is freed */ -static int __btrfs_add_inode_defrag(struct inode *inode, +static void __btrfs_add_inode_defrag(struct inode *inode, struct inode_defrag *defrag) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -106,11 +106,11 @@ static int __btrfs_add_inode_defrag(struct inode *inode, BTRFS_I(inode)->in_defrag = 1; rb_link_node(&defrag->rb_node, parent, p); rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); - return 0; + return; exists: kfree(defrag); - return 0; + return; } @@ -123,7 +123,6 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, { struct btrfs_root *root = BTRFS_I(inode)->root; struct inode_defrag *defrag; - int ret = 0; u64 transid; if (!btrfs_test_opt(root, AUTO_DEFRAG)) @@ -150,9 +149,11 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, spin_lock(&root->fs_info->defrag_inodes_lock); if (!BTRFS_I(inode)->in_defrag) - ret = __btrfs_add_inode_defrag(inode, defrag); + __btrfs_add_inode_defrag(inode, defrag); + else + kfree(defrag); spin_unlock(&root->fs_info->defrag_inodes_lock); - return ret; + return 0; } /* @@ -855,7 +856,8 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, btrfs_drop_extent_cache(inode, start, end - 1, 0); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; again: recow = 0; split = start; @@ -1034,11 +1036,13 @@ out: * on error we return an unlocked page and the error value * on success we return a locked page and 0 */ -static int prepare_uptodate_page(struct page *page, u64 pos) +static int prepare_uptodate_page(struct page *page, u64 pos, + bool force_uptodate) { int ret = 0; - if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) { + if (((pos & (PAGE_CACHE_SIZE - 1)) || force_uptodate) && + !PageUptodate(page)) { ret = btrfs_readpage(NULL, page); if (ret) return ret; @@ -1059,12 +1063,13 @@ static int prepare_uptodate_page(struct page *page, u64 pos) static noinline int prepare_pages(struct btrfs_root *root, struct file *file, struct page **pages, size_t num_pages, loff_t pos, unsigned long first_index, - unsigned long last_index, size_t write_bytes) + size_t write_bytes, bool force_uptodate) { struct extent_state *cached_state = NULL; int i; unsigned long index = pos >> PAGE_CACHE_SHIFT; struct inode *inode = fdentry(file)->d_inode; + gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); int err = 0; int faili = 0; u64 start_pos; @@ -1073,15 +1078,10 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, start_pos = pos & ~((u64)root->sectorsize - 1); last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; - if (start_pos > inode->i_size) { - err = btrfs_cont_expand(inode, i_size_read(inode), start_pos); - if (err) - return err; - } - again: for (i = 0; i < num_pages; i++) { - pages[i] = grab_cache_page(inode->i_mapping, index + i); + pages[i] = find_or_create_page(inode->i_mapping, index + i, + mask); if (!pages[i]) { faili = i - 1; err = -ENOMEM; @@ -1089,10 +1089,11 @@ again: } if (i == 0) - err = prepare_uptodate_page(pages[i], pos); + err = prepare_uptodate_page(pages[i], pos, + force_uptodate); if (i == num_pages - 1) err = prepare_uptodate_page(pages[i], - pos + write_bytes); + pos + write_bytes, false); if (err) { page_cache_release(pages[i]); faili = i - 1; @@ -1158,20 +1159,21 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, struct btrfs_root *root = BTRFS_I(inode)->root; struct page **pages = NULL; unsigned long first_index; - unsigned long last_index; size_t num_written = 0; int nrptrs; int ret = 0; + bool force_page_uptodate = false; nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / (sizeof(struct page *))); + nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied); + nrptrs = max(nrptrs, 8); pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); if (!pages) return -ENOMEM; first_index = pos >> PAGE_CACHE_SHIFT; - last_index = (pos + iov_iter_count(i)) >> PAGE_CACHE_SHIFT; while (iov_iter_count(i) > 0) { size_t offset = pos & (PAGE_CACHE_SIZE - 1); @@ -1205,8 +1207,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, * contents of pages from loop to loop */ ret = prepare_pages(root, file, pages, num_pages, - pos, first_index, last_index, - write_bytes); + pos, first_index, write_bytes, + force_page_uptodate); if (ret) { btrfs_delalloc_release_space(inode, num_pages << PAGE_CACHE_SHIFT); @@ -1223,12 +1225,15 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, if (copied < write_bytes) nrptrs = 1; - if (copied == 0) + if (copied == 0) { + force_page_uptodate = true; dirty_pages = 0; - else + } else { + force_page_uptodate = false; dirty_pages = (copied + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + } /* * If we had a short copy we need to release the excess delaloc @@ -1238,9 +1243,11 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, * managed to copy. */ if (num_pages > dirty_pages) { - if (copied > 0) - atomic_inc( - &BTRFS_I(inode)->outstanding_extents); + if (copied > 0) { + spin_lock(&BTRFS_I(inode)->lock); + BTRFS_I(inode)->outstanding_extents++; + spin_unlock(&BTRFS_I(inode)->lock); + } btrfs_delalloc_release_space(inode, (num_pages - dirty_pages) << PAGE_CACHE_SHIFT); @@ -1336,6 +1343,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; loff_t *ppos = &iocb->ki_pos; + u64 start_pos; ssize_t num_written = 0; ssize_t err = 0; size_t count, ocount; @@ -1381,9 +1389,22 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, goto out; } - file_update_time(file); + err = btrfs_update_time(file); + if (err) { + mutex_unlock(&inode->i_mutex); + goto out; + } BTRFS_I(inode)->sequence++; + start_pos = round_down(pos, root->sectorsize); + if (start_pos > i_size_read(inode)) { + err = btrfs_cont_expand(inode, i_size_read(inode), start_pos); + if (err) { + mutex_unlock(&inode->i_mutex); + goto out; + } + } + if (unlikely(file->f_flags & O_DIRECT)) { num_written = __btrfs_direct_write(iocb, iov, nr_segs, pos, ppos, count, ocount); @@ -1452,7 +1473,7 @@ int btrfs_release_file(struct inode *inode, struct file *filp) * important optimization for directories because holding the mutex prevents * new operations on the dir while we write to disk. */ -int btrfs_sync_file(struct file *file, int datasync) +int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { struct dentry *dentry = file->f_path.dentry; struct inode *inode = dentry->d_inode; @@ -1462,9 +1483,13 @@ int btrfs_sync_file(struct file *file, int datasync) trace_btrfs_sync_file(file, datasync); + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (ret) + return ret; + mutex_lock(&inode->i_mutex); + /* we wait first, since the writeback may change the inode */ root->log_batch++; - /* the VFS called filemap_fdatawrite for us */ btrfs_wait_ordered_range(inode, 0, (u64)-1); root->log_batch++; @@ -1472,8 +1497,10 @@ int btrfs_sync_file(struct file *file, int datasync) * check the transaction that last modified this inode * and see if its already been committed */ - if (!BTRFS_I(inode)->last_trans) + if (!BTRFS_I(inode)->last_trans) { + mutex_unlock(&inode->i_mutex); goto out; + } /* * if the last transaction that changed this file was before @@ -1484,6 +1511,7 @@ int btrfs_sync_file(struct file *file, int datasync) if (BTRFS_I(inode)->last_trans <= root->fs_info->last_trans_committed) { BTRFS_I(inode)->last_trans = 0; + mutex_unlock(&inode->i_mutex); goto out; } @@ -1496,12 +1524,15 @@ int btrfs_sync_file(struct file *file, int datasync) trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); + mutex_unlock(&inode->i_mutex); goto out; } ret = btrfs_log_dentry_safe(trans, root, dentry); - if (ret < 0) + if (ret < 0) { + mutex_unlock(&inode->i_mutex); goto out; + } /* we've logged all the items and now have a consistent * version of the file in the log. It is possible that @@ -1513,7 +1544,7 @@ int btrfs_sync_file(struct file *file, int datasync) * file again, but that will end up using the synchronization * inside btrfs_sync_log to keep things safe. */ - mutex_unlock(&dentry->d_inode->i_mutex); + mutex_unlock(&inode->i_mutex); if (ret != BTRFS_NO_LOG_SYNC) { if (ret > 0) { @@ -1528,7 +1559,6 @@ int btrfs_sync_file(struct file *file, int datasync) } else { ret = btrfs_end_transaction(trans, root); } - mutex_lock(&dentry->d_inode->i_mutex); out: return ret > 0 ? -EIO : ret; } @@ -1592,10 +1622,6 @@ static long btrfs_fallocate(struct file *file, int mode, goto out; } - ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); - if (ret) - goto out; - locked_end = alloc_end - 1; while (1) { struct btrfs_ordered_extent *ordered; @@ -1629,23 +1655,53 @@ static long btrfs_fallocate(struct file *file, int mode, cur_offset = alloc_start; while (1) { + u64 actual_end; + em = btrfs_get_extent(inode, NULL, 0, cur_offset, alloc_end - cur_offset, 0); BUG_ON(IS_ERR_OR_NULL(em)); last_byte = min(extent_map_end(em), alloc_end); + actual_end = min_t(u64, extent_map_end(em), offset + len); last_byte = (last_byte + mask) & ~mask; + if (em->block_start == EXTENT_MAP_HOLE || (cur_offset >= inode->i_size && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { + + /* + * Make sure we have enough space before we do the + * allocation. + */ + ret = btrfs_check_data_free_space(inode, last_byte - + cur_offset); + if (ret) { + free_extent_map(em); + break; + } + ret = btrfs_prealloc_file_range(inode, mode, cur_offset, last_byte - cur_offset, 1 << inode->i_blkbits, offset + len, &alloc_hint); + + /* Let go of our reservation. */ + btrfs_free_reserved_data_space(inode, last_byte - + cur_offset); if (ret < 0) { free_extent_map(em); break; } + } else if (actual_end > inode->i_size && + !(mode & FALLOC_FL_KEEP_SIZE)) { + /* + * We didn't need to allocate any more space, but we + * still extended the size of the file so we need to + * update i_size. + */ + inode->i_ctime = CURRENT_TIME; + i_size_write(inode, actual_end); + btrfs_ordered_update_i_size(inode, actual_end, NULL); } free_extent_map(em); @@ -1657,15 +1713,168 @@ static long btrfs_fallocate(struct file *file, int mode, } unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, &cached_state, GFP_NOFS); - - btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); out: mutex_unlock(&inode->i_mutex); return ret; } +static int find_desired_extent(struct inode *inode, loff_t *offset, int origin) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_map *em; + struct extent_state *cached_state = NULL; + u64 lockstart = *offset; + u64 lockend = i_size_read(inode); + u64 start = *offset; + u64 orig_start = *offset; + u64 len = i_size_read(inode); + u64 last_end = 0; + int ret = 0; + + lockend = max_t(u64, root->sectorsize, lockend); + if (lockend <= lockstart) + lockend = lockstart + root->sectorsize; + + len = lockend - lockstart + 1; + + len = max_t(u64, len, root->sectorsize); + if (inode->i_size == 0) + return -ENXIO; + + lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0, + &cached_state, GFP_NOFS); + + /* + * Delalloc is such a pain. If we have a hole and we have pending + * delalloc for a portion of the hole we will get back a hole that + * exists for the entire range since it hasn't been actually written + * yet. So to take care of this case we need to look for an extent just + * before the position we want in case there is outstanding delalloc + * going on here. + */ + if (origin == SEEK_HOLE && start != 0) { + if (start <= root->sectorsize) + em = btrfs_get_extent_fiemap(inode, NULL, 0, 0, + root->sectorsize, 0); + else + em = btrfs_get_extent_fiemap(inode, NULL, 0, + start - root->sectorsize, + root->sectorsize, 0); + if (IS_ERR(em)) { + ret = -ENXIO; + goto out; + } + last_end = em->start + em->len; + if (em->block_start == EXTENT_MAP_DELALLOC) + last_end = min_t(u64, last_end, inode->i_size); + free_extent_map(em); + } + + while (1) { + em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0); + if (IS_ERR(em)) { + ret = -ENXIO; + break; + } + + if (em->block_start == EXTENT_MAP_HOLE) { + if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { + if (last_end <= orig_start) { + free_extent_map(em); + ret = -ENXIO; + break; + } + } + + if (origin == SEEK_HOLE) { + *offset = start; + free_extent_map(em); + break; + } + } else { + if (origin == SEEK_DATA) { + if (em->block_start == EXTENT_MAP_DELALLOC) { + if (start >= inode->i_size) { + free_extent_map(em); + ret = -ENXIO; + break; + } + } + + *offset = start; + free_extent_map(em); + break; + } + } + + start = em->start + em->len; + last_end = em->start + em->len; + + if (em->block_start == EXTENT_MAP_DELALLOC) + last_end = min_t(u64, last_end, inode->i_size); + + if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { + free_extent_map(em); + ret = -ENXIO; + break; + } + free_extent_map(em); + cond_resched(); + } + if (!ret) + *offset = min(*offset, inode->i_size); +out: + unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, + &cached_state, GFP_NOFS); + return ret; +} + +static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin) +{ + struct inode *inode = file->f_mapping->host; + int ret; + + mutex_lock(&inode->i_mutex); + switch (origin) { + case SEEK_END: + case SEEK_CUR: + offset = generic_file_llseek(file, offset, origin); + goto out; + case SEEK_DATA: + case SEEK_HOLE: + if (offset >= i_size_read(inode)) { + mutex_unlock(&inode->i_mutex); + return -ENXIO; + } + + ret = find_desired_extent(inode, &offset, origin); + if (ret) { + mutex_unlock(&inode->i_mutex); + return ret; + } + } + + if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) { + offset = -EINVAL; + goto out; + } + if (offset > inode->i_sb->s_maxbytes) { + offset = -EINVAL; + goto out; + } + + /* Special lock needed here? */ + if (offset != file->f_pos) { + file->f_pos = offset; + file->f_version = 0; + } +out: + mutex_unlock(&inode->i_mutex); + return offset; +} + const struct file_operations btrfs_file_operations = { - .llseek = generic_file_llseek, + .llseek = btrfs_file_llseek, .read = do_sync_read, .write = do_sync_write, .aio_read = generic_file_aio_read, diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index bf0d615..ec23d43 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "ctree.h" #include "free-space-cache.h" #include "transaction.h" @@ -84,6 +85,7 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, *block_group, struct btrfs_path *path) { struct inode *inode = NULL; + u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; spin_lock(&block_group->lock); if (block_group->inode) @@ -98,7 +100,14 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, return inode; spin_lock(&block_group->lock); - if (!btrfs_fs_closing(root->fs_info)) { + if (!((BTRFS_I(inode)->flags & flags) == flags)) { + printk(KERN_INFO "Old style space inode found, converting.\n"); + BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM | + BTRFS_INODE_NODATACOW; + block_group->disk_cache_state = BTRFS_DC_CLEAR; + } + + if (!block_group->iref) { block_group->inode = igrab(inode); block_group->iref = 1; } @@ -116,12 +125,17 @@ int __create_free_space_inode(struct btrfs_root *root, struct btrfs_free_space_header *header; struct btrfs_inode_item *inode_item; struct extent_buffer *leaf; + u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC; int ret; ret = btrfs_insert_empty_inode(trans, root, path, ino); if (ret) return ret; + /* We inline crc's for the free disk space cache */ + if (ino != BTRFS_FREE_INO_OBJECTID) + flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; + leaf = path->nodes[0]; inode_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); @@ -134,8 +148,7 @@ int __create_free_space_inode(struct btrfs_root *root, btrfs_set_inode_uid(leaf, inode_item, 0); btrfs_set_inode_gid(leaf, inode_item, 0); btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); - btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS | - BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM); + btrfs_set_inode_flags(leaf, inode_item, flags); btrfs_set_inode_nlink(leaf, inode_item, 1); btrfs_set_inode_transid(leaf, inode_item, trans->transid); btrfs_set_inode_block_group(leaf, inode_item, offset); @@ -184,15 +197,25 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, struct btrfs_path *path, struct inode *inode) { + struct btrfs_block_rsv *rsv; + u64 needed_bytes; loff_t oldsize; int ret = 0; - trans->block_rsv = root->orphan_block_rsv; - ret = btrfs_block_rsv_check(trans, root, - root->orphan_block_rsv, - 0, 5); - if (ret) - return ret; + rsv = trans->block_rsv; + trans->block_rsv = &root->fs_info->global_block_rsv; + + /* 1 for slack space, 1 for updating the inode */ + needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) + + btrfs_calc_trans_metadata_size(root, 1); + + spin_lock(&trans->block_rsv->lock); + if (trans->block_rsv->reserved < needed_bytes) { + spin_unlock(&trans->block_rsv->lock); + trans->block_rsv = rsv; + return -ENOSPC; + } + spin_unlock(&trans->block_rsv->lock); oldsize = i_size_read(inode); btrfs_i_size_write(inode, 0); @@ -204,12 +227,16 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, */ ret = btrfs_truncate_inode_items(trans, root, inode, 0, BTRFS_EXTENT_DATA_KEY); + if (ret) { + trans->block_rsv = rsv; WARN_ON(1); return ret; } ret = btrfs_update_inode(trans, root, inode); + trans->block_rsv = rsv; + return ret; } @@ -232,31 +259,348 @@ static int readahead_cache(struct inode *inode) return 0; } +struct io_ctl { + void *cur, *orig; + struct page *page; + struct page **pages; + struct btrfs_root *root; + unsigned long size; + int index; + int num_pages; + unsigned check_crcs:1; +}; + +static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode, + struct btrfs_root *root) +{ + memset(io_ctl, 0, sizeof(struct io_ctl)); + io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; + io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages, + GFP_NOFS); + if (!io_ctl->pages) + return -ENOMEM; + io_ctl->root = root; + if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID) + io_ctl->check_crcs = 1; + return 0; +} + +static void io_ctl_free(struct io_ctl *io_ctl) +{ + kfree(io_ctl->pages); +} + +static void io_ctl_unmap_page(struct io_ctl *io_ctl) +{ + if (io_ctl->cur) { + kunmap(io_ctl->page); + io_ctl->cur = NULL; + io_ctl->orig = NULL; + } +} + +static void io_ctl_map_page(struct io_ctl *io_ctl, int clear) +{ + WARN_ON(io_ctl->cur); + BUG_ON(io_ctl->index >= io_ctl->num_pages); + io_ctl->page = io_ctl->pages[io_ctl->index++]; + io_ctl->cur = kmap(io_ctl->page); + io_ctl->orig = io_ctl->cur; + io_ctl->size = PAGE_CACHE_SIZE; + if (clear) + memset(io_ctl->cur, 0, PAGE_CACHE_SIZE); +} + +static void io_ctl_drop_pages(struct io_ctl *io_ctl) +{ + int i; + + io_ctl_unmap_page(io_ctl); + + for (i = 0; i < io_ctl->num_pages; i++) { + ClearPageChecked(io_ctl->pages[i]); + unlock_page(io_ctl->pages[i]); + page_cache_release(io_ctl->pages[i]); + } +} + +static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode, + int uptodate) +{ + struct page *page; + gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); + int i; + + for (i = 0; i < io_ctl->num_pages; i++) { + page = find_or_create_page(inode->i_mapping, i, mask); + if (!page) { + io_ctl_drop_pages(io_ctl); + return -ENOMEM; + } + io_ctl->pages[i] = page; + if (uptodate && !PageUptodate(page)) { + btrfs_readpage(NULL, page); + lock_page(page); + if (!PageUptodate(page)) { + printk(KERN_ERR "btrfs: error reading free " + "space cache\n"); + io_ctl_drop_pages(io_ctl); + return -EIO; + } + } + } + + for (i = 0; i < io_ctl->num_pages; i++) { + clear_page_dirty_for_io(io_ctl->pages[i]); + set_page_extent_mapped(io_ctl->pages[i]); + } + + return 0; +} + +static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation) +{ + u64 *val; + + io_ctl_map_page(io_ctl, 1); + + /* + * Skip the csum areas. If we don't check crcs then we just have a + * 64bit chunk at the front of the first page. + */ + if (io_ctl->check_crcs) { + io_ctl->cur += (sizeof(u32) * io_ctl->num_pages); + io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages); + } else { + io_ctl->cur += sizeof(u64); + io_ctl->size -= sizeof(u64) * 2; + } + + val = io_ctl->cur; + *val = cpu_to_le64(generation); + io_ctl->cur += sizeof(u64); +} + +static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation) +{ + u64 *gen; + + /* + * Skip the crc area. If we don't check crcs then we just have a 64bit + * chunk at the front of the first page. + */ + if (io_ctl->check_crcs) { + io_ctl->cur += sizeof(u32) * io_ctl->num_pages; + io_ctl->size -= sizeof(u64) + + (sizeof(u32) * io_ctl->num_pages); + } else { + io_ctl->cur += sizeof(u64); + io_ctl->size -= sizeof(u64) * 2; + } + + gen = io_ctl->cur; + if (le64_to_cpu(*gen) != generation) { + printk_ratelimited(KERN_ERR "btrfs: space cache generation " + "(%Lu) does not match inode (%Lu)\n", *gen, + generation); + io_ctl_unmap_page(io_ctl); + return -EIO; + } + io_ctl->cur += sizeof(u64); + return 0; +} + +static void io_ctl_set_crc(struct io_ctl *io_ctl, int index) +{ + u32 *tmp; + u32 crc = ~(u32)0; + unsigned offset = 0; + + if (!io_ctl->check_crcs) { + io_ctl_unmap_page(io_ctl); + return; + } + + if (index == 0) + offset = sizeof(u32) * io_ctl->num_pages;; + + crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc, + PAGE_CACHE_SIZE - offset); + btrfs_csum_final(crc, (char *)&crc); + io_ctl_unmap_page(io_ctl); + tmp = kmap(io_ctl->pages[0]); + tmp += index; + *tmp = crc; + kunmap(io_ctl->pages[0]); +} + +static int io_ctl_check_crc(struct io_ctl *io_ctl, int index) +{ + u32 *tmp, val; + u32 crc = ~(u32)0; + unsigned offset = 0; + + if (!io_ctl->check_crcs) { + io_ctl_map_page(io_ctl, 0); + return 0; + } + + if (index == 0) + offset = sizeof(u32) * io_ctl->num_pages; + + tmp = kmap(io_ctl->pages[0]); + tmp += index; + val = *tmp; + kunmap(io_ctl->pages[0]); + + io_ctl_map_page(io_ctl, 0); + crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc, + PAGE_CACHE_SIZE - offset); + btrfs_csum_final(crc, (char *)&crc); + if (val != crc) { + printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free " + "space cache\n"); + io_ctl_unmap_page(io_ctl); + return -EIO; + } + + return 0; +} + +static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes, + void *bitmap) +{ + struct btrfs_free_space_entry *entry; + + if (!io_ctl->cur) + return -ENOSPC; + + entry = io_ctl->cur; + entry->offset = cpu_to_le64(offset); + entry->bytes = cpu_to_le64(bytes); + entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP : + BTRFS_FREE_SPACE_EXTENT; + io_ctl->cur += sizeof(struct btrfs_free_space_entry); + io_ctl->size -= sizeof(struct btrfs_free_space_entry); + + if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) + return 0; + + io_ctl_set_crc(io_ctl, io_ctl->index - 1); + + /* No more pages to map */ + if (io_ctl->index >= io_ctl->num_pages) + return 0; + + /* map the next page */ + io_ctl_map_page(io_ctl, 1); + return 0; +} + +static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap) +{ + if (!io_ctl->cur) + return -ENOSPC; + + /* + * If we aren't at the start of the current page, unmap this one and + * map the next one if there is any left. + */ + if (io_ctl->cur != io_ctl->orig) { + io_ctl_set_crc(io_ctl, io_ctl->index - 1); + if (io_ctl->index >= io_ctl->num_pages) + return -ENOSPC; + io_ctl_map_page(io_ctl, 0); + } + + memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE); + io_ctl_set_crc(io_ctl, io_ctl->index - 1); + if (io_ctl->index < io_ctl->num_pages) + io_ctl_map_page(io_ctl, 0); + return 0; +} + +static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl) +{ + /* + * If we're not on the boundary we know we've modified the page and we + * need to crc the page. + */ + if (io_ctl->cur != io_ctl->orig) + io_ctl_set_crc(io_ctl, io_ctl->index - 1); + else + io_ctl_unmap_page(io_ctl); + + while (io_ctl->index < io_ctl->num_pages) { + io_ctl_map_page(io_ctl, 1); + io_ctl_set_crc(io_ctl, io_ctl->index - 1); + } +} + +static int io_ctl_read_entry(struct io_ctl *io_ctl, + struct btrfs_free_space *entry, u8 *type) +{ + struct btrfs_free_space_entry *e; + int ret; + + if (!io_ctl->cur) { + ret = io_ctl_check_crc(io_ctl, io_ctl->index); + if (ret) + return ret; + } + + e = io_ctl->cur; + entry->offset = le64_to_cpu(e->offset); + entry->bytes = le64_to_cpu(e->bytes); + *type = e->type; + io_ctl->cur += sizeof(struct btrfs_free_space_entry); + io_ctl->size -= sizeof(struct btrfs_free_space_entry); + + if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) + return 0; + + io_ctl_unmap_page(io_ctl); + + return 0; +} + +static int io_ctl_read_bitmap(struct io_ctl *io_ctl, + struct btrfs_free_space *entry) +{ + int ret; + + ret = io_ctl_check_crc(io_ctl, io_ctl->index); + if (ret) + return ret; + + memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE); + io_ctl_unmap_page(io_ctl); + + return 0; +} + int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, struct btrfs_free_space_ctl *ctl, struct btrfs_path *path, u64 offset) { struct btrfs_free_space_header *header; struct extent_buffer *leaf; - struct page *page; - u32 *checksums = NULL, *crc; - char *disk_crcs = NULL; + struct io_ctl io_ctl; struct btrfs_key key; + struct btrfs_free_space *e, *n; struct list_head bitmaps; u64 num_entries; u64 num_bitmaps; u64 generation; - u32 cur_crc = ~(u32)0; - pgoff_t index = 0; - unsigned long first_page_offset; - int num_checksums; + u8 type; int ret = 0; INIT_LIST_HEAD(&bitmaps); /* Nothing in the space cache, goodbye */ if (!i_size_read(inode)) - goto out; + return 0; key.objectid = BTRFS_FREE_SPACE_OBJECTID; key.offset = offset; @@ -264,11 +608,10 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) - goto out; + return 0; else if (ret > 0) { btrfs_release_path(path); - ret = 0; - goto out; + return 0; } ret = -1; @@ -286,194 +629,102 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, " not match free space cache generation (%llu)\n", (unsigned long long)BTRFS_I(inode)->generation, (unsigned long long)generation); - goto out; + return 0; } if (!num_entries) - goto out; - - /* Setup everything for doing checksumming */ - num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE; - checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS); - if (!checksums) - goto out; - first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); - disk_crcs = kzalloc(first_page_offset, GFP_NOFS); - if (!disk_crcs) - goto out; + return 0; + io_ctl_init(&io_ctl, inode, root); ret = readahead_cache(inode); if (ret) goto out; - while (1) { - struct btrfs_free_space_entry *entry; - struct btrfs_free_space *e; - void *addr; - unsigned long offset = 0; - unsigned long start_offset = 0; - int need_loop = 0; + ret = io_ctl_prepare_pages(&io_ctl, inode, 1); + if (ret) + goto out; - if (!num_entries && !num_bitmaps) - break; + ret = io_ctl_check_crc(&io_ctl, 0); + if (ret) + goto free_cache; - if (index == 0) { - start_offset = first_page_offset; - offset = start_offset; - } + ret = io_ctl_check_generation(&io_ctl, generation); + if (ret) + goto free_cache; - page = grab_cache_page(inode->i_mapping, index); - if (!page) + while (num_entries) { + e = kmem_cache_zalloc(btrfs_free_space_cachep, + GFP_NOFS); + if (!e) goto free_cache; - if (!PageUptodate(page)) { - btrfs_readpage(NULL, page); - lock_page(page); - if (!PageUptodate(page)) { - unlock_page(page); - page_cache_release(page); - printk(KERN_ERR "btrfs: error reading free " - "space cache\n"); - goto free_cache; - } - } - addr = kmap(page); - - if (index == 0) { - u64 *gen; - - memcpy(disk_crcs, addr, first_page_offset); - gen = addr + (sizeof(u32) * num_checksums); - if (*gen != BTRFS_I(inode)->generation) { - printk(KERN_ERR "btrfs: space cache generation" - " (%llu) does not match inode (%llu)\n", - (unsigned long long)*gen, - (unsigned long long) - BTRFS_I(inode)->generation); - kunmap(page); - unlock_page(page); - page_cache_release(page); - goto free_cache; - } - crc = (u32 *)disk_crcs; - } - entry = addr + start_offset; - - /* First lets check our crc before we do anything fun */ - cur_crc = ~(u32)0; - cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc, - PAGE_CACHE_SIZE - start_offset); - btrfs_csum_final(cur_crc, (char *)&cur_crc); - if (cur_crc != *crc) { - printk(KERN_ERR "btrfs: crc mismatch for page %lu\n", - index); - kunmap(page); - unlock_page(page); - page_cache_release(page); + ret = io_ctl_read_entry(&io_ctl, e, &type); + if (ret) { + kmem_cache_free(btrfs_free_space_cachep, e); goto free_cache; } - crc++; - while (1) { - if (!num_entries) - break; + if (!e->bytes) { + kmem_cache_free(btrfs_free_space_cachep, e); + goto free_cache; + } - need_loop = 1; - e = kmem_cache_zalloc(btrfs_free_space_cachep, - GFP_NOFS); - if (!e) { - kunmap(page); - unlock_page(page); - page_cache_release(page); + if (type == BTRFS_FREE_SPACE_EXTENT) { + spin_lock(&ctl->tree_lock); + ret = link_free_space(ctl, e); + spin_unlock(&ctl->tree_lock); + if (ret) { + printk(KERN_ERR "Duplicate entries in " + "free space cache, dumping\n"); + kmem_cache_free(btrfs_free_space_cachep, e); goto free_cache; } - - e->offset = le64_to_cpu(entry->offset); - e->bytes = le64_to_cpu(entry->bytes); - if (!e->bytes) { - kunmap(page); - kmem_cache_free(btrfs_free_space_cachep, e); - unlock_page(page); - page_cache_release(page); + } else { + BUG_ON(!num_bitmaps); + num_bitmaps--; + e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); + if (!e->bitmap) { + kmem_cache_free( + btrfs_free_space_cachep, e); goto free_cache; } - - if (entry->type == BTRFS_FREE_SPACE_EXTENT) { - spin_lock(&ctl->tree_lock); - ret = link_free_space(ctl, e); - spin_unlock(&ctl->tree_lock); - if (ret) { - printk(KERN_ERR "Duplicate entries in " - "free space cache, dumping\n"); - kunmap(page); - unlock_page(page); - page_cache_release(page); - goto free_cache; - } - } else { - e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); - if (!e->bitmap) { - kunmap(page); - kmem_cache_free( - btrfs_free_space_cachep, e); - unlock_page(page); - page_cache_release(page); - goto free_cache; - } - spin_lock(&ctl->tree_lock); - ret = link_free_space(ctl, e); - ctl->total_bitmaps++; - ctl->op->recalc_thresholds(ctl); - spin_unlock(&ctl->tree_lock); - if (ret) { - printk(KERN_ERR "Duplicate entries in " - "free space cache, dumping\n"); - kunmap(page); - unlock_page(page); - page_cache_release(page); - goto free_cache; - } - list_add_tail(&e->list, &bitmaps); + spin_lock(&ctl->tree_lock); + ret = link_free_space(ctl, e); + ctl->total_bitmaps++; + ctl->op->recalc_thresholds(ctl); + spin_unlock(&ctl->tree_lock); + if (ret) { + printk(KERN_ERR "Duplicate entries in " + "free space cache, dumping\n"); + kmem_cache_free(btrfs_free_space_cachep, e); + goto free_cache; } - - num_entries--; - offset += sizeof(struct btrfs_free_space_entry); - if (offset + sizeof(struct btrfs_free_space_entry) >= - PAGE_CACHE_SIZE) - break; - entry++; + list_add_tail(&e->list, &bitmaps); } - /* - * We read an entry out of this page, we need to move on to the - * next page. - */ - if (need_loop) { - kunmap(page); - goto next; - } + num_entries--; + } - /* - * We add the bitmaps at the end of the entries in order that - * the bitmap entries are added to the cache. - */ - e = list_entry(bitmaps.next, struct btrfs_free_space, list); + io_ctl_unmap_page(&io_ctl); + + /* + * We add the bitmaps at the end of the entries in order that + * the bitmap entries are added to the cache. + */ + list_for_each_entry_safe(e, n, &bitmaps, list) { list_del_init(&e->list); - memcpy(e->bitmap, addr, PAGE_CACHE_SIZE); - kunmap(page); - num_bitmaps--; -next: - unlock_page(page); - page_cache_release(page); - index++; + ret = io_ctl_read_bitmap(&io_ctl, e); + if (ret) + goto free_cache; } + io_ctl_drop_pages(&io_ctl); ret = 1; out: - kfree(checksums); - kfree(disk_crcs); + io_ctl_free(&io_ctl); return ret; free_cache: + io_ctl_drop_pages(&io_ctl); __btrfs_remove_free_space_cache(ctl); goto out; } @@ -485,7 +736,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root = fs_info->tree_root; struct inode *inode; struct btrfs_path *path; - int ret; + int ret = 0; bool matched; u64 used = btrfs_block_group_used(&block_group->item); @@ -517,6 +768,14 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, return 0; } + /* We may have converted the inode and made the cache invalid. */ + spin_lock(&block_group->lock); + if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { + spin_unlock(&block_group->lock); + goto out; + } + spin_unlock(&block_group->lock); + ret = __load_free_space_cache(fs_info->tree_root, inode, ctl, path, block_group->key.objectid); btrfs_free_path(path); @@ -550,6 +809,19 @@ out: return ret; } +/** + * __btrfs_write_out_cache - write out cached info to an inode + * @root - the root the inode belongs to + * @ctl - the free space cache we are going to write out + * @block_group - the block_group for this cache if it belongs to a block_group + * @trans - the trans handle + * @path - the path to use + * @offset - the offset for the key we'll insert + * + * This function writes out a free space cache struct to disk for quick recovery + * on mount. This will return 0 if it was successfull in writing the cache out, + * and -1 if it was not. + */ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, struct btrfs_free_space_ctl *ctl, struct btrfs_block_group_cache *block_group, @@ -560,64 +832,24 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, struct extent_buffer *leaf; struct rb_node *node; struct list_head *pos, *n; - struct page **pages; - struct page *page; struct extent_state *cached_state = NULL; struct btrfs_free_cluster *cluster = NULL; struct extent_io_tree *unpin = NULL; + struct io_ctl io_ctl; struct list_head bitmap_list; struct btrfs_key key; u64 start, end, len; - u64 bytes = 0; - u32 *crc, *checksums; - unsigned long first_page_offset; - int index = 0, num_pages = 0; int entries = 0; int bitmaps = 0; - int ret = -1; - bool next_page = false; - bool out_of_space = false; + int ret; + int err = -1; INIT_LIST_HEAD(&bitmap_list); - node = rb_first(&ctl->free_space_offset); - if (!node) - return 0; - if (!i_size_read(inode)) return -1; - num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; - - /* Since the first page has all of our checksums and our generation we - * need to calculate the offset into the page that we can start writing - * our entries. - */ - first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64); - - filemap_write_and_wait(inode->i_mapping); - btrfs_wait_ordered_range(inode, inode->i_size & - ~(root->sectorsize - 1), (u64)-1); - - /* make sure we don't overflow that first page */ - if (first_page_offset + sizeof(struct btrfs_free_space_entry) >= PAGE_CACHE_SIZE) { - /* this is really the same as running out of space, where we also return 0 */ - printk(KERN_CRIT "Btrfs: free space cache was too big for the crc page\n"); - ret = 0; - goto out_update; - } - - /* We need a checksum per page. */ - crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS); - if (!crc) - return -1; - - pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); - if (!pages) { - kfree(crc); - return -1; - } + io_ctl_init(&io_ctl, inode, root); /* Get the cluster for this block_group if it exists */ if (block_group && !list_empty(&block_group->cluster_list)) @@ -631,30 +863,9 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, */ unpin = root->fs_info->pinned_extents; - /* - * Lock all pages first so we can lock the extent safely. - * - * NOTE: Because we hold the ref the entire time we're going to write to - * the page find_get_page should never fail, so we don't do a check - * after find_get_page at this point. Just putting this here so people - * know and don't freak out. - */ - while (index < num_pages) { - page = grab_cache_page(inode->i_mapping, index); - if (!page) { - int i; - - for (i = 0; i < num_pages; i++) { - unlock_page(pages[i]); - page_cache_release(pages[i]); - } - goto out_free; - } - pages[index] = page; - index++; - } + /* Lock all pages first so we can lock the extent safely. */ + io_ctl_prepare_pages(&io_ctl, inode, 0); - index = 0; lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 0, &cached_state, GFP_NOFS); @@ -665,192 +876,112 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, if (block_group) start = block_group->key.objectid; - /* Write out the extent entries */ - do { - struct btrfs_free_space_entry *entry; - void *addr; - unsigned long offset = 0; - unsigned long start_offset = 0; - - next_page = false; - - if (index == 0) { - start_offset = first_page_offset; - offset = start_offset; - } + node = rb_first(&ctl->free_space_offset); + if (!node && cluster) { + node = rb_first(&cluster->root); + cluster = NULL; + } - if (index >= num_pages) { - out_of_space = true; - break; - } + /* Make sure we can fit our crcs into the first page */ + if (io_ctl.check_crcs && + (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) { + WARN_ON(1); + goto out_nospc; + } - page = pages[index]; + io_ctl_set_generation(&io_ctl, trans->transid); - addr = kmap(page); - entry = addr + start_offset; + /* Write out the extent entries */ + while (node) { + struct btrfs_free_space *e; - memset(addr, 0, PAGE_CACHE_SIZE); - while (node && !next_page) { - struct btrfs_free_space *e; + e = rb_entry(node, struct btrfs_free_space, offset_index); + entries++; - e = rb_entry(node, struct btrfs_free_space, offset_index); - entries++; + ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes, + e->bitmap); + if (ret) + goto out_nospc; - entry->offset = cpu_to_le64(e->offset); - entry->bytes = cpu_to_le64(e->bytes); - if (e->bitmap) { - entry->type = BTRFS_FREE_SPACE_BITMAP; - list_add_tail(&e->list, &bitmap_list); - bitmaps++; - } else { - entry->type = BTRFS_FREE_SPACE_EXTENT; - } - node = rb_next(node); - if (!node && cluster) { - node = rb_first(&cluster->root); - cluster = NULL; - } - offset += sizeof(struct btrfs_free_space_entry); - if (offset + sizeof(struct btrfs_free_space_entry) >= - PAGE_CACHE_SIZE) - next_page = true; - entry++; + if (e->bitmap) { + list_add_tail(&e->list, &bitmap_list); + bitmaps++; } + node = rb_next(node); + if (!node && cluster) { + node = rb_first(&cluster->root); + cluster = NULL; + } + } - /* - * We want to add any pinned extents to our free space cache - * so we don't leak the space - */ - while (block_group && !next_page && - (start < block_group->key.objectid + - block_group->key.offset)) { - ret = find_first_extent_bit(unpin, start, &start, &end, - EXTENT_DIRTY); - if (ret) { - ret = 0; - break; - } - - /* This pinned extent is out of our range */ - if (start >= block_group->key.objectid + - block_group->key.offset) - break; - - len = block_group->key.objectid + - block_group->key.offset - start; - len = min(len, end + 1 - start); - - entries++; - entry->offset = cpu_to_le64(start); - entry->bytes = cpu_to_le64(len); - entry->type = BTRFS_FREE_SPACE_EXTENT; - - start = end + 1; - offset += sizeof(struct btrfs_free_space_entry); - if (offset + sizeof(struct btrfs_free_space_entry) >= - PAGE_CACHE_SIZE) - next_page = true; - entry++; + /* + * We want to add any pinned extents to our free space cache + * so we don't leak the space + */ + while (block_group && (start < block_group->key.objectid + + block_group->key.offset)) { + ret = find_first_extent_bit(unpin, start, &start, &end, + EXTENT_DIRTY); + if (ret) { + ret = 0; + break; } - *crc = ~(u32)0; - *crc = btrfs_csum_data(root, addr + start_offset, *crc, - PAGE_CACHE_SIZE - start_offset); - kunmap(page); - btrfs_csum_final(*crc, (char *)crc); - crc++; + /* This pinned extent is out of our range */ + if (start >= block_group->key.objectid + + block_group->key.offset) + break; + + len = block_group->key.objectid + + block_group->key.offset - start; + len = min(len, end + 1 - start); - bytes += PAGE_CACHE_SIZE; + entries++; + ret = io_ctl_add_entry(&io_ctl, start, len, NULL); + if (ret) + goto out_nospc; - index++; - } while (node || next_page); + start = end + 1; + } /* Write out the bitmaps */ list_for_each_safe(pos, n, &bitmap_list) { - void *addr; struct btrfs_free_space *entry = list_entry(pos, struct btrfs_free_space, list); - if (index >= num_pages) { - out_of_space = true; - break; - } - page = pages[index]; - - addr = kmap(page); - memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); - *crc = ~(u32)0; - *crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE); - kunmap(page); - btrfs_csum_final(*crc, (char *)crc); - crc++; - bytes += PAGE_CACHE_SIZE; - + ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap); + if (ret) + goto out_nospc; list_del_init(&entry->list); - index++; - } - - if (out_of_space) { - btrfs_drop_pages(pages, num_pages); - unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, - i_size_read(inode) - 1, &cached_state, - GFP_NOFS); - ret = 0; - goto out_free; } /* Zero out the rest of the pages just to make sure */ - while (index < num_pages) { - void *addr; - - page = pages[index]; - addr = kmap(page); - memset(addr, 0, PAGE_CACHE_SIZE); - kunmap(page); - bytes += PAGE_CACHE_SIZE; - index++; - } - - /* Write the checksums and trans id to the first page */ - { - void *addr; - u64 *gen; - - page = pages[0]; + io_ctl_zero_remaining_pages(&io_ctl); - addr = kmap(page); - memcpy(addr, checksums, sizeof(u32) * num_pages); - gen = addr + (sizeof(u32) * num_pages); - *gen = trans->transid; - kunmap(page); - } - - ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0, - bytes, &cached_state); - btrfs_drop_pages(pages, num_pages); + ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages, + 0, i_size_read(inode), &cached_state); + io_ctl_drop_pages(&io_ctl); unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, &cached_state, GFP_NOFS); - if (ret) { - ret = 0; - goto out_free; - } + if (ret) + goto out; - BTRFS_I(inode)->generation = trans->transid; - filemap_write_and_wait(inode->i_mapping); + ret = filemap_write_and_wait(inode->i_mapping); + if (ret) + goto out; key.objectid = BTRFS_FREE_SPACE_OBJECTID; key.offset = offset; key.type = 0; - ret = btrfs_search_slot(trans, root, &key, path, 1, 1); + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); if (ret < 0) { - ret = -1; - clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, - EXTENT_DIRTY | EXTENT_DELALLOC | - EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); - goto out_free; + clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, + EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, + GFP_NOFS); + goto out; } leaf = path->nodes[0]; if (ret > 0) { @@ -860,15 +991,16 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || found_key.offset != offset) { - ret = -1; - clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, - EXTENT_DIRTY | EXTENT_DELALLOC | - EXTENT_DO_ACCOUNTING, 0, 0, NULL, - GFP_NOFS); + clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, + inode->i_size - 1, + EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, + NULL, GFP_NOFS); btrfs_release_path(path); - goto out_free; + goto out; } } + + BTRFS_I(inode)->generation = trans->transid; header = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_free_space_header); btrfs_set_free_space_entries(leaf, header, entries); @@ -877,19 +1009,26 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, btrfs_mark_buffer_dirty(leaf); btrfs_release_path(path); - ret = 1; - -out_free: - kfree(checksums); - kfree(pages); - -out_update: - if (ret != 1) { - invalidate_inode_pages2_range(inode->i_mapping, 0, index); + err = 0; +out: + io_ctl_free(&io_ctl); + if (err) { + invalidate_inode_pages2(inode->i_mapping); BTRFS_I(inode)->generation = 0; } btrfs_update_inode(trans, root, inode); - return ret; + return err; + +out_nospc: + list_for_each_safe(pos, n, &bitmap_list) { + struct btrfs_free_space *entry = + list_entry(pos, struct btrfs_free_space, list); + list_del_init(&entry->list); + } + io_ctl_drop_pages(&io_ctl); + unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, + i_size_read(inode) - 1, &cached_state, GFP_NOFS); + goto out; } int btrfs_write_out_cache(struct btrfs_root *root, @@ -916,14 +1055,15 @@ int btrfs_write_out_cache(struct btrfs_root *root, ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans, path, block_group->key.objectid); - if (ret < 0) { + if (ret) { spin_lock(&block_group->lock); block_group->disk_cache_state = BTRFS_DC_ERROR; spin_unlock(&block_group->lock); ret = 0; - +#ifdef DEBUG printk(KERN_ERR "btrfs: failed to write free space cace " "for block group %llu\n", block_group->key.objectid); +#endif } iput(inode); @@ -1219,9 +1359,9 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); } -static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, - struct btrfs_free_space *info, u64 offset, - u64 bytes) +static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info, + u64 offset, u64 bytes) { unsigned long start, count; @@ -1232,6 +1372,13 @@ static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, bitmap_clear(info->bitmap, start, count); info->bytes -= bytes; +} + +static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info, u64 offset, + u64 bytes) +{ + __bitmap_clear_bits(ctl, info, offset, bytes); ctl->free_space -= bytes; } @@ -1323,6 +1470,7 @@ static void add_new_bitmap(struct btrfs_free_space_ctl *ctl, { info->offset = offset_to_bitmap(ctl, offset); info->bytes = 0; + INIT_LIST_HEAD(&info->list); link_free_space(ctl, info); ctl->total_bitmaps++; @@ -1702,7 +1850,13 @@ again: info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1, 0); if (!info) { - WARN_ON(1); + /* the tree logging code might be calling us before we + * have fully loaded the free space rbtree for this + * block group. So it is possible the entry won't + * be in the rbtree yet at all. The caching code + * will make sure not to put it in the rbtree if + * the logging code has pinned it. + */ goto out_lock; } } @@ -1741,6 +1895,7 @@ again: ctl->total_bitmaps--; } kmem_cache_free(btrfs_free_space_cachep, info); + ret = 0; goto out_lock; } @@ -1748,7 +1903,8 @@ again: unlink_free_space(ctl, info); info->offset += bytes; info->bytes -= bytes; - link_free_space(ctl, info); + ret = link_free_space(ctl, info); + WARN_ON(ret); goto out_lock; } @@ -2035,7 +2191,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, return 0; ret = search_start; - bitmap_clear_bits(ctl, entry, ret, bytes); + __bitmap_clear_bits(ctl, entry, ret, bytes); return ret; } @@ -2090,7 +2246,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, continue; } } else { - ret = entry->offset; entry->offset += bytes; @@ -2165,6 +2320,7 @@ again: if (!found) { start = i; + cluster->max_size = 0; found = true; } @@ -2308,16 +2464,23 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, { struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; struct btrfs_free_space *entry; - struct rb_node *node; int ret = -ENOSPC; + u64 bitmap_offset = offset_to_bitmap(ctl, offset); if (ctl->total_bitmaps == 0) return -ENOSPC; /* - * First check our cached list of bitmaps and see if there is an entry - * here that will work. + * The bitmap that covers offset won't be in the list unless offset + * is just its start offset. */ + entry = list_first_entry(bitmaps, struct btrfs_free_space, list); + if (entry->offset != bitmap_offset) { + entry = tree_search_offset(ctl, bitmap_offset, 1, 0); + if (entry && list_empty(&entry->list)) + list_add(&entry->list, bitmaps); + } + list_for_each_entry(entry, bitmaps, list) { if (entry->bytes < min_bytes) continue; @@ -2328,38 +2491,10 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, } /* - * If we do have entries on our list and we are here then we didn't find - * anything, so go ahead and get the next entry after the last entry in - * this list and start the search from there. + * The bitmaps list has all the bitmaps that record free space + * starting after offset, so no more search is required. */ - if (!list_empty(bitmaps)) { - entry = list_entry(bitmaps->prev, struct btrfs_free_space, - list); - node = rb_next(&entry->offset_index); - if (!node) - return -ENOSPC; - entry = rb_entry(node, struct btrfs_free_space, offset_index); - goto search; - } - - entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1); - if (!entry) - return -ENOSPC; - -search: - node = &entry->offset_index; - do { - entry = rb_entry(node, struct btrfs_free_space, offset_index); - node = rb_next(&entry->offset_index); - if (!entry->bitmap) - continue; - if (entry->bytes < min_bytes) - continue; - ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, - bytes, min_bytes); - } while (ret && node); - - return ret; + return -ENOSPC; } /* @@ -2377,8 +2512,8 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, u64 offset, u64 bytes, u64 empty_size) { struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; - struct list_head bitmaps; struct btrfs_free_space *entry, *tmp; + LIST_HEAD(bitmaps); u64 min_bytes; int ret; @@ -2417,7 +2552,6 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, goto out; } - INIT_LIST_HEAD(&bitmaps); ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, bytes, min_bytes); if (ret) @@ -2513,9 +2647,19 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, spin_unlock(&ctl->tree_lock); if (bytes >= minlen) { - int update_ret; - update_ret = btrfs_update_reserved_bytes(block_group, - bytes, 1, 1); + struct btrfs_space_info *space_info; + int update = 0; + + space_info = block_group->space_info; + spin_lock(&space_info->lock); + spin_lock(&block_group->lock); + if (!block_group->ro) { + block_group->reserved += bytes; + space_info->bytes_reserved += bytes; + update = 1; + } + spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); ret = btrfs_error_discard_extent(fs_info->extent_root, start, @@ -2523,9 +2667,16 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, &actually_trimmed); btrfs_add_free_space(block_group, start, bytes); - if (!update_ret) - btrfs_update_reserved_bytes(block_group, - bytes, 0, 1); + if (update) { + spin_lock(&space_info->lock); + spin_lock(&block_group->lock); + if (block_group->ro) + space_info->bytes_readonly += bytes; + block_group->reserved -= bytes; + space_info->bytes_reserved -= bytes; + spin_unlock(&space_info->lock); + spin_unlock(&block_group->lock); + } if (ret) break; @@ -2684,9 +2835,13 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root, return 0; ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0); - if (ret < 0) + if (ret) { + btrfs_delalloc_release_metadata(inode, inode->i_size); +#ifdef DEBUG printk(KERN_ERR "btrfs: failed to write free ino cache " "for root %llu\n", root->root_key.objectid); +#endif + } iput(inode); return ret; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index b4087e0..b3d1efe 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -207,24 +207,14 @@ again: void btrfs_return_ino(struct btrfs_root *root, u64 objectid) { - struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; struct btrfs_free_space_ctl *pinned = root->free_ino_pinned; if (!btrfs_test_opt(root, INODE_MAP_CACHE)) return; - again: if (root->cached == BTRFS_CACHE_FINISHED) { - __btrfs_add_free_space(ctl, objectid, 1); + __btrfs_add_free_space(pinned, objectid, 1); } else { - /* - * If we are in the process of caching free ino chunks, - * to avoid adding the same inode number to the free_ino - * tree twice due to cross transaction, we'll leave it - * in the pinned tree until a transaction is committed - * or the caching work is done. - */ - mutex_lock(&root->fs_commit_mutex); spin_lock(&root->cache_lock); if (root->cached == BTRFS_CACHE_FINISHED) { @@ -236,11 +226,7 @@ again: start_caching(root); - if (objectid <= root->cache_progress || - objectid > root->highest_objectid) - __btrfs_add_free_space(ctl, objectid, 1); - else - __btrfs_add_free_space(pinned, objectid, 1); + __btrfs_add_free_space(pinned, objectid, 1); mutex_unlock(&root->fs_commit_mutex); } @@ -258,6 +244,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) { struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset; + spinlock_t *rbroot_lock = &root->free_ino_pinned->tree_lock; struct btrfs_free_space *info; struct rb_node *n; u64 count; @@ -266,24 +253,30 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) return; while (1) { + bool add_to_ctl = true; + + spin_lock(rbroot_lock); n = rb_first(rbroot); - if (!n) + if (!n) { + spin_unlock(rbroot_lock); break; + } info = rb_entry(n, struct btrfs_free_space, offset_index); BUG_ON(info->bitmap); if (info->offset > root->cache_progress) - goto free; + add_to_ctl = false; else if (info->offset + info->bytes > root->cache_progress) count = root->cache_progress - info->offset + 1; else count = info->bytes; - __btrfs_add_free_space(ctl, info->offset, count); -free: rb_erase(&info->offset_index, rbroot); - kfree(info); + spin_unlock(rbroot_lock); + if (add_to_ctl) + __btrfs_add_free_space(ctl, info->offset, count); + kmem_cache_free(btrfs_free_space_cachep, info); } } @@ -398,6 +391,8 @@ int btrfs_save_ino_cache(struct btrfs_root *root, struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; struct btrfs_path *path; struct inode *inode; + struct btrfs_block_rsv *rsv; + u64 num_bytes; u64 alloc_hint = 0; int ret; int prealloc; @@ -421,11 +416,26 @@ int btrfs_save_ino_cache(struct btrfs_root *root, if (!path) return -ENOMEM; + rsv = trans->block_rsv; + trans->block_rsv = &root->fs_info->trans_block_rsv; + + num_bytes = trans->bytes_reserved; + /* + * 1 item for inode item insertion if need + * 3 items for inode item update (in the worst case) + * 1 item for free space object + * 3 items for pre-allocation + */ + trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 8); + ret = btrfs_block_rsv_add_noflush(root, trans->block_rsv, + trans->bytes_reserved); + if (ret) + goto out; again: inode = lookup_free_ino_inode(root, path); if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { ret = PTR_ERR(inode); - goto out; + goto out_release; } if (IS_ERR(inode)) { @@ -434,7 +444,7 @@ again: ret = create_free_ino_inode(root, trans, path); if (ret) - goto out; + goto out_release; goto again; } @@ -465,21 +475,26 @@ again: /* Just to make sure we have enough space */ prealloc += 8 * PAGE_CACHE_SIZE; - ret = btrfs_check_data_free_space(inode, prealloc); + ret = btrfs_delalloc_reserve_space(inode, prealloc); if (ret) goto out_put; ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, prealloc, prealloc, &alloc_hint); - if (ret) + if (ret) { + btrfs_delalloc_release_space(inode, prealloc); goto out_put; + } btrfs_free_reserved_data_space(inode, prealloc); + ret = btrfs_write_out_ino_cache(root, trans, path); out_put: iput(inode); +out_release: + btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved); out: - if (ret == 0) - ret = btrfs_write_out_ino_cache(root, trans, path); + trans->block_rsv = rsv; + trans->bytes_reserved = num_bytes; btrfs_free_path(path); return ret; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d42e6bf..cb10cb9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "compat.h" #include "ctree.h" #include "disk-io.h" @@ -45,17 +46,17 @@ #include "btrfs_inode.h" #include "ioctl.h" #include "print-tree.h" -#include "volumes.h" #include "ordered-data.h" #include "xattr.h" #include "tree-log.h" +#include "volumes.h" #include "compression.h" #include "locking.h" #include "free-space-cache.h" #include "inode-map.h" struct btrfs_iget_args { - u64 ino; + struct btrfs_key *location; struct btrfs_root *root; }; @@ -93,6 +94,8 @@ static noinline int cow_file_range(struct inode *inode, struct page *locked_page, u64 start, u64 end, int *page_started, unsigned long *nr_written, int unlock); +static noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode); static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, struct inode *inode, struct inode *dir, @@ -340,6 +343,7 @@ static noinline int compress_file_range(struct inode *inode, int i; int will_compress; int compress_type = root->fs_info->compress_type; + int redirty = 0; /* if this is a small write inside eof, kick off a defragbot */ if (end <= BTRFS_I(inode)->disk_i_size && (end - start + 1) < 16 * 1024) @@ -393,11 +397,25 @@ again: (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) { WARN_ON(pages); pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); - BUG_ON(!pages); + if (!pages) { + /* just bail out to the uncompressed code */ + goto cont; + } if (BTRFS_I(inode)->force_compress) compress_type = BTRFS_I(inode)->force_compress; + /* + * we need to call clear_page_dirty_for_io on each + * page in the range. Otherwise applications with the file + * mmap'd can wander in and change the page contents while + * we are compressing them. + * + * If the compression fails for any reason, we set the pages + * dirty again later on. + */ + extent_range_clear_dirty_for_io(inode, start, end); + redirty = 1; ret = btrfs_compress_pages(compress_type, inode->i_mapping, start, total_compressed, pages, @@ -424,6 +442,7 @@ again: will_compress = 1; } } +cont: if (start == 0) { trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); @@ -534,6 +553,8 @@ cleanup_and_bail_uncompressed: __set_page_dirty_nobuffers(locked_page); /* unlocked later on in the async handlers */ } + if (redirty) + extent_range_redirty_for_io(inode, start, end); add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0, BTRFS_COMPRESS_NONE); *num_added += 1; @@ -750,15 +771,6 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start, return alloc_hint; } -static inline bool is_free_space_inode(struct btrfs_root *root, - struct inode *inode) -{ - if (root == root->fs_info->tree_root || - BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) - return true; - return false; -} - /* * when extent_io.c finds a delayed allocation range in the file, * the call backs end up in this code. The basic idea is to @@ -791,7 +803,7 @@ static noinline int cow_file_range(struct inode *inode, struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; int ret = 0; - BUG_ON(is_free_space_inode(root, inode)); + BUG_ON(btrfs_is_free_space_inode(root, inode)); trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); trans->block_rsv = &root->fs_info->delalloc_block_rsv; @@ -829,7 +841,7 @@ static noinline int cow_file_range(struct inode *inode, } BUG_ON(disk_num_bytes > - btrfs_super_total_bytes(&root->fs_info->super_copy)); + btrfs_super_total_bytes(root->fs_info->super_copy)); alloc_hint = get_extent_allocation_hint(inode, start, num_bytes); btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); @@ -1070,9 +1082,10 @@ static noinline int run_delalloc_nocow(struct inode *inode, u64 ino = btrfs_ino(inode); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; - nolock = is_free_space_inode(root, inode); + nolock = btrfs_is_free_space_inode(root, inode); if (nolock) trans = btrfs_join_transaction_nolock(root); @@ -1291,15 +1304,16 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, return ret; } -static int btrfs_split_extent_hook(struct inode *inode, - struct extent_state *orig, u64 split) +static void btrfs_split_extent_hook(struct inode *inode, + struct extent_state *orig, u64 split) { /* not delalloc, ignore it */ if (!(orig->state & EXTENT_DELALLOC)) - return 0; + return; - atomic_inc(&BTRFS_I(inode)->outstanding_extents); - return 0; + spin_lock(&BTRFS_I(inode)->lock); + BTRFS_I(inode)->outstanding_extents++; + spin_unlock(&BTRFS_I(inode)->lock); } /* @@ -1308,16 +1322,17 @@ static int btrfs_split_extent_hook(struct inode *inode, * extents, such as when we are doing sequential writes, so we can properly * account for the metadata space we'll need. */ -static int btrfs_merge_extent_hook(struct inode *inode, - struct extent_state *new, - struct extent_state *other) +static void btrfs_merge_extent_hook(struct inode *inode, + struct extent_state *new, + struct extent_state *other) { /* not delalloc, ignore it */ if (!(other->state & EXTENT_DELALLOC)) - return 0; + return; - atomic_dec(&BTRFS_I(inode)->outstanding_extents); - return 0; + spin_lock(&BTRFS_I(inode)->lock); + BTRFS_I(inode)->outstanding_extents--; + spin_unlock(&BTRFS_I(inode)->lock); } /* @@ -1325,8 +1340,8 @@ static int btrfs_merge_extent_hook(struct inode *inode, * bytes in this file, and to maintain the list of inodes that * have pending delalloc work to be done. */ -static int btrfs_set_bit_hook(struct inode *inode, - struct extent_state *state, int *bits) +static void btrfs_set_bit_hook(struct inode *inode, + struct extent_state *state, int *bits) { /* @@ -1337,12 +1352,15 @@ static int btrfs_set_bit_hook(struct inode *inode, if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 len = state->end + 1 - state->start; - bool do_list = !is_free_space_inode(root, inode); + bool do_list = !btrfs_is_free_space_inode(root, inode); - if (*bits & EXTENT_FIRST_DELALLOC) + if (*bits & EXTENT_FIRST_DELALLOC) { *bits &= ~EXTENT_FIRST_DELALLOC; - else - atomic_inc(&BTRFS_I(inode)->outstanding_extents); + } else { + spin_lock(&BTRFS_I(inode)->lock); + BTRFS_I(inode)->outstanding_extents++; + spin_unlock(&BTRFS_I(inode)->lock); + } spin_lock(&root->fs_info->delalloc_lock); BTRFS_I(inode)->delalloc_bytes += len; @@ -1353,14 +1371,13 @@ static int btrfs_set_bit_hook(struct inode *inode, } spin_unlock(&root->fs_info->delalloc_lock); } - return 0; } /* * extent_io.c clear_bit_hook, see set_bit_hook for why */ -static int btrfs_clear_bit_hook(struct inode *inode, - struct extent_state *state, int *bits) +static void btrfs_clear_bit_hook(struct inode *inode, + struct extent_state *state, int *bits) { /* * set_bit and clear bit hooks normally require _irqsave/restore @@ -1370,12 +1387,15 @@ static int btrfs_clear_bit_hook(struct inode *inode, if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 len = state->end + 1 - state->start; - bool do_list = !is_free_space_inode(root, inode); + bool do_list = !btrfs_is_free_space_inode(root, inode); - if (*bits & EXTENT_FIRST_DELALLOC) + if (*bits & EXTENT_FIRST_DELALLOC) { *bits &= ~EXTENT_FIRST_DELALLOC; - else if (!(*bits & EXTENT_DO_ACCOUNTING)) - atomic_dec(&BTRFS_I(inode)->outstanding_extents); + } else if (!(*bits & EXTENT_DO_ACCOUNTING)) { + spin_lock(&BTRFS_I(inode)->lock); + BTRFS_I(inode)->outstanding_extents--; + spin_unlock(&BTRFS_I(inode)->lock); + } if (*bits & EXTENT_DO_ACCOUNTING) btrfs_delalloc_release_metadata(inode, len); @@ -1394,7 +1414,6 @@ static int btrfs_clear_bit_hook(struct inode *inode, } spin_unlock(&root->fs_info->delalloc_lock); } - return 0; } /* @@ -1477,7 +1496,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; - if (is_free_space_inode(root, inode)) + if (btrfs_is_free_space_inode(root, inode)) ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2); else ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); @@ -1644,7 +1663,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, int ret; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; path->leave_spinning = 1; @@ -1726,7 +1746,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) return 0; BUG_ON(!ordered_extent); - nolock = is_free_space_inode(root, inode); + nolock = btrfs_is_free_space_inode(root, inode); if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { BUG_ON(!list_empty(&ordered_extent->list)); @@ -1738,7 +1758,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); trans->block_rsv = &root->fs_info->delalloc_block_rsv; - ret = btrfs_update_inode(trans, root, inode); + ret = btrfs_update_inode_fallback(trans, root, inode); BUG_ON(ret); } goto out; @@ -1787,18 +1807,18 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) &ordered_extent->list); ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); - if (!ret) { - ret = btrfs_update_inode(trans, root, inode); + if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { + ret = btrfs_update_inode_fallback(trans, root, inode); BUG_ON(ret); } ret = 0; out: - if (nolock) { - if (trans) - btrfs_end_transaction_nolock(trans, root); - } else { + if (root != root->fs_info->tree_root) btrfs_delalloc_release_metadata(inode, ordered_extent->len); - if (trans) + if (trans) { + if (nolock) + btrfs_end_transaction_nolock(trans, root); + else btrfs_end_transaction(trans, root); } @@ -1820,153 +1840,9 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, } /* - * When IO fails, either with EIO or csum verification fails, we - * try other mirrors that might have a good copy of the data. This - * io_failure_record is used to record state as we go through all the - * mirrors. If another mirror has good data, the page is set up to date - * and things continue. If a good mirror can't be found, the original - * bio end_io callback is called to indicate things have failed. - */ -struct io_failure_record { - struct page *page; - u64 start; - u64 len; - u64 logical; - unsigned long bio_flags; - int last_mirror; -}; - -static int btrfs_io_failed_hook(struct bio *failed_bio, - struct page *page, u64 start, u64 end, - struct extent_state *state) -{ - struct io_failure_record *failrec = NULL; - u64 private; - struct extent_map *em; - struct inode *inode = page->mapping->host; - struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - struct bio *bio; - int num_copies; - int ret; - int rw; - u64 logical; - - ret = get_state_private(failure_tree, start, &private); - if (ret) { - failrec = kmalloc(sizeof(*failrec), GFP_NOFS); - if (!failrec) - return -ENOMEM; - failrec->start = start; - failrec->len = end - start + 1; - failrec->last_mirror = 0; - failrec->bio_flags = 0; - - read_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, start, failrec->len); - if (em->start > start || em->start + em->len < start) { - free_extent_map(em); - em = NULL; - } - read_unlock(&em_tree->lock); - - if (IS_ERR_OR_NULL(em)) { - kfree(failrec); - return -EIO; - } - logical = start - em->start; - logical = em->block_start + logical; - if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { - logical = em->block_start; - failrec->bio_flags = EXTENT_BIO_COMPRESSED; - extent_set_compress_type(&failrec->bio_flags, - em->compress_type); - } - failrec->logical = logical; - free_extent_map(em); - set_extent_bits(failure_tree, start, end, EXTENT_LOCKED | - EXTENT_DIRTY, GFP_NOFS); - set_state_private(failure_tree, start, - (u64)(unsigned long)failrec); - } else { - failrec = (struct io_failure_record *)(unsigned long)private; - } - num_copies = btrfs_num_copies( - &BTRFS_I(inode)->root->fs_info->mapping_tree, - failrec->logical, failrec->len); - failrec->last_mirror++; - if (!state) { - spin_lock(&BTRFS_I(inode)->io_tree.lock); - state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree, - failrec->start, - EXTENT_LOCKED); - if (state && state->start != failrec->start) - state = NULL; - spin_unlock(&BTRFS_I(inode)->io_tree.lock); - } - if (!state || failrec->last_mirror > num_copies) { - set_state_private(failure_tree, failrec->start, 0); - clear_extent_bits(failure_tree, failrec->start, - failrec->start + failrec->len - 1, - EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS); - kfree(failrec); - return -EIO; - } - bio = bio_alloc(GFP_NOFS, 1); - bio->bi_private = state; - bio->bi_end_io = failed_bio->bi_end_io; - bio->bi_sector = failrec->logical >> 9; - bio->bi_bdev = failed_bio->bi_bdev; - bio->bi_size = 0; - - bio_add_page(bio, page, failrec->len, start - page_offset(page)); - if (failed_bio->bi_rw & REQ_WRITE) - rw = WRITE; - else - rw = READ; - - ret = BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, - failrec->last_mirror, - failrec->bio_flags, 0); - return ret; -} - -/* - * each time an IO finishes, we do a fast check in the IO failure tree - * to see if we need to process or clean up an io_failure_record - */ -static int btrfs_clean_io_failures(struct inode *inode, u64 start) -{ - u64 private; - u64 private_failure; - struct io_failure_record *failure; - int ret; - - private = 0; - if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, - (u64)-1, 1, EXTENT_DIRTY, 0)) { - ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, - start, &private_failure); - if (ret == 0) { - failure = (struct io_failure_record *)(unsigned long) - private_failure; - set_state_private(&BTRFS_I(inode)->io_failure_tree, - failure->start, 0); - clear_extent_bits(&BTRFS_I(inode)->io_failure_tree, - failure->start, - failure->start + failure->len - 1, - EXTENT_DIRTY | EXTENT_LOCKED, - GFP_NOFS); - kfree(failure); - } - } - return 0; -} - -/* * when reads are done, we need to check csums to verify the data is correct - * if there's a match, we allow the bio to finish. If not, we go through - * the io_failure_record routines to find good copies + * if there's a match, we allow the bio to finish. If not, the code in + * extent_io.c will try to find good copies for us. */ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state) @@ -2012,10 +1888,6 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, kunmap_atomic(kaddr, KM_USER0); good: - /* if the io failure tree for this inode is non-empty, - * check to see if we've recovered from a failed IO - */ - btrfs_clean_io_failures(inode, start); return 0; zeroit: @@ -2080,89 +1952,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) up_read(&root->fs_info->cleanup_work_sem); } -/* - * calculate extra metadata reservation when snapshotting a subvolume - * contains orphan files. - */ -void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, - struct btrfs_pending_snapshot *pending, - u64 *bytes_to_reserve) -{ - struct btrfs_root *root; - struct btrfs_block_rsv *block_rsv; - u64 num_bytes; - int index; - - root = pending->root; - if (!root->orphan_block_rsv || list_empty(&root->orphan_list)) - return; - - block_rsv = root->orphan_block_rsv; - - /* orphan block reservation for the snapshot */ - num_bytes = block_rsv->size; - - /* - * after the snapshot is created, COWing tree blocks may use more - * space than it frees. So we should make sure there is enough - * reserved space. - */ - index = trans->transid & 0x1; - if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) { - num_bytes += block_rsv->size - - (block_rsv->reserved + block_rsv->freed[index]); - } - - *bytes_to_reserve += num_bytes; -} - -void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans, - struct btrfs_pending_snapshot *pending) -{ - struct btrfs_root *root = pending->root; - struct btrfs_root *snap = pending->snap; - struct btrfs_block_rsv *block_rsv; - u64 num_bytes; - int index; - int ret; - - if (!root->orphan_block_rsv || list_empty(&root->orphan_list)) - return; - - /* refill source subvolume's orphan block reservation */ - block_rsv = root->orphan_block_rsv; - index = trans->transid & 0x1; - if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) { - num_bytes = block_rsv->size - - (block_rsv->reserved + block_rsv->freed[index]); - ret = btrfs_block_rsv_migrate(&pending->block_rsv, - root->orphan_block_rsv, - num_bytes); - BUG_ON(ret); - } - - /* setup orphan block reservation for the snapshot */ - block_rsv = btrfs_alloc_block_rsv(snap); - BUG_ON(!block_rsv); - - btrfs_add_durable_block_rsv(root->fs_info, block_rsv); - snap->orphan_block_rsv = block_rsv; - - num_bytes = root->orphan_block_rsv->size; - ret = btrfs_block_rsv_migrate(&pending->block_rsv, - block_rsv, num_bytes); - BUG_ON(ret); - -#if 0 - /* insert orphan item for the snapshot */ - WARN_ON(!root->orphan_item_inserted); - ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root, - snap->root_key.objectid); - BUG_ON(ret); - snap->orphan_item_inserted = 1; -#endif -} - enum btrfs_orphan_cleanup_state { ORPHAN_CLEANUP_STARTED = 1, ORPHAN_CLEANUP_DONE = 2, @@ -2214,7 +2003,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) if (!root->orphan_block_rsv) { block_rsv = btrfs_alloc_block_rsv(root); - BUG_ON(!block_rsv); + if (!block_rsv) + return -ENOMEM; } spin_lock(&root->orphan_lock); @@ -2247,9 +2037,6 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) } spin_unlock(&root->orphan_lock); - if (block_rsv) - btrfs_add_durable_block_rsv(root->fs_info, block_rsv); - /* grab metadata reservation from transaction handle */ if (reserve) { ret = btrfs_orphan_reserve_metadata(trans, inode); @@ -2259,7 +2046,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) /* insert an orphan item to track this unlinked/truncated file */ if (insert >= 1) { ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); - BUG_ON(ret); + BUG_ON(ret && ret != -EEXIST); } /* insert an orphan item to track subvolume contains orphan files */ @@ -2316,6 +2103,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) struct btrfs_key key, found_key; struct btrfs_trans_handle *trans; struct inode *inode; + u64 last_objectid = 0; int ret = 0, nr_unlink = 0, nr_truncate = 0; if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) @@ -2367,41 +2155,81 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) * crossing root thing. we store the inode number in the * offset of the orphan item. */ + + if (found_key.offset == last_objectid) { + printk(KERN_ERR "btrfs: Error removing orphan entry, " + "stopping orphan cleanup\n"); + ret = -EINVAL; + goto out; + } + + last_objectid = found_key.offset; + found_key.objectid = found_key.offset; found_key.type = BTRFS_INODE_ITEM_KEY; found_key.offset = 0; inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); - if (IS_ERR(inode)) { - ret = PTR_ERR(inode); + ret = PTR_RET(inode); + if (ret && ret != -ESTALE) goto out; - } - /* - * add this inode to the orphan list so btrfs_orphan_del does - * the proper thing when we hit it - */ - spin_lock(&root->orphan_lock); - list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); - spin_unlock(&root->orphan_lock); + if (ret == -ESTALE && root == root->fs_info->tree_root) { + struct btrfs_root *dead_root; + struct btrfs_fs_info *fs_info = root->fs_info; + int is_dead_root = 0; + /* + * this is an orphan in the tree root. Currently these + * could come from 2 sources: + * a) a snapshot deletion in progress + * b) a free space cache inode + * We need to distinguish those two, as the snapshot + * orphan must not get deleted. + * find_dead_roots already ran before us, so if this + * is a snapshot deletion, we should find the root + * in the dead_roots list + */ + spin_lock(&fs_info->trans_lock); + list_for_each_entry(dead_root, &fs_info->dead_roots, + root_list) { + if (dead_root->root_key.objectid == + found_key.objectid) { + is_dead_root = 1; + break; + } + } + spin_unlock(&fs_info->trans_lock); + if (is_dead_root) { + /* prevent this orphan from being found again */ + key.offset = found_key.objectid - 1; + continue; + } + } /* - * if this is a bad inode, means we actually succeeded in - * removing the inode, but not the orphan record, which means - * we need to manually delete the orphan since iput will just - * do a destroy_inode + * Inode is already gone but the orphan item is still there, + * kill the orphan item. */ - if (is_bad_inode(inode)) { - trans = btrfs_start_transaction(root, 0); + if (ret == -ESTALE) { + trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out; } - btrfs_orphan_del(trans, inode); + ret = btrfs_del_orphan_item(trans, root, + found_key.objectid); + BUG_ON(ret); btrfs_end_transaction(trans, root); - iput(inode); continue; } + /* + * add this inode to the orphan list so btrfs_orphan_del does + * the proper thing when we hit it + */ + spin_lock(&root->orphan_lock); + list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); + spin_unlock(&root->orphan_lock); + /* if we have links, this was a truncate, lets do that */ if (inode->i_nlink) { if (!S_ISREG(inode->i_mode)) { @@ -2410,7 +2238,14 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) continue; } nr_truncate++; + /* + * Need to hold the imutex for reservation purposes, not + * a huge deal here but I have a WARN_ON in + * btrfs_delalloc_reserve_space to catch offenders. + */ + mutex_lock(&inode->i_mutex); ret = btrfs_truncate(inode); + mutex_unlock(&inode->i_mutex); } else { nr_unlink++; } @@ -2420,6 +2255,9 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) if (ret) goto out; } + /* release the path since we're done with it */ + btrfs_release_path(path); + root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE; if (root->orphan_block_rsv) @@ -2516,7 +2354,9 @@ static void btrfs_read_locked_inode(struct inode *inode) filled = true; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + goto make_bad; + path->leave_spinning = 1; memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); @@ -2531,15 +2371,8 @@ static void btrfs_read_locked_inode(struct inode *inode) inode_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); - if (!leaf->map_token) - map_private_extent_buffer(leaf, (unsigned long)inode_item, - sizeof(struct btrfs_inode_item), - &leaf->map_token, &leaf->kaddr, - &leaf->map_start, &leaf->map_len, - KM_USER1); - inode->i_mode = btrfs_inode_mode(leaf, inode_item); - inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); + set_nlink(inode, btrfs_inode_nlink(leaf, inode_item)); inode->i_uid = btrfs_inode_uid(leaf, inode_item); inode->i_gid = btrfs_inode_gid(leaf, inode_item); btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item)); @@ -2575,11 +2408,6 @@ cache_acl: if (!maybe_acls) cache_no_acl(inode); - if (leaf->map_token) { - unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); - leaf->map_token = NULL; - } - btrfs_free_path(path); switch (inode->i_mode & S_IFMT) { @@ -2624,13 +2452,6 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, struct btrfs_inode_item *item, struct inode *inode) { - if (!leaf->map_token) - map_private_extent_buffer(leaf, (unsigned long)item, - sizeof(struct btrfs_inode_item), - &leaf->map_token, &leaf->kaddr, - &leaf->map_start, &leaf->map_len, - KM_USER1); - btrfs_set_inode_uid(leaf, item, inode->i_uid); btrfs_set_inode_gid(leaf, item, inode->i_gid); btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); @@ -2659,17 +2480,12 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, btrfs_set_inode_rdev(leaf, item, inode->i_rdev); btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); btrfs_set_inode_block_group(leaf, item, 0); - - if (leaf->map_token) { - unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); - leaf->map_token = NULL; - } } /* * copy everything in the in-memory inode into the btree. */ -noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, +static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode) { struct btrfs_inode_item *inode_item; @@ -2677,21 +2493,6 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; int ret; - /* - * If the inode is a free space inode, we can deadlock during commit - * if we put it into the delayed code. - * - * The data relocation inode should also be directly updated - * without delay - */ - if (!is_free_space_inode(root, inode) - && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { - ret = btrfs_delayed_update_inode(trans, root, inode); - if (!ret) - btrfs_set_inode_last_trans(trans, inode); - return ret; - } - path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -2720,6 +2521,43 @@ failed: } /* + * copy everything in the in-memory inode into the btree. + */ +noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode) +{ + int ret; + + /* + * If the inode is a free space inode, we can deadlock during commit + * if we put it into the delayed code. + * + * The data relocation inode should also be directly updated + * without delay + */ + if (!btrfs_is_free_space_inode(root, inode) + && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { + ret = btrfs_delayed_update_inode(trans, root, inode); + if (!ret) + btrfs_set_inode_last_trans(trans, inode); + return ret; + } + + return btrfs_update_inode_item(trans, root, inode); +} + +static noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode) +{ + int ret; + + ret = btrfs_update_inode(trans, root, inode); + if (ret == -ENOSPC) + return btrfs_update_inode_item(trans, root, inode); + return ret; +} + +/* * unlink helper that gets used here in inode.c and in the tree logging * recovery code. It remove a link in a directory with a given name, and * also drops the back refs in the inode to the directory @@ -2857,7 +2695,16 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, u64 ino = btrfs_ino(inode); u64 dir_ino = btrfs_ino(dir); - trans = btrfs_start_transaction(root, 10); + /* + * 1 for the possible orphan item + * 1 for the dir item + * 1 for the dir index + * 1 for the inode ref + * 1 for the inode ref in the tree log + * 2 for the dir entries in the log + * 1 for the inode + */ + trans = btrfs_start_transaction(root, 8); if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) return trans; @@ -2880,7 +2727,8 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, return ERR_PTR(-ENOMEM); } - trans = btrfs_start_transaction(root, 0); + /* 1 for the orphan item */ + trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { btrfs_free_path(path); root->fs_info->enospc_unlink = 0; @@ -2985,6 +2833,12 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, err = 0; out: btrfs_free_path(path); + /* Migrate the orphan reservation over */ + if (!err) + err = btrfs_block_rsv_migrate(trans->block_rsv, + &root->fs_info->global_block_rsv, + trans->bytes_reserved); + if (err) { btrfs_end_transaction(trans, root); root->fs_info->enospc_unlink = 0; @@ -2999,6 +2853,9 @@ static void __unlink_end_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root) { if (trans->block_rsv == &root->fs_info->global_block_rsv) { + btrfs_block_rsv_release(root, trans->block_rsv, + trans->bytes_reserved); + trans->block_rsv = &root->fs_info->trans_block_rsv; BUG_ON(!root->fs_info->enospc_unlink); root->fs_info->enospc_unlink = 0; } @@ -3021,13 +2878,16 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, dentry->d_name.name, dentry->d_name.len); - BUG_ON(ret); + if (ret) + goto out; if (inode->i_nlink == 0) { ret = btrfs_orphan_add(trans, inode); - BUG_ON(ret); + if (ret) + goto out; } +out: nr = trans->blocks_used; __unlink_end_trans(trans, root); btrfs_btree_balance_dirty(root, nr); @@ -3170,6 +3030,11 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + path->reada = -1; + if (root->ref_cows || root == root->fs_info->tree_root) btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); @@ -3182,10 +3047,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, if (min_type == 0 && root == BTRFS_I(inode)->root) btrfs_kill_delayed_inode_items(inode); - path = btrfs_alloc_path(); - BUG_ON(!path); - path->reada = -1; - key.objectid = ino; key.offset = (u64)-1; key.type = (u8)-1; @@ -3386,6 +3247,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) pgoff_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); struct page *page; + gfp_t mask = btrfs_alloc_write_mask(mapping); int ret = 0; u64 page_start; u64 page_end; @@ -3398,7 +3260,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) ret = -ENOMEM; again: - page = grab_cache_page(mapping, index); + page = find_or_create_page(mapping, index, mask); if (!page) { btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); goto out; @@ -3519,7 +3381,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) u64 hint_byte = 0; hole_size = last_byte - cur_offset; - trans = btrfs_start_transaction(root, 2); + trans = btrfs_start_transaction(root, 3); if (IS_ERR(trans)) { err = PTR_ERR(trans); break; @@ -3528,19 +3390,26 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) err = btrfs_drop_extents(trans, inode, cur_offset, cur_offset + hole_size, &hint_byte, 1); - if (err) + if (err) { + btrfs_update_inode(trans, root, inode); + btrfs_end_transaction(trans, root); break; + } err = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), cur_offset, 0, 0, hole_size, 0, hole_size, 0, 0, 0); - if (err) + if (err) { + btrfs_update_inode(trans, root, inode); + btrfs_end_transaction(trans, root); break; + } btrfs_drop_extent_cache(inode, hole_start, last_byte - 1, 0); + btrfs_update_inode(trans, root, inode); btrfs_end_transaction(trans, root); } free_extent_map(em); @@ -3558,6 +3427,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) static int btrfs_setsize(struct inode *inode, loff_t newsize) { + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; loff_t oldsize = i_size_read(inode); int ret; @@ -3565,16 +3436,19 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize) return 0; if (newsize > oldsize) { - i_size_write(inode, newsize); - btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); truncate_pagecache(inode, oldsize, newsize); ret = btrfs_cont_expand(inode, oldsize, newsize); - if (ret) { - btrfs_setsize(inode, oldsize); + if (ret) return ret; - } - mark_inode_dirty(inode); + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); + + i_size_write(inode, newsize); + btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); + ret = btrfs_update_inode(trans, root, inode); + btrfs_end_transaction_throttle(trans, root); } else { /* @@ -3614,9 +3488,9 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_valid) { setattr_copy(inode, attr); - mark_inode_dirty(inode); + err = btrfs_dirty_inode(inode); - if (attr->ia_valid & ATTR_MODE) + if (!err && attr->ia_valid & ATTR_MODE) err = btrfs_acl_chmod(inode); } @@ -3627,6 +3501,8 @@ void btrfs_evict_inode(struct inode *inode) { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_block_rsv *rsv, *global_rsv; + u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); unsigned long nr; int ret; @@ -3634,7 +3510,7 @@ void btrfs_evict_inode(struct inode *inode) truncate_inode_pages(&inode->i_data, 0); if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || - is_free_space_inode(root, inode))) + btrfs_is_free_space_inode(root, inode))) goto no_delete; if (is_bad_inode(inode)) { @@ -3642,7 +3518,8 @@ void btrfs_evict_inode(struct inode *inode) goto no_delete; } /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */ - btrfs_wait_ordered_range(inode, 0, (u64)-1); + if (!special_file(inode->i_mode)) + btrfs_wait_ordered_range(inode, 0, (u64)-1); if (root->fs_info->log_root_recovering) { BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan)); @@ -3654,22 +3531,55 @@ void btrfs_evict_inode(struct inode *inode) goto no_delete; } + rsv = btrfs_alloc_block_rsv(root); + if (!rsv) { + btrfs_orphan_del(NULL, inode); + goto no_delete; + } + rsv->size = min_size; + global_rsv = &root->fs_info->global_block_rsv; + btrfs_i_size_write(inode, 0); + /* + * This is a bit simpler than btrfs_truncate since + * + * 1) We've already reserved our space for our orphan item in the + * unlink. + * 2) We're going to delete the inode item, so we don't need to update + * it at all. + * + * So we just need to reserve some slack space in case we add bytes when + * doing the truncate. + */ while (1) { - trans = btrfs_join_transaction(root); - BUG_ON(IS_ERR(trans)); - trans->block_rsv = root->orphan_block_rsv; + ret = btrfs_block_rsv_refill_noflush(root, rsv, min_size); + + /* + * Try and steal from the global reserve since we will + * likely not use this space anyway, we want to try as + * hard as possible to get this to work. + */ + if (ret) + ret = btrfs_block_rsv_migrate(global_rsv, rsv, min_size); - ret = btrfs_block_rsv_check(trans, root, - root->orphan_block_rsv, 0, 5); if (ret) { - BUG_ON(ret != -EAGAIN); - ret = btrfs_commit_transaction(trans, root); - BUG_ON(ret); - continue; + printk(KERN_WARNING "Could not get space for a " + "delete, will truncate on mount %d\n", ret); + btrfs_orphan_del(NULL, inode); + btrfs_free_block_rsv(root, rsv); + goto no_delete; + } + + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + btrfs_orphan_del(NULL, inode); + btrfs_free_block_rsv(root, rsv); + goto no_delete; } + trans->block_rsv = rsv; + ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); if (ret != -EAGAIN) break; @@ -3678,14 +3588,17 @@ void btrfs_evict_inode(struct inode *inode) btrfs_end_transaction(trans, root); trans = NULL; btrfs_btree_balance_dirty(root, nr); - } + btrfs_free_block_rsv(root, rsv); + if (ret == 0) { + trans->block_rsv = root->orphan_block_rsv; ret = btrfs_orphan_del(trans, inode); BUG_ON(ret); } + trans->block_rsv = &root->fs_info->trans_block_rsv; if (!(root == root->fs_info->tree_root || root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)) btrfs_return_ino(root, btrfs_ino(inode)); @@ -3713,7 +3626,8 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, int ret = 0; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name, namelen, 0); @@ -3934,7 +3848,9 @@ again: static int btrfs_init_locked_inode(struct inode *inode, void *p) { struct btrfs_iget_args *args = p; - inode->i_ino = args->ino; + inode->i_ino = args->location->objectid; + memcpy(&BTRFS_I(inode)->location, args->location, + sizeof(*args->location)); BTRFS_I(inode)->root = args->root; btrfs_set_inode_space_info(args->root, inode); return 0; @@ -3943,20 +3859,20 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) static int btrfs_find_actor(struct inode *inode, void *opaque) { struct btrfs_iget_args *args = opaque; - return args->ino == btrfs_ino(inode) && + return args->location->objectid == BTRFS_I(inode)->location.objectid && args->root == BTRFS_I(inode)->root; } static struct inode *btrfs_iget_locked(struct super_block *s, - u64 objectid, + struct btrfs_key *location, struct btrfs_root *root) { struct inode *inode; struct btrfs_iget_args args; - args.ino = objectid; + args.location = location; args.root = root; - inode = iget5_locked(s, objectid, btrfs_find_actor, + inode = iget5_locked(s, location->objectid, btrfs_find_actor, btrfs_init_locked_inode, (void *)&args); return inode; @@ -3970,18 +3886,22 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, { struct inode *inode; - inode = btrfs_iget_locked(s, location->objectid, root); + inode = btrfs_iget_locked(s, location, root); if (!inode) return ERR_PTR(-ENOMEM); if (inode->i_state & I_NEW) { - BTRFS_I(inode)->root = root; - memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); btrfs_read_locked_inode(inode); - inode_tree_add(inode); - unlock_new_inode(inode); - if (new) - *new = 1; + if (!is_bad_inode(inode)) { + inode_tree_add(inode); + unlock_new_inode(inode); + if (new) + *new = 1; + } else { + unlock_new_inode(inode); + iput(inode); + inode = ERR_PTR(-ESTALE); + } } return inode; @@ -4016,12 +3936,20 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) struct btrfs_root *sub_root = root; struct btrfs_key location; int index; - int ret; + int ret = 0; if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - ret = btrfs_inode_by_name(dir, dentry, &location); + if (unlikely(d_need_lookup(dentry))) { + memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key)); + kfree(dentry->d_fsdata); + dentry->d_fsdata = NULL; + /* This thing is hashed, drop it for now */ + d_drop(dentry); + } else { + ret = btrfs_inode_by_name(dir, dentry, &location); + } if (ret < 0) return ERR_PTR(ret); @@ -4076,16 +4004,24 @@ static int btrfs_dentry_delete(const struct dentry *dentry) return 0; } +static void btrfs_dentry_release(struct dentry *dentry) +{ + if (dentry->d_fsdata) + kfree(dentry->d_fsdata); +} + static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { - struct inode *inode; - - inode = btrfs_lookup_dentry(dir, dentry); - if (IS_ERR(inode)) - return ERR_CAST(inode); + struct dentry *ret; - return d_splice_alias(inode, dentry); + ret = d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry); + if (unlikely(d_need_lookup(dentry))) { + spin_lock(&dentry->d_lock); + dentry->d_flags &= ~DCACHE_NEED_LOOKUP; + spin_unlock(&dentry->d_lock); + } + return ret; } unsigned char btrfs_filetype_table[] = { @@ -4104,6 +4040,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, struct btrfs_path *path; struct list_head ins_list; struct list_head del_list; + struct qstr q; int ret; struct extent_buffer *leaf; int slot; @@ -4194,6 +4131,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, while (di_cur < di_total) { struct btrfs_key location; + struct dentry *tmp; if (verify_dir_item(root, leaf, di)) break; @@ -4214,6 +4152,33 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; btrfs_dir_item_key_to_cpu(leaf, di, &location); + q.name = name_ptr; + q.len = name_len; + q.hash = full_name_hash(q.name, q.len); + tmp = d_lookup(filp->f_dentry, &q); + if (!tmp) { + struct btrfs_key *newkey; + + newkey = kzalloc(sizeof(struct btrfs_key), + GFP_NOFS); + if (!newkey) + goto no_dentry; + tmp = d_alloc(filp->f_dentry, &q); + if (!tmp) { + kfree(newkey); + dput(tmp); + goto no_dentry; + } + memcpy(newkey, &location, + sizeof(struct btrfs_key)); + tmp->d_fsdata = newkey; + tmp->d_flags |= DCACHE_NEED_LOOKUP; + d_rehash(tmp); + dput(tmp); + } else { + dput(tmp); + } +no_dentry: /* is this a reference to our own snapshot? If so * skip it */ @@ -4278,7 +4243,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) if (BTRFS_I(inode)->dummy_inode) return 0; - if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode)) + if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode)) nolock = true; if (wbc->sync_mode == WB_SYNC_ALL) { @@ -4302,42 +4267,80 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) * FIXME, needs more benchmarking...there are no reasons other than performance * to keep or drop this code. */ -void btrfs_dirty_inode(struct inode *inode, int flags) +int btrfs_dirty_inode(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; int ret; if (BTRFS_I(inode)->dummy_inode) - return; + return 0; trans = btrfs_join_transaction(root); - BUG_ON(IS_ERR(trans)); + if (IS_ERR(trans)) + return PTR_ERR(trans); ret = btrfs_update_inode(trans, root, inode); if (ret && ret == -ENOSPC) { /* whoops, lets try again with the full transaction */ btrfs_end_transaction(trans, root); trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - printk_ratelimited(KERN_ERR "btrfs: fail to " - "dirty inode %llu error %ld\n", - (unsigned long long)btrfs_ino(inode), - PTR_ERR(trans)); - return; - } + if (IS_ERR(trans)) + return PTR_ERR(trans); ret = btrfs_update_inode(trans, root, inode); - if (ret) { - printk_ratelimited(KERN_ERR "btrfs: fail to " - "dirty inode %llu error %d\n", - (unsigned long long)btrfs_ino(inode), - ret); - } } btrfs_end_transaction(trans, root); if (BTRFS_I(inode)->delayed_node) btrfs_balance_delayed_items(root); + + return ret; +} + +/* + * This is a copy of file_update_time. We need this so we can return error on + * ENOSPC for updating the inode in the case of file write and mmap writes. + */ +int btrfs_update_time(struct file *file) +{ + struct inode *inode = file->f_path.dentry->d_inode; + struct timespec now; + int ret; + enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0; + + /* First try to exhaust all avenues to not sync */ + if (IS_NOCMTIME(inode)) + return 0; + + now = current_fs_time(inode->i_sb); + if (!timespec_equal(&inode->i_mtime, &now)) + sync_it = S_MTIME; + + if (!timespec_equal(&inode->i_ctime, &now)) + sync_it |= S_CTIME; + + if (IS_I_VERSION(inode)) + sync_it |= S_VERSION; + + if (!sync_it) + return 0; + + /* Finally allowed to write? Takes lock. */ + if (mnt_want_write_file(file)) + return 0; + + /* Only change inode inside the lock region */ + if (sync_it & S_VERSION) + inode_inc_iversion(inode); + if (sync_it & S_CTIME) + inode->i_ctime = now; + if (sync_it & S_MTIME) + inode->i_mtime = now; + ret = btrfs_dirty_inode(inode); + if (!ret) + mark_inode_dirty_sync(inode); + mnt_drop_write(file->f_path.mnt); + return ret; } /* @@ -4439,7 +4442,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, int owner; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return ERR_PTR(-ENOMEM); inode = new_inode(root->fs_info->sb); if (!inode) { @@ -4474,7 +4478,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_generation = BTRFS_I(inode)->generation; btrfs_set_inode_space_info(root, inode); - if (mode & S_IFDIR) + if (S_ISDIR(mode)) owner = 0; else owner = 1; @@ -4519,7 +4523,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, btrfs_inherit_iflags(inode, dir); - if ((mode & S_IFREG)) { + if (S_ISREG(mode)) { if (btrfs_test_opt(root, NODATASUM)) BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; if (btrfs_test_opt(root, NODATACOW) || @@ -4601,10 +4605,6 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans, int err = btrfs_add_link(trans, dir, inode, dentry->d_name.name, dentry->d_name.len, backref, index); - if (!err) { - d_instantiate(dentry, inode); - return 0; - } if (err > 0) err = -EEXIST; return err; @@ -4652,13 +4652,21 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, goto out_unlock; } + /* + * If the active LSM wants to access the inode during + * d_instantiate it needs these. Smack checks to see + * if the filesystem supports xattrs by looking at the + * ops vector. + */ + + inode->i_op = &btrfs_special_inode_operations; err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) drop_inode = 1; else { - inode->i_op = &btrfs_special_inode_operations; init_special_inode(inode, inode->i_mode, rdev); btrfs_update_inode(trans, root, inode); + d_instantiate(dentry, inode); } out_unlock: nr = trans->blocks_used; @@ -4710,15 +4718,23 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, goto out_unlock; } + /* + * If the active LSM wants to access the inode during + * d_instantiate it needs these. Smack checks to see + * if the filesystem supports xattrs by looking at the + * ops vector. + */ + inode->i_fop = &btrfs_file_operations; + inode->i_op = &btrfs_file_inode_operations; + err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) drop_inode = 1; else { inode->i_mapping->a_ops = &btrfs_aops; inode->i_mapping->backing_dev_info = &root->fs_info->bdi; - inode->i_fop = &btrfs_file_operations; - inode->i_op = &btrfs_file_inode_operations; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; + d_instantiate(dentry, inode); } out_unlock: nr = trans->blocks_used; @@ -4773,11 +4789,11 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (err) { drop_inode = 1; } else { - struct dentry *parent = dget_parent(dentry); + struct dentry *parent = dentry->d_parent; err = btrfs_update_inode(trans, root, inode); BUG_ON(err); + d_instantiate(dentry, inode); btrfs_log_new_name(trans, inode, NULL, parent); - dput(parent); } nr = trans->blocks_used; @@ -5757,8 +5773,7 @@ again: if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { ret = btrfs_ordered_update_i_size(inode, 0, ordered); if (!ret) - ret = btrfs_update_inode(trans, root, inode); - err = ret; + err = btrfs_update_inode_fallback(trans, root, inode); goto out; } @@ -5795,8 +5810,8 @@ again: add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); ret = btrfs_ordered_update_i_size(inode, 0, ordered); - if (!ret) - btrfs_update_inode(trans, root, inode); + if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) + btrfs_update_inode_fallback(trans, root, inode); ret = 0; out_unlock: unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, @@ -6251,7 +6266,7 @@ int btrfs_readpage(struct file *file, struct page *page) { struct extent_io_tree *tree; tree = &BTRFS_I(page->mapping->host)->io_tree; - return extent_read_full_page(tree, page, btrfs_get_extent); + return extent_read_full_page(tree, page, btrfs_get_extent, 0); } static int btrfs_writepage(struct page *page, struct writeback_control *wbc) @@ -6402,7 +6417,12 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) u64 page_start; u64 page_end; + /* Need this to keep space reservations serialized */ + mutex_lock(&inode->i_mutex); ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); + mutex_unlock(&inode->i_mutex); + if (!ret) + ret = btrfs_update_time(vma->vm_file); if (ret) { if (ret == -ENOMEM) ret = VM_FAULT_OOM; @@ -6503,6 +6523,7 @@ static int btrfs_truncate(struct inode *inode) struct btrfs_trans_handle *trans; unsigned long nr; u64 mask = root->sectorsize - 1; + u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); if (ret) @@ -6550,19 +6571,23 @@ static int btrfs_truncate(struct inode *inode) rsv = btrfs_alloc_block_rsv(root); if (!rsv) return -ENOMEM; - btrfs_add_durable_block_rsv(root->fs_info, rsv); + rsv->size = min_size; + /* + * 1 for the truncate slack space + * 1 for the orphan item we're going to add + * 1 for the orphan item deletion + * 1 for updating the inode. + */ trans = btrfs_start_transaction(root, 4); if (IS_ERR(trans)) { err = PTR_ERR(trans); goto out; } - /* - * Reserve space for the truncate process. Truncate should be adding - * space, but if there are snapshots it may end up using space. - */ - ret = btrfs_truncate_reserve_metadata(trans, root, rsv); + /* Migrate the slack space for the truncate to our reserve */ + ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv, + min_size); BUG_ON(ret); ret = btrfs_orphan_add(trans, inode); @@ -6571,21 +6596,6 @@ static int btrfs_truncate(struct inode *inode) goto out; } - nr = trans->blocks_used; - btrfs_end_transaction(trans, root); - btrfs_btree_balance_dirty(root, nr); - - /* - * Ok so we've already migrated our bytes over for the truncate, so here - * just reserve the one slot we need for updating the inode. - */ - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - err = PTR_ERR(trans); - goto out; - } - trans->block_rsv = rsv; - /* * setattr is responsible for setting the ordered_data_close flag, * but that is only tested during the last file release. That @@ -6607,20 +6617,31 @@ static int btrfs_truncate(struct inode *inode) btrfs_add_ordered_operation(trans, root, inode); while (1) { + ret = btrfs_block_rsv_refill(root, rsv, min_size); + if (ret) { + /* + * This can only happen with the original transaction we + * started above, every other time we shouldn't have a + * transaction started yet. + */ + if (ret == -EAGAIN) + goto end_trans; + err = ret; + break; + } + if (!trans) { - trans = btrfs_start_transaction(root, 3); + /* Just need the 1 for updating the inode */ + trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { - err = PTR_ERR(trans); - goto out; + ret = err = PTR_ERR(trans); + trans = NULL; + break; } - - ret = btrfs_truncate_reserve_metadata(trans, root, - rsv); - BUG_ON(ret); - - trans->block_rsv = rsv; } + trans->block_rsv = rsv; + ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, BTRFS_EXTENT_DATA_KEY); @@ -6635,7 +6656,7 @@ static int btrfs_truncate(struct inode *inode) err = ret; break; } - +end_trans: nr = trans->blocks_used; btrfs_end_transaction(trans, root); trans = NULL; @@ -6655,14 +6676,16 @@ static int btrfs_truncate(struct inode *inode) ret = btrfs_orphan_del(NULL, inode); } - trans->block_rsv = &root->fs_info->trans_block_rsv; - ret = btrfs_update_inode(trans, root, inode); - if (ret && !err) - err = ret; + if (trans) { + trans->block_rsv = &root->fs_info->trans_block_rsv; + ret = btrfs_update_inode(trans, root, inode); + if (ret && !err) + err = ret; - nr = trans->blocks_used; - ret = btrfs_end_transaction_throttle(trans, root); - btrfs_btree_balance_dirty(root, nr); + nr = trans->blocks_used; + ret = btrfs_end_transaction_throttle(trans, root); + btrfs_btree_balance_dirty(root, nr); + } out: btrfs_free_block_rsv(root, rsv); @@ -6690,7 +6713,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; - inode->i_nlink = 1; + set_nlink(inode, 1); btrfs_i_size_write(inode, 0); err = btrfs_update_inode(trans, new_root, inode); @@ -6700,19 +6723,6 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, return 0; } -/* helper function for file defrag and space balancing. This - * forces readahead on a given range of bytes in an inode - */ -unsigned long btrfs_force_ra(struct address_space *mapping, - struct file_ra_state *ra, struct file *file, - pgoff_t offset, pgoff_t last_index) -{ - pgoff_t req_size = last_index - offset + 1; - - page_cache_sync_readahead(mapping, ra, file, offset, req_size); - return offset + req_size; -} - struct inode *btrfs_alloc_inode(struct super_block *sb) { struct btrfs_inode *ei; @@ -6730,19 +6740,21 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->last_sub_trans = 0; ei->logged_trans = 0; ei->delalloc_bytes = 0; - ei->reserved_bytes = 0; ei->disk_i_size = 0; ei->flags = 0; + ei->csum_bytes = 0; ei->index_cnt = (u64)-1; ei->last_unlink_trans = 0; - atomic_set(&ei->outstanding_extents, 0); - atomic_set(&ei->reserved_extents, 0); + spin_lock_init(&ei->lock); + ei->outstanding_extents = 0; + ei->reserved_extents = 0; ei->ordered_data_close = 0; ei->orphan_meta_reserved = 0; ei->dummy_inode = 0; ei->in_defrag = 0; + ei->delalloc_meta_reserved = 0; ei->force_compress = BTRFS_COMPRESS_NONE; ei->delayed_node = NULL; @@ -6775,8 +6787,10 @@ void btrfs_destroy_inode(struct inode *inode) WARN_ON(!list_empty(&inode->i_dentry)); WARN_ON(inode->i_data.nrpages); - WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents)); - WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents)); + WARN_ON(BTRFS_I(inode)->outstanding_extents); + WARN_ON(BTRFS_I(inode)->reserved_extents); + WARN_ON(BTRFS_I(inode)->delalloc_bytes); + WARN_ON(BTRFS_I(inode)->csum_bytes); /* * This can happen where we create an inode, but somebody else also @@ -6831,7 +6845,7 @@ int btrfs_drop_inode(struct inode *inode) struct btrfs_root *root = BTRFS_I(inode)->root; if (btrfs_root_refs(&root->root_item) == 0 && - !is_free_space_inode(root, inode)) + !btrfs_is_free_space_inode(root, inode)) return 1; else return generic_drop_inode(inode); @@ -6900,11 +6914,13 @@ static int btrfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; + u32 blocksize = inode->i_sb->s_blocksize; + generic_fillattr(inode, stat); - stat->dev = BTRFS_I(inode)->root->anon_super.s_dev; + stat->dev = BTRFS_I(inode)->root->anon_dev; stat->blksize = PAGE_CACHE_SIZE; - stat->blocks = (inode_get_bytes(inode) + - BTRFS_I(inode)->delalloc_bytes) >> 9; + stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) + + ALIGN(BTRFS_I(inode)->delalloc_bytes, blocksize)) >> 9; return 0; } @@ -7069,9 +7085,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, BUG_ON(ret); if (old_ino != BTRFS_FIRST_FREE_OBJECTID) { - struct dentry *parent = dget_parent(new_dentry); + struct dentry *parent = new_dentry->d_parent; btrfs_log_new_name(trans, old_inode, old_dir, parent); - dput(parent); btrfs_end_log_trans(root); } out_fail: @@ -7181,21 +7196,32 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, goto out_unlock; } + /* + * If the active LSM wants to access the inode during + * d_instantiate it needs these. Smack checks to see + * if the filesystem supports xattrs by looking at the + * ops vector. + */ + inode->i_fop = &btrfs_file_operations; + inode->i_op = &btrfs_file_inode_operations; + err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) drop_inode = 1; else { inode->i_mapping->a_ops = &btrfs_aops; inode->i_mapping->backing_dev_info = &root->fs_info->bdi; - inode->i_fop = &btrfs_file_operations; - inode->i_op = &btrfs_file_inode_operations; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } if (drop_inode) goto out_unlock; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) { + err = -ENOMEM; + drop_inode = 1; + goto out_unlock; + } key.objectid = btrfs_ino(inode); key.offset = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); @@ -7233,6 +7259,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, drop_inode = 1; out_unlock: + if (!err) + d_instantiate(dentry, inode); nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); if (drop_inode) { @@ -7332,15 +7360,19 @@ static int btrfs_set_page_dirty(struct page *page) return __set_page_dirty_nobuffers(page); } -static int btrfs_permission(struct inode *inode, int mask, unsigned int flags) +static int btrfs_permission(struct inode *inode, int mask) { struct btrfs_root *root = BTRFS_I(inode)->root; + umode_t mode = inode->i_mode; - if (btrfs_root_readonly(root) && (mask & MAY_WRITE)) - return -EROFS; - if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) - return -EACCES; - return generic_permission(inode, mask, flags, btrfs_check_acl); + if (mask & MAY_WRITE && + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) { + if (btrfs_root_readonly(root)) + return -EROFS; + if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) + return -EACCES; + } + return generic_permission(inode, mask); } static const struct inode_operations btrfs_dir_inode_operations = { @@ -7360,10 +7392,12 @@ static const struct inode_operations btrfs_dir_inode_operations = { .listxattr = btrfs_listxattr, .removexattr = btrfs_removexattr, .permission = btrfs_permission, + .get_acl = btrfs_get_acl, }; static const struct inode_operations btrfs_dir_ro_inode_operations = { .lookup = btrfs_lookup, .permission = btrfs_permission, + .get_acl = btrfs_get_acl, }; static const struct file_operations btrfs_dir_file_operations = { @@ -7385,7 +7419,6 @@ static struct extent_io_ops btrfs_extent_io_ops = { .readpage_end_io_hook = btrfs_readpage_end_io_hook, .writepage_end_io_hook = btrfs_writepage_end_io_hook, .writepage_start_hook = btrfs_writepage_start_hook, - .readpage_io_failed_hook = btrfs_io_failed_hook, .set_bit_hook = btrfs_set_bit_hook, .clear_bit_hook = btrfs_clear_bit_hook, .merge_extent_hook = btrfs_merge_extent_hook, @@ -7432,6 +7465,7 @@ static const struct inode_operations btrfs_file_inode_operations = { .removexattr = btrfs_removexattr, .permission = btrfs_permission, .fiemap = btrfs_fiemap, + .get_acl = btrfs_get_acl, }; static const struct inode_operations btrfs_special_inode_operations = { .getattr = btrfs_getattr, @@ -7441,19 +7475,23 @@ static const struct inode_operations btrfs_special_inode_operations = { .getxattr = btrfs_getxattr, .listxattr = btrfs_listxattr, .removexattr = btrfs_removexattr, + .get_acl = btrfs_get_acl, }; static const struct inode_operations btrfs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = page_follow_link_light, .put_link = page_put_link, .getattr = btrfs_getattr, + .setattr = btrfs_setattr, .permission = btrfs_permission, .setxattr = btrfs_setxattr, .getxattr = btrfs_getxattr, .listxattr = btrfs_listxattr, .removexattr = btrfs_removexattr, + .get_acl = btrfs_get_acl, }; const struct dentry_operations btrfs_dentry_operations = { .d_delete = btrfs_dentry_delete, + .d_release = btrfs_dentry_release, }; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a205027..ba26540 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -51,6 +51,7 @@ #include "volumes.h" #include "locking.h" #include "inode-map.h" +#include "backref.h" /* Mask out flags that are inappropriate for the given type of inode. */ static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) @@ -117,7 +118,7 @@ void btrfs_update_iflags(struct inode *inode) /* * Inherit flags from the parent inode. * - * Unlike extN we don't have any flags we don't want to inherit currently. + * Currently only the compression flags and the cow flags are inherited. */ void btrfs_inherit_iflags(struct inode *inode, struct inode *dir) { @@ -128,12 +129,17 @@ void btrfs_inherit_iflags(struct inode *inode, struct inode *dir) flags = BTRFS_I(dir)->flags; - if (S_ISREG(inode->i_mode)) - flags &= ~BTRFS_INODE_DIRSYNC; - else if (!S_ISDIR(inode->i_mode)) - flags &= (BTRFS_INODE_NODUMP | BTRFS_INODE_NOATIME); + if (flags & BTRFS_INODE_NOCOMPRESS) { + BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS; + BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; + } else if (flags & BTRFS_INODE_COMPRESS) { + BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS; + BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS; + } + + if (flags & BTRFS_INODE_NODATACOW) + BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; - BTRFS_I(inode)->flags = flags; btrfs_update_iflags(inode); } @@ -246,11 +252,11 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); + btrfs_update_iflags(inode); + inode->i_ctime = CURRENT_TIME; ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); - btrfs_update_iflags(inode); - inode->i_ctime = CURRENT_TIME; btrfs_end_transaction(trans, root); mnt_drop_write(file->f_path.mnt); @@ -277,6 +283,7 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) struct fstrim_range range; u64 minlen = ULLONG_MAX; u64 num_devices = 0; + u64 total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy); int ret; if (!capable(CAP_SYS_ADMIN)) @@ -295,12 +302,15 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) } } rcu_read_unlock(); + if (!num_devices) return -EOPNOTSUPP; - if (copy_from_user(&range, arg, sizeof(range))) return -EFAULT; + if (range.start > total_bytes) + return -EINVAL; + range.len = min(range.len, total_bytes - range.start); range.minlen = max(range.minlen, minlen); ret = btrfs_trim_fs(root, &range); if (ret < 0) @@ -323,7 +333,7 @@ static noinline int create_subvol(struct btrfs_root *root, struct btrfs_inode_item *inode_item; struct extent_buffer *leaf; struct btrfs_root *new_root; - struct dentry *parent = dget_parent(dentry); + struct dentry *parent = dentry->d_parent; struct inode *dir; int ret; int err; @@ -332,10 +342,8 @@ static noinline int create_subvol(struct btrfs_root *root, u64 index = 0; ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid); - if (ret) { - dput(parent); + if (ret) return ret; - } dir = parent->d_inode; @@ -346,10 +354,8 @@ static noinline int create_subvol(struct btrfs_root *root, * 2 - dir items */ trans = btrfs_start_transaction(root, 6); - if (IS_ERR(trans)) { - dput(parent); + if (IS_ERR(trans)) return PTR_ERR(trans); - } leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, objectid, NULL, 0, 0, 0); @@ -439,7 +445,6 @@ static noinline int create_subvol(struct btrfs_root *root, d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); fail: - dput(parent); if (async_transid) { *async_transid = trans->transid; err = btrfs_commit_transaction_async(trans, root, 1); @@ -456,7 +461,6 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, bool readonly) { struct inode *inode; - struct dentry *parent; struct btrfs_pending_snapshot *pending_snapshot; struct btrfs_trans_handle *trans; int ret; @@ -504,9 +508,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, if (ret) goto fail; - parent = dget_parent(dentry); - inode = btrfs_lookup_dentry(parent->d_inode, dentry); - dput(parent); + inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); if (IS_ERR(inode)) { ret = PTR_ERR(inode); goto fail; @@ -768,7 +770,7 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, int ret = 1; /* - * make sure that once we start defragging and extent, we keep on + * make sure that once we start defragging an extent, we keep on * defragging it */ if (start < *defrag_end) @@ -813,7 +815,6 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, * extent will force at least part of that big extent to be defragged. */ if (ret) { - *last_len += len; *defrag_end = extent_map_end(em); } else { *last_len = 0; @@ -851,13 +852,16 @@ static int cluster_pages_for_defrag(struct inode *inode, int i_done; struct btrfs_ordered_extent *ordered; struct extent_state *cached_state = NULL; + gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); if (isize == 0) return 0; file_end = (isize - 1) >> PAGE_CACHE_SHIFT; + mutex_lock(&inode->i_mutex); ret = btrfs_delalloc_reserve_space(inode, num_pages << PAGE_CACHE_SHIFT); + mutex_unlock(&inode->i_mutex); if (ret) return ret; again: @@ -867,8 +871,8 @@ again: /* step one, lock all the pages */ for (i = 0; i < num_pages; i++) { struct page *page; - page = grab_cache_page(inode->i_mapping, - start_index + i); + page = find_or_create_page(inode->i_mapping, + start_index + i, mask); if (!page) break; @@ -938,7 +942,9 @@ again: GFP_NOFS); if (i_done != num_pages) { - atomic_inc(&BTRFS_I(inode)->outstanding_extents); + spin_lock(&BTRFS_I(inode)->lock); + BTRFS_I(inode)->outstanding_extents++; + spin_unlock(&BTRFS_I(inode)->lock); btrfs_delalloc_release_space(inode, (num_pages - i_done) << PAGE_CACHE_SHIFT); } @@ -978,18 +984,20 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, struct btrfs_super_block *disk_super; struct file_ra_state *ra = NULL; unsigned long last_index; + u64 isize = i_size_read(inode); u64 features; u64 last_len = 0; u64 skip = 0; u64 defrag_end = 0; u64 newer_off = range->start; - int newer_left = 0; unsigned long i; + unsigned long ra_index = 0; int ret; int defrag_count = 0; int compress_type = BTRFS_COMPRESS_ZLIB; int extent_thresh = range->extent_thresh; - int newer_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; + int max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; + int cluster = max_cluster; u64 new_align = ~((u64)128 * 1024 - 1); struct page **pages = NULL; @@ -1003,7 +1011,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, compress_type = range->compress_type; } - if (inode->i_size == 0) + if (isize == 0) return 0; /* @@ -1019,7 +1027,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, ra = &file->f_ra; } - pages = kmalloc(sizeof(struct page *) * newer_cluster, + pages = kmalloc(sizeof(struct page *) * max_cluster, GFP_NOFS); if (!pages) { ret = -ENOMEM; @@ -1028,10 +1036,10 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, /* find the last page to defrag */ if (range->start + range->len > range->start) { - last_index = min_t(u64, inode->i_size - 1, + last_index = min_t(u64, isize - 1, range->start + range->len - 1) >> PAGE_CACHE_SHIFT; } else { - last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; + last_index = (isize - 1) >> PAGE_CACHE_SHIFT; } if (newer_than) { @@ -1044,16 +1052,24 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, * the extents in the file evenly spaced */ i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; - newer_left = newer_cluster; } else goto out_ra; } else { i = range->start >> PAGE_CACHE_SHIFT; } if (!max_to_defrag) - max_to_defrag = last_index - 1; + max_to_defrag = last_index; - while (i <= last_index && defrag_count < max_to_defrag) { + /* + * make writeback starts from i, so the defrag range can be + * written sequentially. + */ + if (i < inode->i_mapping->writeback_index) + inode->i_mapping->writeback_index = i; + + while (i <= last_index && defrag_count < max_to_defrag && + (i < (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT)) { /* * make sure we stop running if someone unmounts * the FS @@ -1076,18 +1092,31 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, i = max(i + 1, next); continue; } + + if (!newer_than) { + cluster = (PAGE_CACHE_ALIGN(defrag_end) >> + PAGE_CACHE_SHIFT) - i; + cluster = min(cluster, max_cluster); + } else { + cluster = max_cluster; + } + if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) BTRFS_I(inode)->force_compress = compress_type; - btrfs_force_ra(inode->i_mapping, ra, file, i, newer_cluster); + if (i + cluster > ra_index) { + ra_index = max(i, ra_index); + btrfs_force_ra(inode->i_mapping, ra, file, ra_index, + cluster); + ra_index += max_cluster; + } - ret = cluster_pages_for_defrag(inode, pages, i, newer_cluster); + ret = cluster_pages_for_defrag(inode, pages, i, cluster); if (ret < 0) goto out_ra; defrag_count += ret; balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret); - i += ret; if (newer_than) { if (newer_off == (u64)-1) @@ -1102,12 +1131,17 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, if (!ret) { range->start = newer_off; i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; - newer_left = newer_cluster; } else { break; } } else { - i++; + if (ret > 0) { + i += ret; + last_len += ret << PAGE_CACHE_SHIFT; + } else { + i++; + last_len = 0; + } } } @@ -1133,16 +1167,14 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, mutex_unlock(&inode->i_mutex); } - disk_super = &root->fs_info->super_copy; + disk_super = root->fs_info->super_copy; features = btrfs_super_incompat_flags(disk_super); if (range->compress_type == BTRFS_COMPRESS_LZO) { features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; btrfs_set_super_incompat_flags(disk_super, features); } - if (!file) - kfree(ra); - return defrag_count; + ret = defrag_count; out_ra: if (!file) @@ -1186,12 +1218,12 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, *devstr = '\0'; devstr = vol_args->name; devid = simple_strtoull(devstr, &end, 10); - printk(KERN_INFO "resizing devid %llu\n", + printk(KERN_INFO "btrfs: resizing devid %llu\n", (unsigned long long)devid); } device = btrfs_find_device(root, devid, NULL, NULL); if (!device) { - printk(KERN_INFO "resizer unable to find device %llu\n", + printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", (unsigned long long)devid); ret = -EINVAL; goto out_unlock; @@ -1237,7 +1269,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, do_div(new_size, root->sectorsize); new_size *= root->sectorsize; - printk(KERN_INFO "new size for %s is %llu\n", + printk(KERN_INFO "btrfs: new size for %s is %llu\n", device->name, (unsigned long long)new_size); if (new_size > old_size) { @@ -1248,7 +1280,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, } ret = btrfs_grow_device(trans, device, new_size); btrfs_commit_transaction(trans, root); - } else { + } else if (new_size < old_size) { ret = btrfs_shrink_device(device, new_size); } @@ -1295,12 +1327,17 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, printk(KERN_INFO "btrfs: Snapshot src from " "another FS\n"); ret = -EINVAL; - fput(src_file); - goto out; + } else if (!inode_owner_or_capable(src_inode)) { + /* + * Subvolume creation is not restricted, but snapshots + * are limited to own subvolumes only + */ + ret = -EPERM; + } else { + ret = btrfs_mksubvol(&file->f_path, name, namelen, + BTRFS_I(src_inode)->root, + transid, readonly); } - ret = btrfs_mksubvol(&file->f_path, name, namelen, - BTRFS_I(src_inode)->root, - transid, readonly); fput(src_file); } out: @@ -1755,11 +1792,10 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, key.objectid = key.offset; key.offset = (u64)-1; dirid = key.objectid; - } if (ptr < name) goto out; - memcpy(name, ptr, total_len); + memmove(name, ptr, total_len); name[total_len]='\0'; ret = 0; out: @@ -2184,6 +2220,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, if (!(src_file->f_mode & FMODE_READ)) goto out_fput; + /* don't make the dst file partly checksummed */ + if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != + (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) + goto out_fput; + ret = -EISDIR; if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) goto out_fput; @@ -2222,11 +2263,26 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, if (off + len == src->i_size) len = ALIGN(src->i_size, bs) - off; + if (len == 0) { + ret = 0; + goto out_unlock; + } + /* verify the end result is block aligned */ if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || !IS_ALIGNED(destoff, bs)) goto out_unlock; + if (destoff > inode->i_size) { + ret = btrfs_cont_expand(inode, inode->i_size, destoff); + if (ret) + goto out_unlock; + } + + /* truncate page cache pages from target inode range */ + truncate_inode_pages_range(&inode->i_data, destoff, + PAGE_CACHE_ALIGN(destoff + len) - 1); + /* do any pending delalloc/csum calc on src, one way or another, and lock file content */ while (1) { @@ -2320,7 +2376,12 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, else new_key.offset = destoff; - trans = btrfs_start_transaction(root, 1); + /* + * 1 - adjusting old extent (we may have to split it) + * 1 - add new extent + * 1 - inode update + */ + trans = btrfs_start_transaction(root, 3); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out; @@ -2328,14 +2389,21 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, if (type == BTRFS_FILE_EXTENT_REG || type == BTRFS_FILE_EXTENT_PREALLOC) { + /* + * a | --- range to clone ---| b + * | ------------- extent ------------- | + */ + + /* substract range b */ + if (key.offset + datal > off + len) + datal = off + len - key.offset; + + /* substract range a */ if (off > key.offset) { datao += off - key.offset; datal -= off - key.offset; } - if (key.offset + datal > off + len) - datal = off + len - key.offset; - ret = btrfs_drop_extents(trans, inode, new_key.offset, new_key.offset + datal, @@ -2380,6 +2448,20 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, new_key.offset += skip; } + /* + * Don't copy an inline extent into an offset + * greater than zero. Having an inline extent + * at such an offset results in chaos as btrfs + * isn't prepared for such cases. Just skip + * this case for the same reasons as commented + * at btrfs_ioctl_clone(). + */ + if (new_key.offset > 0) { + ret = -EOPNOTSUPP; + btrfs_end_transaction(trans, root); + goto out; + } + if (key.offset + datal > off+len) trim = key.offset + datal - (off+len); @@ -2432,7 +2514,6 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, if (endoff > inode->i_size) btrfs_i_size_write(inode, endoff); - BTRFS_I(inode)->flags = BTRFS_I(src)->flags; ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); btrfs_end_transaction(trans, root); @@ -2559,7 +2640,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) return PTR_ERR(trans); } - dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); + dir_id = btrfs_super_root_dir(root->fs_info->super_copy); di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path, dir_id, "default", 7, 1); if (IS_ERR_OR_NULL(di)) { @@ -2575,7 +2656,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_free_path(path); - disk_super = &root->fs_info->super_copy; + disk_super = root->fs_info->super_copy; features = btrfs_super_incompat_flags(disk_super); if (!(features & BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)) { features |= BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL; @@ -2836,6 +2917,147 @@ static long btrfs_ioctl_scrub_progress(struct btrfs_root *root, return ret; } +static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) +{ + int ret = 0; + int i; + u64 rel_ptr; + int size; + struct btrfs_ioctl_ino_path_args *ipa = NULL; + struct inode_fs_paths *ipath = NULL; + struct btrfs_path *path; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + ipa = memdup_user(arg, sizeof(*ipa)); + if (IS_ERR(ipa)) { + ret = PTR_ERR(ipa); + ipa = NULL; + goto out; + } + + size = min_t(u32, ipa->size, 4096); + ipath = init_ipath(size, root, path); + if (IS_ERR(ipath)) { + ret = PTR_ERR(ipath); + ipath = NULL; + goto out; + } + + ret = paths_from_inode(ipa->inum, ipath); + if (ret < 0) + goto out; + + for (i = 0; i < ipath->fspath->elem_cnt; ++i) { + rel_ptr = ipath->fspath->val[i] - + (u64)(unsigned long)ipath->fspath->val; + ipath->fspath->val[i] = rel_ptr; + } + + ret = copy_to_user((void *)(unsigned long)ipa->fspath, + (void *)(unsigned long)ipath->fspath, size); + if (ret) { + ret = -EFAULT; + goto out; + } + +out: + btrfs_free_path(path); + free_ipath(ipath); + kfree(ipa); + + return ret; +} + +static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx) +{ + struct btrfs_data_container *inodes = ctx; + const size_t c = 3 * sizeof(u64); + + if (inodes->bytes_left >= c) { + inodes->bytes_left -= c; + inodes->val[inodes->elem_cnt] = inum; + inodes->val[inodes->elem_cnt + 1] = offset; + inodes->val[inodes->elem_cnt + 2] = root; + inodes->elem_cnt += 3; + } else { + inodes->bytes_missing += c - inodes->bytes_left; + inodes->bytes_left = 0; + inodes->elem_missed += 3; + } + + return 0; +} + +static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root, + void __user *arg) +{ + int ret = 0; + int size; + u64 extent_offset; + struct btrfs_ioctl_logical_ino_args *loi; + struct btrfs_data_container *inodes = NULL; + struct btrfs_path *path = NULL; + struct btrfs_key key; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + loi = memdup_user(arg, sizeof(*loi)); + if (IS_ERR(loi)) { + ret = PTR_ERR(loi); + loi = NULL; + goto out; + } + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + size = min_t(u32, loi->size, 4096); + inodes = init_data_container(size); + if (IS_ERR(inodes)) { + ret = PTR_ERR(inodes); + inodes = NULL; + goto out; + } + + ret = extent_from_logical(root->fs_info, loi->logical, path, &key); + + if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) + ret = -ENOENT; + if (ret < 0) + goto out; + + extent_offset = loi->logical - key.objectid; + ret = iterate_extent_inodes(root->fs_info, path, key.objectid, + extent_offset, build_ino_list, inodes); + + if (ret < 0) + goto out; + + ret = copy_to_user((void *)(unsigned long)loi->inodes, + (void *)(unsigned long)inodes, size); + if (ret) + ret = -EFAULT; + +out: + btrfs_free_path(path); + kfree(inodes); + kfree(loi); + + return ret; +} + long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -2893,6 +3115,10 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_tree_search(file, argp); case BTRFS_IOC_INO_LOOKUP: return btrfs_ioctl_ino_lookup(file, argp); + case BTRFS_IOC_INO_PATHS: + return btrfs_ioctl_ino_to_path(root, argp); + case BTRFS_IOC_LOGICAL_INO: + return btrfs_ioctl_logical_to_ino(root, argp); case BTRFS_IOC_SPACE_INFO: return btrfs_ioctl_space_info(root, argp); case BTRFS_IOC_SYNC: diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index ad1ea78..252ae99 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -193,6 +193,30 @@ struct btrfs_ioctl_space_args { struct btrfs_ioctl_space_info spaces[0]; }; +struct btrfs_data_container { + __u32 bytes_left; /* out -- bytes not needed to deliver output */ + __u32 bytes_missing; /* out -- additional bytes needed for result */ + __u32 elem_cnt; /* out */ + __u32 elem_missed; /* out */ + __u64 val[0]; /* out */ +}; + +struct btrfs_ioctl_ino_path_args { + __u64 inum; /* in */ + __u32 size; /* in */ + __u64 reserved[4]; + /* struct btrfs_data_container *fspath; out */ + __u64 fspath; /* out */ +}; + +struct btrfs_ioctl_logical_ino_args { + __u64 logical; /* in */ + __u32 size; /* in */ + __u64 reserved[4]; + /* struct btrfs_data_container *inodes; out */ + __u64 inodes; +}; + #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ struct btrfs_ioctl_vol_args) #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ @@ -248,4 +272,9 @@ struct btrfs_ioctl_space_args { struct btrfs_ioctl_dev_info_args) #define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \ struct btrfs_ioctl_fs_info_args) +#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \ + struct btrfs_ioctl_ino_path_args) +#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ + struct btrfs_ioctl_ino_path_args) + #endif diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 66fa43d..d77b67c 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -24,185 +24,197 @@ #include "extent_io.h" #include "locking.h" -static inline void spin_nested(struct extent_buffer *eb) -{ - spin_lock(&eb->lock); -} +void btrfs_assert_tree_read_locked(struct extent_buffer *eb); /* - * Setting a lock to blocking will drop the spinlock and set the - * flag that forces other procs who want the lock to wait. After - * this you can safely schedule with the lock held. + * if we currently have a spinning reader or writer lock + * (indicated by the rw flag) this will bump the count + * of blocking holders and drop the spinlock. */ -void btrfs_set_lock_blocking(struct extent_buffer *eb) +void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw) { - if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { - set_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags); - spin_unlock(&eb->lock); + if (rw == BTRFS_WRITE_LOCK) { + if (atomic_read(&eb->blocking_writers) == 0) { + WARN_ON(atomic_read(&eb->spinning_writers) != 1); + atomic_dec(&eb->spinning_writers); + btrfs_assert_tree_locked(eb); + atomic_inc(&eb->blocking_writers); + write_unlock(&eb->lock); + } + } else if (rw == BTRFS_READ_LOCK) { + btrfs_assert_tree_read_locked(eb); + atomic_inc(&eb->blocking_readers); + WARN_ON(atomic_read(&eb->spinning_readers) == 0); + atomic_dec(&eb->spinning_readers); + read_unlock(&eb->lock); } - /* exit with the spin lock released and the bit set */ + return; } /* - * clearing the blocking flag will take the spinlock again. - * After this you can't safely schedule + * if we currently have a blocking lock, take the spinlock + * and drop our blocking count */ -void btrfs_clear_lock_blocking(struct extent_buffer *eb) +void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw) { - if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { - spin_nested(eb); - clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags); - smp_mb__after_clear_bit(); + if (rw == BTRFS_WRITE_LOCK_BLOCKING) { + BUG_ON(atomic_read(&eb->blocking_writers) != 1); + write_lock(&eb->lock); + WARN_ON(atomic_read(&eb->spinning_writers)); + atomic_inc(&eb->spinning_writers); + if (atomic_dec_and_test(&eb->blocking_writers)) + wake_up(&eb->write_lock_wq); + } else if (rw == BTRFS_READ_LOCK_BLOCKING) { + BUG_ON(atomic_read(&eb->blocking_readers) == 0); + read_lock(&eb->lock); + atomic_inc(&eb->spinning_readers); + if (atomic_dec_and_test(&eb->blocking_readers)) + wake_up(&eb->read_lock_wq); } - /* exit with the spin lock held */ + return; } /* - * unfortunately, many of the places that currently set a lock to blocking - * don't end up blocking for very long, and often they don't block - * at all. For a dbench 50 run, if we don't spin on the blocking bit - * at all, the context switch rate can jump up to 400,000/sec or more. - * - * So, we're still stuck with this crummy spin on the blocking bit, - * at least until the most common causes of the short blocks - * can be dealt with. + * take a spinning read lock. This will wait for any blocking + * writers */ -static int btrfs_spin_on_block(struct extent_buffer *eb) +void btrfs_tree_read_lock(struct extent_buffer *eb) { - int i; - - for (i = 0; i < 512; i++) { - if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) - return 1; - if (need_resched()) - break; - cpu_relax(); +again: + wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0); + read_lock(&eb->lock); + if (atomic_read(&eb->blocking_writers)) { + read_unlock(&eb->lock); + wait_event(eb->write_lock_wq, + atomic_read(&eb->blocking_writers) == 0); + goto again; } - return 0; + atomic_inc(&eb->read_locks); + atomic_inc(&eb->spinning_readers); } /* - * This is somewhat different from trylock. It will take the - * spinlock but if it finds the lock is set to blocking, it will - * return without the lock held. - * - * returns 1 if it was able to take the lock and zero otherwise - * - * After this call, scheduling is not safe without first calling - * btrfs_set_lock_blocking() + * returns 1 if we get the read lock and 0 if we don't + * this won't wait for blocking writers */ -int btrfs_try_spin_lock(struct extent_buffer *eb) +int btrfs_try_tree_read_lock(struct extent_buffer *eb) { - int i; + if (atomic_read(&eb->blocking_writers)) + return 0; - if (btrfs_spin_on_block(eb)) { - spin_nested(eb); - if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) - return 1; - spin_unlock(&eb->lock); + read_lock(&eb->lock); + if (atomic_read(&eb->blocking_writers)) { + read_unlock(&eb->lock); + return 0; } - /* spin for a bit on the BLOCKING flag */ - for (i = 0; i < 2; i++) { - cpu_relax(); - if (!btrfs_spin_on_block(eb)) - break; - - spin_nested(eb); - if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) - return 1; - spin_unlock(&eb->lock); - } - return 0; + atomic_inc(&eb->read_locks); + atomic_inc(&eb->spinning_readers); + return 1; } /* - * the autoremove wake function will return 0 if it tried to wake up - * a process that was already awake, which means that process won't - * count as an exclusive wakeup. The waitq code will continue waking - * procs until it finds one that was actually sleeping. - * - * For btrfs, this isn't quite what we want. We want a single proc - * to be notified that the lock is ready for taking. If that proc - * already happen to be awake, great, it will loop around and try for - * the lock. - * - * So, btrfs_wake_function always returns 1, even when the proc that we - * tried to wake up was already awake. + * returns 1 if we get the read lock and 0 if we don't + * this won't wait for blocking writers or readers */ -static int btrfs_wake_function(wait_queue_t *wait, unsigned mode, - int sync, void *key) +int btrfs_try_tree_write_lock(struct extent_buffer *eb) { - autoremove_wake_function(wait, mode, sync, key); + if (atomic_read(&eb->blocking_writers) || + atomic_read(&eb->blocking_readers)) + return 0; + write_lock(&eb->lock); + if (atomic_read(&eb->blocking_writers) || + atomic_read(&eb->blocking_readers)) { + write_unlock(&eb->lock); + return 0; + } + atomic_inc(&eb->write_locks); + atomic_inc(&eb->spinning_writers); return 1; } /* - * returns with the extent buffer spinlocked. - * - * This will spin and/or wait as required to take the lock, and then - * return with the spinlock held. - * - * After this call, scheduling is not safe without first calling - * btrfs_set_lock_blocking() + * drop a spinning read lock + */ +void btrfs_tree_read_unlock(struct extent_buffer *eb) +{ + btrfs_assert_tree_read_locked(eb); + WARN_ON(atomic_read(&eb->spinning_readers) == 0); + atomic_dec(&eb->spinning_readers); + atomic_dec(&eb->read_locks); + read_unlock(&eb->lock); +} + +/* + * drop a blocking read lock + */ +void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb) +{ + btrfs_assert_tree_read_locked(eb); + WARN_ON(atomic_read(&eb->blocking_readers) == 0); + if (atomic_dec_and_test(&eb->blocking_readers)) + wake_up(&eb->read_lock_wq); + atomic_dec(&eb->read_locks); +} + +/* + * take a spinning write lock. This will wait for both + * blocking readers or writers */ int btrfs_tree_lock(struct extent_buffer *eb) { - DEFINE_WAIT(wait); - wait.func = btrfs_wake_function; - - if (!btrfs_spin_on_block(eb)) - goto sleep; - - while(1) { - spin_nested(eb); - - /* nobody is blocking, exit with the spinlock held */ - if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) - return 0; - - /* - * we have the spinlock, but the real owner is blocking. - * wait for them - */ - spin_unlock(&eb->lock); - - /* - * spin for a bit, and if the blocking flag goes away, - * loop around - */ - cpu_relax(); - if (btrfs_spin_on_block(eb)) - continue; -sleep: - prepare_to_wait_exclusive(&eb->lock_wq, &wait, - TASK_UNINTERRUPTIBLE); - - if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) - schedule(); - - finish_wait(&eb->lock_wq, &wait); +again: + wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0); + wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0); + write_lock(&eb->lock); + if (atomic_read(&eb->blocking_readers)) { + write_unlock(&eb->lock); + wait_event(eb->read_lock_wq, + atomic_read(&eb->blocking_readers) == 0); + goto again; } + if (atomic_read(&eb->blocking_writers)) { + write_unlock(&eb->lock); + wait_event(eb->write_lock_wq, + atomic_read(&eb->blocking_writers) == 0); + goto again; + } + WARN_ON(atomic_read(&eb->spinning_writers)); + atomic_inc(&eb->spinning_writers); + atomic_inc(&eb->write_locks); return 0; } +/* + * drop a spinning or a blocking write lock. + */ int btrfs_tree_unlock(struct extent_buffer *eb) { - /* - * if we were a blocking owner, we don't have the spinlock held - * just clear the bit and look for waiters - */ - if (test_and_clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) - smp_mb__after_clear_bit(); - else - spin_unlock(&eb->lock); - - if (waitqueue_active(&eb->lock_wq)) - wake_up(&eb->lock_wq); + int blockers = atomic_read(&eb->blocking_writers); + + BUG_ON(blockers > 1); + + btrfs_assert_tree_locked(eb); + atomic_dec(&eb->write_locks); + + if (blockers) { + WARN_ON(atomic_read(&eb->spinning_writers)); + atomic_dec(&eb->blocking_writers); + smp_wmb(); + wake_up(&eb->write_lock_wq); + } else { + WARN_ON(atomic_read(&eb->spinning_writers) != 1); + atomic_dec(&eb->spinning_writers); + write_unlock(&eb->lock); + } return 0; } void btrfs_assert_tree_locked(struct extent_buffer *eb) { - if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) - assert_spin_locked(&eb->lock); + BUG_ON(!atomic_read(&eb->write_locks)); +} + +void btrfs_assert_tree_read_locked(struct extent_buffer *eb) +{ + BUG_ON(!atomic_read(&eb->read_locks)); } diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index 5c33a56..17247dd 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h @@ -19,11 +19,43 @@ #ifndef __BTRFS_LOCKING_ #define __BTRFS_LOCKING_ +#define BTRFS_WRITE_LOCK 1 +#define BTRFS_READ_LOCK 2 +#define BTRFS_WRITE_LOCK_BLOCKING 3 +#define BTRFS_READ_LOCK_BLOCKING 4 + int btrfs_tree_lock(struct extent_buffer *eb); int btrfs_tree_unlock(struct extent_buffer *eb); int btrfs_try_spin_lock(struct extent_buffer *eb); -void btrfs_set_lock_blocking(struct extent_buffer *eb); -void btrfs_clear_lock_blocking(struct extent_buffer *eb); +void btrfs_tree_read_lock(struct extent_buffer *eb); +void btrfs_tree_read_unlock(struct extent_buffer *eb); +void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb); +void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw); +void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw); void btrfs_assert_tree_locked(struct extent_buffer *eb); +int btrfs_try_tree_read_lock(struct extent_buffer *eb); +int btrfs_try_tree_write_lock(struct extent_buffer *eb); + +static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw) +{ + if (rw == BTRFS_WRITE_LOCK || rw == BTRFS_WRITE_LOCK_BLOCKING) + btrfs_tree_unlock(eb); + else if (rw == BTRFS_READ_LOCK_BLOCKING) + btrfs_tree_read_unlock_blocking(eb); + else if (rw == BTRFS_READ_LOCK) + btrfs_tree_read_unlock(eb); + else + BUG(); +} + +static inline void btrfs_set_lock_blocking(struct extent_buffer *eb) +{ + btrfs_set_lock_blocking_rw(eb, BTRFS_WRITE_LOCK); +} + +static inline void btrfs_clear_lock_blocking(struct extent_buffer *eb) +{ + btrfs_clear_lock_blocking_rw(eb, BTRFS_WRITE_LOCK_BLOCKING); +} #endif diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index fb2605d..f38e452 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -158,8 +158,7 @@ static void print_extent_ref_v0(struct extent_buffer *eb, int slot) void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) { int i; - u32 type; - u32 nr = btrfs_header_nritems(l); + u32 type, nr; struct btrfs_item *item; struct btrfs_root_item *ri; struct btrfs_dir_item *di; @@ -172,6 +171,11 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) struct btrfs_key key; struct btrfs_key found_key; + if (!l) + return; + + nr = btrfs_header_nritems(l); + printk(KERN_INFO "leaf %llu total ptrs %d free space %d\n", (unsigned long long)btrfs_header_bytenr(l), nr, btrfs_leaf_free_space(root, l)); diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c deleted file mode 100644 index 82d569c..0000000 --- a/fs/btrfs/ref-cache.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (C) 2008 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - -#include -#include -#include -#include "ctree.h" -#include "ref-cache.h" -#include "transaction.h" - -static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, - struct rb_node *node) -{ - struct rb_node **p = &root->rb_node; - struct rb_node *parent = NULL; - struct btrfs_leaf_ref *entry; - - while (*p) { - parent = *p; - entry = rb_entry(parent, struct btrfs_leaf_ref, rb_node); - - if (bytenr < entry->bytenr) - p = &(*p)->rb_left; - else if (bytenr > entry->bytenr) - p = &(*p)->rb_right; - else - return parent; - } - - entry = rb_entry(node, struct btrfs_leaf_ref, rb_node); - rb_link_node(node, parent, p); - rb_insert_color(node, root); - return NULL; -} - -static struct rb_node *tree_search(struct rb_root *root, u64 bytenr) -{ - struct rb_node *n = root->rb_node; - struct btrfs_leaf_ref *entry; - - while (n) { - entry = rb_entry(n, struct btrfs_leaf_ref, rb_node); - WARN_ON(!entry->in_tree); - - if (bytenr < entry->bytenr) - n = n->rb_left; - else if (bytenr > entry->bytenr) - n = n->rb_right; - else - return n; - } - return NULL; -} diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h deleted file mode 100644 index 24f7001..0000000 --- a/fs/btrfs/ref-cache.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (C) 2008 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ -#ifndef __REFCACHE__ -#define __REFCACHE__ - -struct btrfs_extent_info { - /* bytenr and num_bytes find the extent in the extent allocation tree */ - u64 bytenr; - u64 num_bytes; - - /* objectid and offset find the back reference for the file */ - u64 objectid; - u64 offset; -}; - -struct btrfs_leaf_ref { - struct rb_node rb_node; - struct btrfs_leaf_ref_tree *tree; - int in_tree; - atomic_t usage; - - u64 root_gen; - u64 bytenr; - u64 owner; - u64 generation; - int nritems; - - struct list_head list; - struct btrfs_extent_info extents[]; -}; - -static inline size_t btrfs_leaf_ref_size(int nr_extents) -{ - return sizeof(struct btrfs_leaf_ref) + - sizeof(struct btrfs_extent_info) * nr_extents; -} -#endif diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 2ab5837..cfb5543 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -670,7 +670,6 @@ struct backref_node *build_backref_tree(struct reloc_control *rc, int cowonly; int ret; int err = 0; - bool need_check = true; path1 = btrfs_alloc_path(); path2 = btrfs_alloc_path(); @@ -893,7 +892,6 @@ again: cur->bytenr); lower = cur; - need_check = true; for (; level < BTRFS_MAX_LEVEL; level++) { if (!path2->nodes[level]) { BUG_ON(btrfs_root_bytenr(&root->root_item) != @@ -937,12 +935,14 @@ again: /* * add the block to pending list if we - * need check its backrefs, we only do this once - * while walking up a tree as we will catch - * anything else later on. + * need check its backrefs. only block + * at 'cur->level + 1' is added to the + * tail of pending list. this guarantees + * we check backrefs from lower level + * blocks to upper level blocks. */ - if (!upper->checked && need_check) { - need_check = false; + if (!upper->checked && + level == cur->level + 1) { list_add_tail(&edge->list[UPPER], &list); } else @@ -1174,6 +1174,8 @@ static int clone_backref_node(struct btrfs_trans_handle *trans, list_add_tail(&new_edge->list[UPPER], &new_node->lower); } + } else { + list_add_tail(&new_node->lower, &cache->leaves); } rb_node = tree_insert(&cache->rb_root, new_node->bytenr, @@ -2041,8 +2043,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, BUG_ON(IS_ERR(trans)); trans->block_rsv = rc->block_rsv; - ret = btrfs_block_rsv_check(trans, root, rc->block_rsv, - min_reserved, 0); + ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved); if (ret) { BUG_ON(ret != -EAGAIN); ret = btrfs_commit_transaction(trans, root); @@ -2152,8 +2153,7 @@ int prepare_to_merge(struct reloc_control *rc, int err) again: if (!err) { num_bytes = rc->merging_rsv_size; - ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv, - num_bytes); + ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes); if (ret) err = ret; } @@ -2427,7 +2427,7 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans, num_bytes = calcu_metadata_size(rc, node, 1) * 2; trans->block_rsv = rc->block_rsv; - ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes); + ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes); if (ret) { if (ret == -EAGAIN) rc->commit_transaction = 1; @@ -2922,6 +2922,7 @@ static int relocate_file_extent_cluster(struct inode *inode, unsigned long last_index; struct page *page; struct file_ra_state *ra; + gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); int nr = 0; int ret = 0; @@ -2946,7 +2947,9 @@ static int relocate_file_extent_cluster(struct inode *inode, index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; while (index <= last_index) { + mutex_lock(&inode->i_mutex); ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE); + mutex_unlock(&inode->i_mutex); if (ret) goto out; @@ -2955,7 +2958,8 @@ static int relocate_file_extent_cluster(struct inode *inode, page_cache_sync_readahead(inode->i_mapping, ra, NULL, index, last_index + 1 - index); - page = grab_cache_page(inode->i_mapping, index); + page = find_or_create_page(inode->i_mapping, index, + mask); if (!page) { btrfs_delalloc_release_metadata(inode, PAGE_CACHE_SIZE); @@ -3322,8 +3326,11 @@ static int find_data_references(struct reloc_control *rc, } key.objectid = ref_objectid; - key.offset = ref_offset; key.type = BTRFS_EXTENT_DATA_KEY; + if (ref_offset > ((u64)-1 << 32)) + key.offset = 0; + else + key.offset = ref_offset; path->search_commit_root = 1; path->skip_locking = 1; @@ -3644,14 +3651,11 @@ int prepare_to_relocate(struct reloc_control *rc) * btrfs_init_reloc_root will use them when there * is no reservation in transaction handle. */ - ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv, + ret = btrfs_block_rsv_add(rc->extent_root, rc->block_rsv, rc->extent_root->nodesize * 256); if (ret) return ret; - rc->block_rsv->refill_used = 1; - btrfs_add_durable_block_rsv(rc->extent_root->fs_info, rc->block_rsv); - memset(&rc->cluster, 0, sizeof(rc->cluster)); rc->search_start = rc->block_group->key.objectid; rc->extents_found = 0; @@ -3776,8 +3780,7 @@ restart: } } - ret = btrfs_block_rsv_check(trans, rc->extent_root, - rc->block_rsv, 0, 5); + ret = btrfs_block_rsv_check(rc->extent_root, rc->block_rsv, 5); if (ret < 0) { if (ret != -EAGAIN) { err = ret; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index ebe4544..f409990 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -71,13 +71,12 @@ out: return ret; } -int btrfs_set_root_node(struct btrfs_root_item *item, - struct extent_buffer *node) +void btrfs_set_root_node(struct btrfs_root_item *item, + struct extent_buffer *node) { btrfs_set_root_bytenr(item, node->start); btrfs_set_root_level(item, btrfs_header_level(node)); btrfs_set_root_generation(item, btrfs_header_generation(node)); - return 0; } /* diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index a8d03d5..ddf2c90 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -17,10 +17,14 @@ */ #include +#include #include "ctree.h" #include "volumes.h" #include "disk-io.h" #include "ordered-data.h" +#include "transaction.h" +#include "backref.h" +#include "extent_io.h" /* * This is only the first step towards a full-features scrub. It reads all @@ -29,15 +33,12 @@ * any can be found. * * Future enhancements: - * - To enhance the performance, better read-ahead strategies for the - * extent-tree can be employed. * - In case an unrepairable extent is encountered, track which files are * affected and report them * - In case of a read error on files with nodatasum, map the file and read * the extent to trigger a writeback of the good copy * - track and record media errors, throw out bad devices * - add a mode to also read unallocated space - * - make the prefetch cancellable */ struct scrub_bio; @@ -63,7 +64,7 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix); struct scrub_page { u64 flags; /* extent flags */ u64 generation; - u64 mirror_num; + int mirror_num; int have_csum; u8 csum[BTRFS_CSUM_SIZE]; }; @@ -87,6 +88,7 @@ struct scrub_dev { int first_free; int curr; atomic_t in_flight; + atomic_t fixup_cnt; spinlock_t list_lock; wait_queue_head_t list_wait; u16 csum_size; @@ -100,6 +102,27 @@ struct scrub_dev { spinlock_t stat_lock; }; +struct scrub_fixup_nodatasum { + struct scrub_dev *sdev; + u64 logical; + struct btrfs_root *root; + struct btrfs_work work; + int mirror_num; +}; + +struct scrub_warning { + struct btrfs_path *path; + u64 extent_item_size; + char *scratch_buf; + char *msg_buf; + const char *errstr; + sector_t sector; + u64 logical; + struct btrfs_device *dev; + int msg_bufsize; + int scratch_bufsize; +}; + static void scrub_free_csums(struct scrub_dev *sdev) { while (!list_empty(&sdev->csum_list)) { @@ -175,14 +198,15 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev) if (i != SCRUB_BIOS_PER_DEV-1) sdev->bios[i]->next_free = i + 1; - else + else sdev->bios[i]->next_free = -1; } sdev->first_free = 0; sdev->curr = -1; atomic_set(&sdev->in_flight, 0); + atomic_set(&sdev->fixup_cnt, 0); atomic_set(&sdev->cancel_req, 0); - sdev->csum_size = btrfs_super_csum_size(&fs_info->super_copy); + sdev->csum_size = btrfs_super_csum_size(fs_info->super_copy); INIT_LIST_HEAD(&sdev->csum_list); spin_lock_init(&sdev->list_lock); @@ -195,24 +219,366 @@ nomem: return ERR_PTR(-ENOMEM); } +static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx) +{ + u64 isize; + u32 nlink; + int ret; + int i; + struct extent_buffer *eb; + struct btrfs_inode_item *inode_item; + struct scrub_warning *swarn = ctx; + struct btrfs_fs_info *fs_info = swarn->dev->dev_root->fs_info; + struct inode_fs_paths *ipath = NULL; + struct btrfs_root *local_root; + struct btrfs_key root_key; + + root_key.objectid = root; + root_key.type = BTRFS_ROOT_ITEM_KEY; + root_key.offset = (u64)-1; + local_root = btrfs_read_fs_root_no_name(fs_info, &root_key); + if (IS_ERR(local_root)) { + ret = PTR_ERR(local_root); + goto err; + } + + ret = inode_item_info(inum, 0, local_root, swarn->path); + if (ret) { + btrfs_release_path(swarn->path); + goto err; + } + + eb = swarn->path->nodes[0]; + inode_item = btrfs_item_ptr(eb, swarn->path->slots[0], + struct btrfs_inode_item); + isize = btrfs_inode_size(eb, inode_item); + nlink = btrfs_inode_nlink(eb, inode_item); + btrfs_release_path(swarn->path); + + ipath = init_ipath(4096, local_root, swarn->path); + if (IS_ERR(ipath)) { + ret = PTR_ERR(ipath); + ipath = NULL; + goto err; + } + ret = paths_from_inode(inum, ipath); + + if (ret < 0) + goto err; + + /* + * we deliberately ignore the bit ipath might have been too small to + * hold all of the paths here + */ + for (i = 0; i < ipath->fspath->elem_cnt; ++i) + printk(KERN_WARNING "btrfs: %s at logical %llu on dev " + "%s, sector %llu, root %llu, inode %llu, offset %llu, " + "length %llu, links %u (path: %s)\n", swarn->errstr, + swarn->logical, swarn->dev->name, + (unsigned long long)swarn->sector, root, inum, offset, + min(isize - offset, (u64)PAGE_SIZE), nlink, + (char *)(unsigned long)ipath->fspath->val[i]); + + free_ipath(ipath); + return 0; + +err: + printk(KERN_WARNING "btrfs: %s at logical %llu on dev " + "%s, sector %llu, root %llu, inode %llu, offset %llu: path " + "resolving failed with ret=%d\n", swarn->errstr, + swarn->logical, swarn->dev->name, + (unsigned long long)swarn->sector, root, inum, offset, ret); + + free_ipath(ipath); + return 0; +} + +static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio, + int ix) +{ + struct btrfs_device *dev = sbio->sdev->dev; + struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; + struct btrfs_path *path; + struct btrfs_key found_key; + struct extent_buffer *eb; + struct btrfs_extent_item *ei; + struct scrub_warning swarn; + u32 item_size; + int ret; + u64 ref_root; + u8 ref_level; + unsigned long ptr = 0; + const int bufsize = 4096; + u64 extent_offset; + + path = btrfs_alloc_path(); + + swarn.scratch_buf = kmalloc(bufsize, GFP_NOFS); + swarn.msg_buf = kmalloc(bufsize, GFP_NOFS); + swarn.sector = (sbio->physical + ix * PAGE_SIZE) >> 9; + swarn.logical = sbio->logical + ix * PAGE_SIZE; + swarn.errstr = errstr; + swarn.dev = dev; + swarn.msg_bufsize = bufsize; + swarn.scratch_bufsize = bufsize; + + if (!path || !swarn.scratch_buf || !swarn.msg_buf) + goto out; + + ret = extent_from_logical(fs_info, swarn.logical, path, &found_key); + if (ret < 0) + goto out; + + extent_offset = swarn.logical - found_key.objectid; + swarn.extent_item_size = found_key.offset; + + eb = path->nodes[0]; + ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); + item_size = btrfs_item_size_nr(eb, path->slots[0]); + + if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) { + do { + ret = tree_backref_for_extent(&ptr, eb, ei, item_size, + &ref_root, &ref_level); + printk(KERN_WARNING "%s at logical %llu on dev %s, " + "sector %llu: metadata %s (level %d) in tree " + "%llu\n", errstr, swarn.logical, dev->name, + (unsigned long long)swarn.sector, + ref_level ? "node" : "leaf", + ret < 0 ? -1 : ref_level, + ret < 0 ? -1 : ref_root); + } while (ret != 1); + } else { + swarn.path = path; + iterate_extent_inodes(fs_info, path, found_key.objectid, + extent_offset, + scrub_print_warning_inode, &swarn); + } + +out: + btrfs_free_path(path); + kfree(swarn.scratch_buf); + kfree(swarn.msg_buf); +} + +static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *ctx) +{ + struct page *page = NULL; + unsigned long index; + struct scrub_fixup_nodatasum *fixup = ctx; + int ret; + int corrected = 0; + struct btrfs_key key; + struct inode *inode = NULL; + u64 end = offset + PAGE_SIZE - 1; + struct btrfs_root *local_root; + + key.objectid = root; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + local_root = btrfs_read_fs_root_no_name(fixup->root->fs_info, &key); + if (IS_ERR(local_root)) + return PTR_ERR(local_root); + + key.type = BTRFS_INODE_ITEM_KEY; + key.objectid = inum; + key.offset = 0; + inode = btrfs_iget(fixup->root->fs_info->sb, &key, local_root, NULL); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + index = offset >> PAGE_CACHE_SHIFT; + + page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); + if (!page) { + ret = -ENOMEM; + goto out; + } + + if (PageUptodate(page)) { + struct btrfs_mapping_tree *map_tree; + if (PageDirty(page)) { + /* + * we need to write the data to the defect sector. the + * data that was in that sector is not in memory, + * because the page was modified. we must not write the + * modified page to that sector. + * + * TODO: what could be done here: wait for the delalloc + * runner to write out that page (might involve + * COW) and see whether the sector is still + * referenced afterwards. + * + * For the meantime, we'll treat this error + * incorrectable, although there is a chance that a + * later scrub will find the bad sector again and that + * there's no dirty page in memory, then. + */ + ret = -EIO; + goto out; + } + map_tree = &BTRFS_I(inode)->root->fs_info->mapping_tree; + ret = repair_io_failure(map_tree, offset, PAGE_SIZE, + fixup->logical, page, + fixup->mirror_num); + unlock_page(page); + corrected = !ret; + } else { + /* + * we need to get good data first. the general readpage path + * will call repair_io_failure for us, we just have to make + * sure we read the bad mirror. + */ + ret = set_extent_bits(&BTRFS_I(inode)->io_tree, offset, end, + EXTENT_DAMAGED, GFP_NOFS); + if (ret) { + /* set_extent_bits should give proper error */ + WARN_ON(ret > 0); + if (ret > 0) + ret = -EFAULT; + goto out; + } + + ret = extent_read_full_page(&BTRFS_I(inode)->io_tree, page, + btrfs_get_extent, + fixup->mirror_num); + wait_on_page_locked(page); + + corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset, + end, EXTENT_DAMAGED, 0, NULL); + if (!corrected) + clear_extent_bits(&BTRFS_I(inode)->io_tree, offset, end, + EXTENT_DAMAGED, GFP_NOFS); + } + +out: + if (page) + put_page(page); + if (inode) + iput(inode); + + if (ret < 0) + return ret; + + if (ret == 0 && corrected) { + /* + * we only need to call readpage for one of the inodes belonging + * to this extent. so make iterate_extent_inodes stop + */ + return 1; + } + + return -EIO; +} + +static void scrub_fixup_nodatasum(struct btrfs_work *work) +{ + int ret; + struct scrub_fixup_nodatasum *fixup; + struct scrub_dev *sdev; + struct btrfs_trans_handle *trans = NULL; + struct btrfs_fs_info *fs_info; + struct btrfs_path *path; + int uncorrectable = 0; + + fixup = container_of(work, struct scrub_fixup_nodatasum, work); + sdev = fixup->sdev; + fs_info = fixup->root->fs_info; + + path = btrfs_alloc_path(); + if (!path) { + spin_lock(&sdev->stat_lock); + ++sdev->stat.malloc_errors; + spin_unlock(&sdev->stat_lock); + uncorrectable = 1; + goto out; + } + + trans = btrfs_join_transaction(fixup->root); + if (IS_ERR(trans)) { + uncorrectable = 1; + goto out; + } + + /* + * the idea is to trigger a regular read through the standard path. we + * read a page from the (failed) logical address by specifying the + * corresponding copynum of the failed sector. thus, that readpage is + * expected to fail. + * that is the point where on-the-fly error correction will kick in + * (once it's finished) and rewrite the failed sector if a good copy + * can be found. + */ + ret = iterate_inodes_from_logical(fixup->logical, fixup->root->fs_info, + path, scrub_fixup_readpage, + fixup); + if (ret < 0) { + uncorrectable = 1; + goto out; + } + WARN_ON(ret != 1); + + spin_lock(&sdev->stat_lock); + ++sdev->stat.corrected_errors; + spin_unlock(&sdev->stat_lock); + +out: + if (trans && !IS_ERR(trans)) + btrfs_end_transaction(trans, fixup->root); + if (uncorrectable) { + spin_lock(&sdev->stat_lock); + ++sdev->stat.uncorrectable_errors; + spin_unlock(&sdev->stat_lock); + printk_ratelimited(KERN_ERR "btrfs: unable to fixup " + "(nodatasum) error at logical %llu\n", + fixup->logical); + } + + btrfs_free_path(path); + kfree(fixup); + + /* see caller why we're pretending to be paused in the scrub counters */ + mutex_lock(&fs_info->scrub_lock); + atomic_dec(&fs_info->scrubs_running); + atomic_dec(&fs_info->scrubs_paused); + mutex_unlock(&fs_info->scrub_lock); + atomic_dec(&sdev->fixup_cnt); + wake_up(&fs_info->scrub_pause_wait); + wake_up(&sdev->list_wait); +} + /* * scrub_recheck_error gets called when either verification of the page * failed or the bio failed to read, e.g. with EIO. In the latter case, * recheck_error gets called for every page in the bio, even though only * one may be bad */ -static void scrub_recheck_error(struct scrub_bio *sbio, int ix) +static int scrub_recheck_error(struct scrub_bio *sbio, int ix) { + struct scrub_dev *sdev = sbio->sdev; + u64 sector = (sbio->physical + ix * PAGE_SIZE) >> 9; + static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + if (sbio->err) { - if (scrub_fixup_io(READ, sbio->sdev->dev->bdev, - (sbio->physical + ix * PAGE_SIZE) >> 9, + if (scrub_fixup_io(READ, sbio->sdev->dev->bdev, sector, sbio->bio->bi_io_vec[ix].bv_page) == 0) { if (scrub_fixup_check(sbio, ix) == 0) - return; + return 0; } + if (__ratelimit(&_rs)) + scrub_print_warning("i/o error", sbio, ix); + } else { + if (__ratelimit(&_rs)) + scrub_print_warning("checksum error", sbio, ix); } + spin_lock(&sdev->stat_lock); + ++sdev->stat.read_errors; + spin_unlock(&sdev->stat_lock); + scrub_fixup(sbio, ix); + return 1; } static int scrub_fixup_check(struct scrub_bio *sbio, int ix) @@ -250,7 +616,8 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix) struct scrub_dev *sdev = sbio->sdev; struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; - struct btrfs_multi_bio *multi = NULL; + struct btrfs_bio *bbio = NULL; + struct scrub_fixup_nodatasum *fixup; u64 logical = sbio->logical + ix * PAGE_SIZE; u64 length; int i; @@ -259,38 +626,57 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix) if ((sbio->spag[ix].flags & BTRFS_EXTENT_FLAG_DATA) && (sbio->spag[ix].have_csum == 0)) { + fixup = kzalloc(sizeof(*fixup), GFP_NOFS); + if (!fixup) + goto uncorrectable; + fixup->sdev = sdev; + fixup->logical = logical; + fixup->root = fs_info->extent_root; + fixup->mirror_num = sbio->spag[ix].mirror_num; /* - * nodatasum, don't try to fix anything - * FIXME: we can do better, open the inode and trigger a - * writeback + * increment scrubs_running to prevent cancel requests from + * completing as long as a fixup worker is running. we must also + * increment scrubs_paused to prevent deadlocking on pause + * requests used for transactions commits (as the worker uses a + * transaction context). it is safe to regard the fixup worker + * as paused for all matters practical. effectively, we only + * avoid cancellation requests from completing. */ - goto uncorrectable; + mutex_lock(&fs_info->scrub_lock); + atomic_inc(&fs_info->scrubs_running); + atomic_inc(&fs_info->scrubs_paused); + mutex_unlock(&fs_info->scrub_lock); + atomic_inc(&sdev->fixup_cnt); + fixup->work.func = scrub_fixup_nodatasum; + btrfs_queue_worker(&fs_info->scrub_workers, &fixup->work); + return; } length = PAGE_SIZE; ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length, - &multi, 0); - if (ret || !multi || length < PAGE_SIZE) { + &bbio, 0); + if (ret || !bbio || length < PAGE_SIZE) { printk(KERN_ERR "scrub_fixup: btrfs_map_block failed us for %llu\n", (unsigned long long)logical); WARN_ON(1); + kfree(bbio); return; } - if (multi->num_stripes == 1) + if (bbio->num_stripes == 1) /* there aren't any replicas */ goto uncorrectable; /* * first find a good copy */ - for (i = 0; i < multi->num_stripes; ++i) { - if (i == sbio->spag[ix].mirror_num) + for (i = 0; i < bbio->num_stripes; ++i) { + if (i + 1 == sbio->spag[ix].mirror_num) continue; - if (scrub_fixup_io(READ, multi->stripes[i].dev->bdev, - multi->stripes[i].physical >> 9, + if (scrub_fixup_io(READ, bbio->stripes[i].dev->bdev, + bbio->stripes[i].physical >> 9, sbio->bio->bi_io_vec[ix].bv_page)) { /* I/O-error, this is not a good copy */ continue; @@ -299,7 +685,7 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix) if (scrub_fixup_check(sbio, ix) == 0) break; } - if (i == multi->num_stripes) + if (i == bbio->num_stripes) goto uncorrectable; if (!sdev->readonly) { @@ -314,25 +700,23 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix) } } - kfree(multi); + kfree(bbio); spin_lock(&sdev->stat_lock); ++sdev->stat.corrected_errors; spin_unlock(&sdev->stat_lock); - if (printk_ratelimit()) - printk(KERN_ERR "btrfs: fixed up at %llu\n", - (unsigned long long)logical); + printk_ratelimited(KERN_ERR "btrfs: fixed up error at logical %llu\n", + (unsigned long long)logical); return; uncorrectable: - kfree(multi); + kfree(bbio); spin_lock(&sdev->stat_lock); ++sdev->stat.uncorrectable_errors; spin_unlock(&sdev->stat_lock); - if (printk_ratelimit()) - printk(KERN_ERR "btrfs: unable to fixup at %llu\n", - (unsigned long long)logical); + printk_ratelimited(KERN_ERR "btrfs: unable to fixup (regular) error at " + "logical %llu\n", (unsigned long long)logical); } static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector, @@ -382,8 +766,14 @@ static void scrub_checksum(struct btrfs_work *work) int ret; if (sbio->err) { + ret = 0; for (i = 0; i < sbio->count; ++i) - scrub_recheck_error(sbio, i); + ret |= scrub_recheck_error(sbio, i); + if (!ret) { + spin_lock(&sdev->stat_lock); + ++sdev->stat.unverified_errors; + spin_unlock(&sdev->stat_lock); + } sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1); sbio->bio->bi_flags |= 1 << BIO_UPTODATE; @@ -396,10 +786,6 @@ static void scrub_checksum(struct btrfs_work *work) bi->bv_offset = 0; bi->bv_len = PAGE_SIZE; } - - spin_lock(&sdev->stat_lock); - ++sdev->stat.read_errors; - spin_unlock(&sdev->stat_lock); goto out; } for (i = 0; i < sbio->count; ++i) { @@ -420,8 +806,14 @@ static void scrub_checksum(struct btrfs_work *work) WARN_ON(1); } kunmap_atomic(buffer, KM_USER0); - if (ret) - scrub_recheck_error(sbio, i); + if (ret) { + ret = scrub_recheck_error(sbio, i); + if (!ret) { + spin_lock(&sdev->stat_lock); + ++sdev->stat.unverified_errors; + spin_unlock(&sdev->stat_lock); + } + } } out: @@ -557,57 +949,27 @@ static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer) static int scrub_submit(struct scrub_dev *sdev) { struct scrub_bio *sbio; - struct bio *bio; - int i; if (sdev->curr == -1) return 0; sbio = sdev->bios[sdev->curr]; - - bio = bio_alloc(GFP_NOFS, sbio->count); - if (!bio) - goto nomem; - - bio->bi_private = sbio; - bio->bi_end_io = scrub_bio_end_io; - bio->bi_bdev = sdev->dev->bdev; - bio->bi_sector = sbio->physical >> 9; - - for (i = 0; i < sbio->count; ++i) { - struct page *page; - int ret; - - page = alloc_page(GFP_NOFS); - if (!page) - goto nomem; - - ret = bio_add_page(bio, page, PAGE_SIZE, 0); - if (!ret) { - __free_page(page); - goto nomem; - } - } - sbio->err = 0; sdev->curr = -1; atomic_inc(&sdev->in_flight); - submit_bio(READ, bio); + submit_bio(READ, sbio->bio); return 0; - -nomem: - scrub_free_bio(bio); - - return -ENOMEM; } static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len, - u64 physical, u64 flags, u64 gen, u64 mirror_num, + u64 physical, u64 flags, u64 gen, int mirror_num, u8 *csum, int force) { struct scrub_bio *sbio; + struct page *page; + int ret; again: /* @@ -628,12 +990,22 @@ again: } sbio = sdev->bios[sdev->curr]; if (sbio->count == 0) { + struct bio *bio; + sbio->physical = physical; sbio->logical = logical; + bio = bio_alloc(GFP_NOFS, SCRUB_PAGES_PER_BIO); + if (!bio) + return -ENOMEM; + + bio->bi_private = sbio; + bio->bi_end_io = scrub_bio_end_io; + bio->bi_bdev = sdev->dev->bdev; + bio->bi_sector = sbio->physical >> 9; + sbio->err = 0; + sbio->bio = bio; } else if (sbio->physical + sbio->count * PAGE_SIZE != physical || sbio->logical + sbio->count * PAGE_SIZE != logical) { - int ret; - ret = scrub_submit(sdev); if (ret) return ret; @@ -643,6 +1015,20 @@ again: sbio->spag[sbio->count].generation = gen; sbio->spag[sbio->count].have_csum = 0; sbio->spag[sbio->count].mirror_num = mirror_num; + + page = alloc_page(GFP_NOFS); + if (!page) + return -ENOMEM; + + ret = bio_add_page(sbio->bio, page, PAGE_SIZE, 0); + if (!ret) { + __free_page(page); + ret = scrub_submit(sdev); + if (ret) + return ret; + goto again; + } + if (csum) { sbio->spag[sbio->count].have_csum = 1; memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size); @@ -701,7 +1087,7 @@ static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len, /* scrub extent tries to collect up to 64 kB for each bio */ static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len, - u64 physical, u64 flags, u64 gen, u64 mirror_num) + u64 physical, u64 flags, u64 gen, int mirror_num) { int ret; u8 csum[BTRFS_CSUM_SIZE]; @@ -741,13 +1127,16 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, int slot; int i; u64 nstripes; - int start_stripe; struct extent_buffer *l; struct btrfs_key key; u64 physical; u64 logical; u64 generation; - u64 mirror_num; + int mirror_num; + struct reada_control *reada1; + struct reada_control *reada2; + struct btrfs_key key_start; + struct btrfs_key key_end; u64 increment = map->stripe_len; u64 offset; @@ -758,102 +1147,88 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, if (map->type & BTRFS_BLOCK_GROUP_RAID0) { offset = map->stripe_len * num; increment = map->stripe_len * map->num_stripes; - mirror_num = 0; + mirror_num = 1; } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { int factor = map->num_stripes / map->sub_stripes; offset = map->stripe_len * (num / map->sub_stripes); increment = map->stripe_len * factor; - mirror_num = num % map->sub_stripes; + mirror_num = num % map->sub_stripes + 1; } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { increment = map->stripe_len; - mirror_num = num % map->num_stripes; + mirror_num = num % map->num_stripes + 1; } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { increment = map->stripe_len; - mirror_num = num % map->num_stripes; + mirror_num = num % map->num_stripes + 1; } else { increment = map->stripe_len; - mirror_num = 0; + mirror_num = 1; } path = btrfs_alloc_path(); if (!path) return -ENOMEM; - path->reada = 2; path->search_commit_root = 1; path->skip_locking = 1; /* - * find all extents for each stripe and just read them to get - * them into the page cache - * FIXME: we can do better. build a more intelligent prefetching + * trigger the readahead for extent tree csum tree and wait for + * completion. During readahead, the scrub is officially paused + * to not hold off transaction commits */ logical = base + offset; - physical = map->stripes[num].physical; - ret = 0; - for (i = 0; i < nstripes; ++i) { - key.objectid = logical; - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = (u64)0; - - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) - goto out_noplug; - /* - * we might miss half an extent here, but that doesn't matter, - * as it's only the prefetch - */ - while (1) { - l = path->nodes[0]; - slot = path->slots[0]; - if (slot >= btrfs_header_nritems(l)) { - ret = btrfs_next_leaf(root, path); - if (ret == 0) - continue; - if (ret < 0) - goto out_noplug; - - break; - } - btrfs_item_key_to_cpu(l, &key, slot); + wait_event(sdev->list_wait, + atomic_read(&sdev->in_flight) == 0); + atomic_inc(&fs_info->scrubs_paused); + wake_up(&fs_info->scrub_pause_wait); - if (key.objectid >= logical + map->stripe_len) - break; + /* FIXME it might be better to start readahead at commit root */ + key_start.objectid = logical; + key_start.type = BTRFS_EXTENT_ITEM_KEY; + key_start.offset = (u64)0; + key_end.objectid = base + offset + nstripes * increment; + key_end.type = BTRFS_EXTENT_ITEM_KEY; + key_end.offset = (u64)0; + reada1 = btrfs_reada_add(root, &key_start, &key_end); + + key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID; + key_start.type = BTRFS_EXTENT_CSUM_KEY; + key_start.offset = logical; + key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID; + key_end.type = BTRFS_EXTENT_CSUM_KEY; + key_end.offset = base + offset + nstripes * increment; + reada2 = btrfs_reada_add(csum_root, &key_start, &key_end); + + if (!IS_ERR(reada1)) + btrfs_reada_wait(reada1); + if (!IS_ERR(reada2)) + btrfs_reada_wait(reada2); - path->slots[0]++; - } - btrfs_release_path(path); - logical += increment; - physical += map->stripe_len; - cond_resched(); + mutex_lock(&fs_info->scrub_lock); + while (atomic_read(&fs_info->scrub_pause_req)) { + mutex_unlock(&fs_info->scrub_lock); + wait_event(fs_info->scrub_pause_wait, + atomic_read(&fs_info->scrub_pause_req) == 0); + mutex_lock(&fs_info->scrub_lock); } + atomic_dec(&fs_info->scrubs_paused); + mutex_unlock(&fs_info->scrub_lock); + wake_up(&fs_info->scrub_pause_wait); /* * collect all data csums for the stripe to avoid seeking during * the scrub. This might currently (crc32) end up to be about 1MB */ - start_stripe = 0; blk_start_plug(&plug); -again: - logical = base + offset + start_stripe * increment; - for (i = start_stripe; i < nstripes; ++i) { - ret = btrfs_lookup_csums_range(csum_root, logical, - logical + map->stripe_len - 1, - &sdev->csum_list, 1); - if (ret) - goto out; - logical += increment; - cond_resched(); - } /* * now find all extents for each stripe and scrub them */ - logical = base + offset + start_stripe * increment; - physical = map->stripes[num].physical + start_stripe * map->stripe_len; + logical = base + offset; + physical = map->stripes[num].physical; ret = 0; - for (i = start_stripe; i < nstripes; ++i) { + for (i = 0; i < nstripes; ++i) { /* * canceled? */ @@ -882,11 +1257,14 @@ again: atomic_dec(&fs_info->scrubs_paused); mutex_unlock(&fs_info->scrub_lock); wake_up(&fs_info->scrub_pause_wait); - scrub_free_csums(sdev); - start_stripe = i; - goto again; } + ret = btrfs_lookup_csums_range(csum_root, logical, + logical + map->stripe_len - 1, + &sdev->csum_list, 1); + if (ret) + goto out; + key.objectid = logical; key.type = BTRFS_EXTENT_ITEM_KEY; key.offset = (u64)0; @@ -982,7 +1360,6 @@ next: out: blk_finish_plug(&plug); -out_noplug: btrfs_free_path(path); return ret < 0 ? ret : 0; } @@ -1158,18 +1535,22 @@ static noinline_for_stack int scrub_supers(struct scrub_dev *sdev) static noinline_for_stack int scrub_workers_get(struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; + int ret = 0; mutex_lock(&fs_info->scrub_lock); if (fs_info->scrub_workers_refcnt == 0) { btrfs_init_workers(&fs_info->scrub_workers, "scrub", fs_info->thread_pool_size, &fs_info->generic_worker); fs_info->scrub_workers.idle_thresh = 4; - btrfs_start_workers(&fs_info->scrub_workers, 1); + ret = btrfs_start_workers(&fs_info->scrub_workers); + if (ret) + goto out; } ++fs_info->scrub_workers_refcnt; +out: mutex_unlock(&fs_info->scrub_lock); - return 0; + return ret; } static noinline_for_stack void scrub_workers_put(struct btrfs_root *root) @@ -1253,10 +1634,11 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, ret = scrub_enumerate_chunks(sdev, start, end); wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0); - atomic_dec(&fs_info->scrubs_running); wake_up(&fs_info->scrub_pause_wait); + wait_event(sdev->list_wait, atomic_read(&sdev->fixup_cnt) == 0); + if (progress) memcpy(progress, &sdev->stat, sizeof(*progress)); diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c index c0f7eca..bc1f6ad 100644 --- a/fs/btrfs/struct-funcs.c +++ b/fs/btrfs/struct-funcs.c @@ -50,36 +50,22 @@ u##bits btrfs_##name(struct extent_buffer *eb, \ unsigned long part_offset = (unsigned long)s; \ unsigned long offset = part_offset + offsetof(type, member); \ type *p; \ - /* ugly, but we want the fast path here */ \ - if (eb->map_token && offset >= eb->map_start && \ - offset + sizeof(((type *)0)->member) <= eb->map_start + \ - eb->map_len) { \ - p = (type *)(eb->kaddr + part_offset - eb->map_start); \ - return le##bits##_to_cpu(p->member); \ - } \ - { \ - int err; \ - char *map_token; \ - char *kaddr; \ - int unmap_on_exit = (eb->map_token == NULL); \ - unsigned long map_start; \ - unsigned long map_len; \ - u##bits res; \ - err = map_extent_buffer(eb, offset, \ - sizeof(((type *)0)->member), \ - &map_token, &kaddr, \ - &map_start, &map_len, KM_USER1); \ - if (err) { \ - __le##bits leres; \ - read_eb_member(eb, s, type, member, &leres); \ - return le##bits##_to_cpu(leres); \ - } \ - p = (type *)(kaddr + part_offset - map_start); \ - res = le##bits##_to_cpu(p->member); \ - if (unmap_on_exit) \ - unmap_extent_buffer(eb, map_token, KM_USER1); \ - return res; \ - } \ + int err; \ + char *kaddr; \ + unsigned long map_start; \ + unsigned long map_len; \ + u##bits res; \ + err = map_private_extent_buffer(eb, offset, \ + sizeof(((type *)0)->member), \ + &kaddr, &map_start, &map_len); \ + if (err) { \ + __le##bits leres; \ + read_eb_member(eb, s, type, member, &leres); \ + return le##bits##_to_cpu(leres); \ + } \ + p = (type *)(kaddr + part_offset - map_start); \ + res = le##bits##_to_cpu(p->member); \ + return res; \ } \ void btrfs_set_##name(struct extent_buffer *eb, \ type *s, u##bits val) \ @@ -87,36 +73,21 @@ void btrfs_set_##name(struct extent_buffer *eb, \ unsigned long part_offset = (unsigned long)s; \ unsigned long offset = part_offset + offsetof(type, member); \ type *p; \ - /* ugly, but we want the fast path here */ \ - if (eb->map_token && offset >= eb->map_start && \ - offset + sizeof(((type *)0)->member) <= eb->map_start + \ - eb->map_len) { \ - p = (type *)(eb->kaddr + part_offset - eb->map_start); \ - p->member = cpu_to_le##bits(val); \ - return; \ - } \ - { \ - int err; \ - char *map_token; \ - char *kaddr; \ - int unmap_on_exit = (eb->map_token == NULL); \ - unsigned long map_start; \ - unsigned long map_len; \ - err = map_extent_buffer(eb, offset, \ - sizeof(((type *)0)->member), \ - &map_token, &kaddr, \ - &map_start, &map_len, KM_USER1); \ - if (err) { \ - __le##bits val2; \ - val2 = cpu_to_le##bits(val); \ - write_eb_member(eb, s, type, member, &val2); \ - return; \ - } \ - p = (type *)(kaddr + part_offset - map_start); \ - p->member = cpu_to_le##bits(val); \ - if (unmap_on_exit) \ - unmap_extent_buffer(eb, map_token, KM_USER1); \ - } \ + int err; \ + char *kaddr; \ + unsigned long map_start; \ + unsigned long map_len; \ + err = map_private_extent_buffer(eb, offset, \ + sizeof(((type *)0)->member), \ + &kaddr, &map_start, &map_len); \ + if (err) { \ + __le##bits val2; \ + val2 = cpu_to_le##bits(val); \ + write_eb_member(eb, s, type, member, &val2); \ + return; \ + } \ + p = (type *)(kaddr + part_offset - map_start); \ + p->member = cpu_to_le##bits(val); \ } #include "ctree.h" @@ -125,15 +96,6 @@ void btrfs_node_key(struct extent_buffer *eb, struct btrfs_disk_key *disk_key, int nr) { unsigned long ptr = btrfs_node_key_ptr_offset(nr); - if (eb->map_token && ptr >= eb->map_start && - ptr + sizeof(*disk_key) <= eb->map_start + eb->map_len) { - memcpy(disk_key, eb->kaddr + ptr - eb->map_start, - sizeof(*disk_key)); - return; - } else if (eb->map_token) { - unmap_extent_buffer(eb, eb->map_token, KM_USER1); - eb->map_token = NULL; - } read_eb_member(eb, (struct btrfs_key_ptr *)ptr, struct btrfs_key_ptr, key, disk_key); } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 15634d4..200f63b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -40,6 +40,8 @@ #include #include #include +#include +#include #include "compat.h" #include "delayed-inode.h" #include "ctree.h" @@ -58,6 +60,7 @@ #include static const struct super_operations btrfs_super_ops; +static struct file_system_type btrfs_fs_type; static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno, char nbuf[16]) @@ -162,7 +165,7 @@ enum { Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, - Opt_inode_cache, Opt_err, + Opt_inode_cache, Opt_no_space_cache, Opt_recovery, Opt_err, }; static match_table_t tokens = { @@ -195,6 +198,8 @@ static match_table_t tokens = { {Opt_subvolrootid, "subvolrootid=%d"}, {Opt_defrag, "autodefrag"}, {Opt_inode_cache, "inode_cache"}, + {Opt_no_space_cache, "nospace_cache"}, + {Opt_recovery, "recovery"}, {Opt_err, NULL}, }; @@ -206,14 +211,19 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) { struct btrfs_fs_info *info = root->fs_info; substring_t args[MAX_OPT_ARGS]; - char *p, *num, *orig; + char *p, *num, *orig = NULL; + u64 cache_gen; int intarg; int ret = 0; char *compress_type; bool compress_force = false; + cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy); + if (cache_gen) + btrfs_set_opt(info->mount_opt, SPACE_CACHE); + if (!options) - return 0; + goto out; /* * strsep changes the string, duplicate it because parse_options @@ -360,9 +370,12 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) btrfs_set_opt(info->mount_opt, DISCARD); break; case Opt_space_cache: - printk(KERN_INFO "btrfs: enabling disk space caching\n"); btrfs_set_opt(info->mount_opt, SPACE_CACHE); break; + case Opt_no_space_cache: + printk(KERN_INFO "btrfs: disabling disk space caching\n"); + btrfs_clear_opt(info->mount_opt, SPACE_CACHE); + break; case Opt_inode_cache: printk(KERN_INFO "btrfs: enabling inode map caching\n"); btrfs_set_opt(info->mount_opt, INODE_MAP_CACHE); @@ -381,6 +394,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) printk(KERN_INFO "btrfs: enabling auto defrag"); btrfs_set_opt(info->mount_opt, AUTO_DEFRAG); break; + case Opt_recovery: + printk(KERN_INFO "btrfs: enabling auto recovery"); + btrfs_set_opt(info->mount_opt, RECOVERY); + break; case Opt_err: printk(KERN_INFO "btrfs: unrecognized mount option " "'%s'\n", p); @@ -391,6 +408,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) } } out: + if (!ret && btrfs_test_opt(root, SPACE_CACHE)) + printk(KERN_INFO "btrfs: disk space caching is enabled\n"); kfree(orig); return ret; } @@ -406,12 +425,12 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, u64 *subvol_rootid, struct btrfs_fs_devices **fs_devices) { substring_t args[MAX_OPT_ARGS]; - char *opts, *orig, *p; + char *device_name, *opts, *orig, *p; int error = 0; int intarg; if (!options) - goto out; + return 0; /* * strsep changes the string, duplicate it because parse_options @@ -430,6 +449,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, token = match_token(p, tokens, args); switch (token) { case Opt_subvol: + kfree(*subvol_name); *subvol_name = match_strdup(&args[0]); break; case Opt_subvolid: @@ -457,29 +477,24 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, } break; case Opt_device: - error = btrfs_scan_one_device(match_strdup(&args[0]), + device_name = match_strdup(&args[0]); + if (!device_name) { + error = -ENOMEM; + goto out; + } + error = btrfs_scan_one_device(device_name, flags, holder, fs_devices); + kfree(device_name); if (error) - goto out_free_opts; + goto out; break; default: break; } } - out_free_opts: +out: kfree(orig); - out: - /* - * If no subvolume name is specified we use the default one. Allocate - * a copy of the string "." here so that code later in the - * mount path doesn't care if it's the default volume or another one. - */ - if (!*subvol_name) { - *subvol_name = kstrdup(".", GFP_KERNEL); - if (!*subvol_name) - return -ENOMEM; - } return error; } @@ -492,7 +507,6 @@ static struct dentry *get_default_root(struct super_block *sb, struct btrfs_path *path; struct btrfs_key location; struct inode *inode; - struct dentry *dentry; u64 dir_id; int new = 0; @@ -517,7 +531,7 @@ static struct dentry *get_default_root(struct super_block *sb, * will mount by default if we haven't been given a specific subvolume * to mount. */ - dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); + dir_id = btrfs_super_root_dir(root->fs_info->super_copy); di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0); if (IS_ERR(di)) { btrfs_free_path(path); @@ -566,29 +580,7 @@ setup_root: return dget(sb->s_root); } - if (new) { - const struct qstr name = { .name = "/", .len = 1 }; - - /* - * New inode, we need to make the dentry a sibling of s_root so - * everything gets cleaned up properly on unmount. - */ - dentry = d_alloc(sb->s_root, &name); - if (!dentry) { - iput(inode); - return ERR_PTR(-ENOMEM); - } - d_splice_alias(inode, dentry); - } else { - /* - * We found the inode in cache, just find a dentry for it and - * put the reference to the inode we just got. - */ - dentry = d_find_alias(inode); - iput(inode); - } - - return dentry; + return d_obtain_alias(inode); } static int btrfs_fill_super(struct super_block *sb, @@ -719,6 +711,8 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",noacl"); if (btrfs_test_opt(root, SPACE_CACHE)) seq_puts(seq, ",space_cache"); + else + seq_puts(seq, ",nospace_cache"); if (btrfs_test_opt(root, CLEAR_CACHE)) seq_puts(seq, ",clear_cache"); if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) @@ -753,6 +747,111 @@ static int btrfs_set_super(struct super_block *s, void *data) return set_anon_super(s, data); } +/* + * subvolumes are identified by ino 256 + */ +static inline int is_subvolume_inode(struct inode *inode) +{ + if (inode && inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) + return 1; + return 0; +} + +/* + * This will strip out the subvol=%s argument for an argument string and add + * subvolid=0 to make sure we get the actual tree root for path walking to the + * subvol we want. + */ +static char *setup_root_args(char *args) +{ + unsigned copied = 0; + unsigned len = strlen(args) + 2; + char *pos; + char *ret; + + /* + * We need the same args as before, but minus + * + * subvol=a + * + * and add + * + * subvolid=0 + * + * which is a difference of 2 characters, so we allocate strlen(args) + + * 2 characters. + */ + ret = kzalloc(len * sizeof(char), GFP_NOFS); + if (!ret) + return NULL; + pos = strstr(args, "subvol="); + + /* This shouldn't happen, but just in case.. */ + if (!pos) { + kfree(ret); + return NULL; + } + + /* + * The subvol=<> arg is not at the front of the string, copy everybody + * up to that into ret. + */ + if (pos != args) { + *pos = '\0'; + strcpy(ret, args); + copied += strlen(args); + pos++; + } + + strncpy(ret + copied, "subvolid=0", len - copied); + + /* Length of subvolid=0 */ + copied += 10; + + /* + * If there is no , after the subvol= option then we know there's no + * other options and we can just return. + */ + pos = strchr(pos, ','); + if (!pos) + return ret; + + /* Copy the rest of the arguments into our buffer */ + strncpy(ret + copied, pos, len - copied); + copied += strlen(pos); + + return ret; +} + +static struct dentry *mount_subvol(const char *subvol_name, int flags, + const char *device_name, char *data) +{ + struct dentry *root; + struct vfsmount *mnt; + char *newargs; + + newargs = setup_root_args(data); + if (!newargs) + return ERR_PTR(-ENOMEM); + mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, + newargs); + kfree(newargs); + if (IS_ERR(mnt)) + return ERR_CAST(mnt); + + root = mount_subtree(mnt, subvol_name); + + if (!IS_ERR(root) && !is_subvolume_inode(root->d_inode)) { + struct super_block *s = root->d_sb; + dput(root); + root = ERR_PTR(-EINVAL); + deactivate_locked_super(s); + printk(KERN_ERR "btrfs: '%s' is not a valid subvolume\n", + subvol_name); + } + + return root; +} /* * Find a superblock for the given device / mount point. @@ -767,7 +866,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, struct super_block *s; struct dentry *root; struct btrfs_fs_devices *fs_devices = NULL; - struct btrfs_root *tree_root = NULL; struct btrfs_fs_info *fs_info = NULL; fmode_t mode = FMODE_READ; char *subvol_name = NULL; @@ -781,21 +879,20 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, error = btrfs_parse_early_options(data, mode, fs_type, &subvol_name, &subvol_objectid, &subvol_rootid, &fs_devices); - if (error) + if (error) { + kfree(subvol_name); return ERR_PTR(error); + } - error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices); - if (error) - goto error_free_subvol_name; + if (subvol_name) { + root = mount_subvol(subvol_name, flags, device_name, data); + kfree(subvol_name); + return root; + } - error = btrfs_open_devices(fs_devices, mode, fs_type); + error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices); if (error) - goto error_free_subvol_name; - - if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) { - error = -EACCES; - goto error_close_devices; - } + return ERR_PTR(error); /* * Setup a dummy root and fs_info for test/set super. This is because @@ -804,19 +901,40 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, * then open_ctree will properly initialize everything later. */ fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS); - tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); - if (!fs_info || !tree_root) { + if (!fs_info) + return ERR_PTR(-ENOMEM); + + fs_info->tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); + if (!fs_info->tree_root) { error = -ENOMEM; - goto error_close_devices; + goto error_fs_info; } - fs_info->tree_root = tree_root; + fs_info->tree_root->fs_info = fs_info; fs_info->fs_devices = fs_devices; - tree_root->fs_info = fs_info; + + fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS); + fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS); + if (!fs_info->super_copy || !fs_info->super_for_commit) { + error = -ENOMEM; + goto error_fs_info; + } + + error = btrfs_open_devices(fs_devices, mode, fs_type); + if (error) + goto error_fs_info; + + if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) { + error = -EACCES; + goto error_close_devices; + } bdev = fs_devices->latest_bdev; - s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root); - if (IS_ERR(s)) - goto error_s; + s = sget(fs_type, btrfs_test_super, btrfs_set_super, + fs_info->tree_root); + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto error_close_devices; + } if (s->s_root) { if ((flags ^ s->s_flags) & MS_RDONLY) { @@ -826,75 +944,35 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, } btrfs_close_devices(fs_devices); - kfree(fs_info); - kfree(tree_root); + free_fs_info(fs_info); } else { char b[BDEVNAME_SIZE]; s->s_flags = flags | MS_NOSEC; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); + btrfs_sb(s)->fs_info->bdev_holder = fs_type; error = btrfs_fill_super(s, fs_devices, data, flags & MS_SILENT ? 1 : 0); if (error) { deactivate_locked_super(s); - goto error_free_subvol_name; + return ERR_PTR(error); } - btrfs_sb(s)->fs_info->bdev_holder = fs_type; s->s_flags |= MS_ACTIVE; } - /* if they gave us a subvolume name bind mount into that */ - if (strcmp(subvol_name, ".")) { - struct dentry *new_root; - - root = get_default_root(s, subvol_rootid); - if (IS_ERR(root)) { - error = PTR_ERR(root); - deactivate_locked_super(s); - goto error_free_subvol_name; - } - - mutex_lock(&root->d_inode->i_mutex); - new_root = lookup_one_len(subvol_name, root, - strlen(subvol_name)); - mutex_unlock(&root->d_inode->i_mutex); - - if (IS_ERR(new_root)) { - dput(root); - deactivate_locked_super(s); - error = PTR_ERR(new_root); - goto error_free_subvol_name; - } - if (!new_root->d_inode) { - dput(root); - dput(new_root); - deactivate_locked_super(s); - error = -ENXIO; - goto error_free_subvol_name; - } - dput(root); - root = new_root; - } else { - root = get_default_root(s, subvol_objectid); - if (IS_ERR(root)) { - error = PTR_ERR(root); - deactivate_locked_super(s); - goto error_free_subvol_name; - } + root = get_default_root(s, subvol_objectid); + if (IS_ERR(root)) { + deactivate_locked_super(s); + return root; } - kfree(subvol_name); return root; -error_s: - error = PTR_ERR(s); error_close_devices: btrfs_close_devices(fs_devices); - kfree(fs_info); - kfree(tree_root); -error_free_subvol_name: - kfree(subvol_name); +error_fs_info: + free_fs_info(fs_info); return ERR_PTR(error); } @@ -919,7 +997,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) if (root->fs_info->fs_devices->rw_devices == 0) return -EACCES; - if (btrfs_super_log_root(&root->fs_info->super_copy) != 0) + if (btrfs_super_log_root(root->fs_info->super_copy) != 0) return -EINVAL; ret = btrfs_cleanup_fs_roots(root->fs_info); @@ -976,11 +1054,11 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) u64 avail_space; u64 used_space; u64 min_stripe_size; - int min_stripes = 1; + int min_stripes = 1, num_stripes = 1; int i = 0, nr_devices; int ret; - nr_devices = fs_info->fs_devices->rw_devices; + nr_devices = fs_info->fs_devices->open_devices; BUG_ON(!nr_devices); devices_info = kmalloc(sizeof(*devices_info) * nr_devices, @@ -990,20 +1068,24 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) /* calc min stripe number for data space alloction */ type = btrfs_get_alloc_profile(root, 1); - if (type & BTRFS_BLOCK_GROUP_RAID0) + if (type & BTRFS_BLOCK_GROUP_RAID0) { min_stripes = 2; - else if (type & BTRFS_BLOCK_GROUP_RAID1) + num_stripes = nr_devices; + } else if (type & BTRFS_BLOCK_GROUP_RAID1) { min_stripes = 2; - else if (type & BTRFS_BLOCK_GROUP_RAID10) + num_stripes = 2; + } else if (type & BTRFS_BLOCK_GROUP_RAID10) { min_stripes = 4; + num_stripes = 4; + } if (type & BTRFS_BLOCK_GROUP_DUP) min_stripe_size = 2 * BTRFS_STRIPE_LEN; else min_stripe_size = BTRFS_STRIPE_LEN; - list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { - if (!device->in_fs_metadata) + list_for_each_entry(device, &fs_devices->devices, dev_list) { + if (!device->in_fs_metadata || !device->bdev) continue; avail_space = device->total_bytes - device->bytes_used; @@ -1064,13 +1146,16 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) i = nr_devices - 1; avail_space = 0; while (nr_devices >= min_stripes) { + if (num_stripes > nr_devices) + num_stripes = nr_devices; + if (devices_info[i].max_avail >= min_stripe_size) { int j; u64 alloc_size; - avail_space += devices_info[i].max_avail * min_stripes; + avail_space += devices_info[i].max_avail * num_stripes; alloc_size = devices_info[i].max_avail; - for (j = i + 1 - min_stripes; j <= i; j++) + for (j = i + 1 - num_stripes; j <= i; j++) devices_info[j].max_avail -= alloc_size; } i--; @@ -1085,7 +1170,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct btrfs_root *root = btrfs_sb(dentry->d_sb); - struct btrfs_super_block *disk_super = &root->fs_info->super_copy; + struct btrfs_super_block *disk_super = root->fs_info->super_copy; struct list_head *head = &root->fs_info->space_info; struct btrfs_space_info *found; u64 total_used = 0; @@ -1187,6 +1272,16 @@ static int btrfs_unfreeze(struct super_block *sb) return 0; } +static void btrfs_fs_dirty_inode(struct inode *inode, int flags) +{ + int ret; + + ret = btrfs_dirty_inode(inode); + if (ret) + printk_ratelimited(KERN_ERR "btrfs: fail to dirty inode %Lu " + "error %d\n", btrfs_ino(inode), ret); +} + static const struct super_operations btrfs_super_ops = { .drop_inode = btrfs_drop_inode, .evict_inode = btrfs_evict_inode, @@ -1194,7 +1289,7 @@ static const struct super_operations btrfs_super_ops = { .sync_fs = btrfs_sync_fs, .show_options = btrfs_show_options, .write_inode = btrfs_write_inode, - .dirty_inode = btrfs_dirty_inode, + .dirty_inode = btrfs_fs_dirty_inode, .alloc_inode = btrfs_alloc_inode, .destroy_inode = btrfs_destroy_inode, .statfs = btrfs_statfs, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 51dcec8..292e847 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -55,6 +55,7 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail) struct btrfs_transaction *cur_trans; spin_lock(&root->fs_info->trans_lock); +loop: if (root->fs_info->trans_no_join) { if (!nofail) { spin_unlock(&root->fs_info->trans_lock); @@ -75,16 +76,18 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail) cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); if (!cur_trans) return -ENOMEM; + spin_lock(&root->fs_info->trans_lock); if (root->fs_info->running_transaction) { + /* + * someone started a transaction after we unlocked. Make sure + * to redo the trans_no_join checks above + */ kmem_cache_free(btrfs_transaction_cachep, cur_trans); cur_trans = root->fs_info->running_transaction; - atomic_inc(&cur_trans->use_count); - atomic_inc(&cur_trans->num_writers); - cur_trans->num_joined++; - spin_unlock(&root->fs_info->trans_lock); - return 0; + goto loop; } + atomic_set(&cur_trans->num_writers, 1); cur_trans->num_joined = 0; init_waitqueue_head(&cur_trans->writer_wait); @@ -216,17 +219,11 @@ static void wait_current_trans(struct btrfs_root *root) spin_lock(&root->fs_info->trans_lock); cur_trans = root->fs_info->running_transaction; if (cur_trans && cur_trans->blocked) { - DEFINE_WAIT(wait); atomic_inc(&cur_trans->use_count); spin_unlock(&root->fs_info->trans_lock); - while (1) { - prepare_to_wait(&root->fs_info->transaction_wait, &wait, - TASK_UNINTERRUPTIBLE); - if (!cur_trans->blocked) - break; - schedule(); - } - finish_wait(&root->fs_info->transaction_wait, &wait); + + wait_event(root->fs_info->transaction_wait, + !cur_trans->blocked); put_transaction(cur_trans); } else { spin_unlock(&root->fs_info->trans_lock); @@ -260,7 +257,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, { struct btrfs_trans_handle *h; struct btrfs_transaction *cur_trans; - int retries = 0; + u64 num_bytes = 0; int ret; if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) @@ -274,6 +271,19 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, h->block_rsv = NULL; goto got_it; } + + /* + * Do the reservation before we join the transaction so we can do all + * the appropriate flushing if need be. + */ + if (num_items > 0 && root != root->fs_info->chunk_root) { + num_bytes = btrfs_calc_trans_metadata_size(root, num_items); + ret = btrfs_block_rsv_add(root, + &root->fs_info->trans_block_rsv, + num_bytes); + if (ret) + return ERR_PTR(ret); + } again: h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); if (!h) @@ -310,24 +320,9 @@ again: goto again; } - if (num_items > 0) { - ret = btrfs_trans_reserve_metadata(h, root, num_items); - if (ret == -EAGAIN && !retries) { - retries++; - btrfs_commit_transaction(h, root); - goto again; - } else if (ret == -EAGAIN) { - /* - * We have already retried and got EAGAIN, so really we - * don't have space, so set ret to -ENOSPC. - */ - ret = -ENOSPC; - } - - if (ret < 0) { - btrfs_end_transaction(h, root); - return ERR_PTR(ret); - } + if (num_bytes) { + h->block_rsv = &root->fs_info->trans_block_rsv; + h->bytes_reserved = num_bytes; } got_it: @@ -359,19 +354,10 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root } /* wait for a transaction commit to be fully complete */ -static noinline int wait_for_commit(struct btrfs_root *root, +static noinline void wait_for_commit(struct btrfs_root *root, struct btrfs_transaction *commit) { - DEFINE_WAIT(wait); - while (!commit->commit_done) { - prepare_to_wait(&commit->commit_wait, &wait, - TASK_UNINTERRUPTIBLE); - if (commit->commit_done) - break; - schedule(); - } - finish_wait(&commit->commit_wait, &wait); - return 0; + wait_event(commit->commit_wait, commit->commit_done); } int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) @@ -435,8 +421,8 @@ static int should_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { int ret; - ret = btrfs_block_rsv_check(trans, root, - &root->fs_info->global_block_rsv, 0, 5); + + ret = btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5); return ret ? 1 : 0; } @@ -444,17 +430,26 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_transaction *cur_trans = trans->transaction; + struct btrfs_block_rsv *rsv = trans->block_rsv; int updates; smp_mb(); if (cur_trans->blocked || cur_trans->delayed_refs.flushing) return 1; + /* + * We need to do this in case we're deleting csums so the global block + * rsv get's used instead of the csum block rsv. + */ + trans->block_rsv = NULL; + updates = trans->delayed_ref_updates; trans->delayed_ref_updates = 0; if (updates) btrfs_run_delayed_refs(trans, root, updates); + trans->block_rsv = rsv; + return should_end_transaction(trans, root); } @@ -465,11 +460,14 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_fs_info *info = root->fs_info; int count = 0; - if (--trans->use_count) { + if (trans->use_count > 1) { + trans->use_count--; trans->block_rsv = trans->orig_rsv; return 0; } + btrfs_trans_release_metadata(trans, root); + trans->block_rsv = NULL; while (count < 4) { unsigned long cur = trans->delayed_ref_updates; trans->delayed_ref_updates = 0; @@ -490,8 +488,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, count++; } - btrfs_trans_release_metadata(trans, root); - if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && should_end_transaction(trans, root)) { trans->transaction->blocked = 1; @@ -572,50 +568,21 @@ int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans, int btrfs_write_marked_extents(struct btrfs_root *root, struct extent_io_tree *dirty_pages, int mark) { - int ret; int err = 0; int werr = 0; - struct page *page; - struct inode *btree_inode = root->fs_info->btree_inode; + struct address_space *mapping = root->fs_info->btree_inode->i_mapping; u64 start = 0; u64 end; - unsigned long index; - while (1) { - ret = find_first_extent_bit(dirty_pages, start, &start, &end, - mark); - if (ret) - break; - while (start <= end) { - cond_resched(); - - index = start >> PAGE_CACHE_SHIFT; - start = (u64)(index + 1) << PAGE_CACHE_SHIFT; - page = find_get_page(btree_inode->i_mapping, index); - if (!page) - continue; - - btree_lock_page_hook(page); - if (!page->mapping) { - unlock_page(page); - page_cache_release(page); - continue; - } - - if (PageWriteback(page)) { - if (PageDirty(page)) - wait_on_page_writeback(page); - else { - unlock_page(page); - page_cache_release(page); - continue; - } - } - err = write_one_page(page, 0); - if (err) - werr = err; - page_cache_release(page); - } + while (!find_first_extent_bit(dirty_pages, start, &start, &end, + mark)) { + convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, mark, + GFP_NOFS); + err = filemap_fdatawrite_range(mapping, start, end); + if (err) + werr = err; + cond_resched(); + start = end + 1; } if (err) werr = err; @@ -631,39 +598,20 @@ int btrfs_write_marked_extents(struct btrfs_root *root, int btrfs_wait_marked_extents(struct btrfs_root *root, struct extent_io_tree *dirty_pages, int mark) { - int ret; int err = 0; int werr = 0; - struct page *page; - struct inode *btree_inode = root->fs_info->btree_inode; + struct address_space *mapping = root->fs_info->btree_inode->i_mapping; u64 start = 0; u64 end; - unsigned long index; - while (1) { - ret = find_first_extent_bit(dirty_pages, start, &start, &end, - mark); - if (ret) - break; - - clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); - while (start <= end) { - index = start >> PAGE_CACHE_SHIFT; - start = (u64)(index + 1) << PAGE_CACHE_SHIFT; - page = find_get_page(btree_inode->i_mapping, index); - if (!page) - continue; - if (PageDirty(page)) { - btree_lock_page_hook(page); - wait_on_page_writeback(page); - err = write_one_page(page, 0); - if (err) - werr = err; - } - wait_on_page_writeback(page); - page_cache_release(page); - cond_resched(); - } + while (!find_first_extent_bit(dirty_pages, start, &start, &end, + EXTENT_NEED_WAIT)) { + clear_extent_bits(dirty_pages, start, end, EXTENT_NEED_WAIT, GFP_NOFS); + err = filemap_fdatawait_range(mapping, start, end); + if (err) + werr = err; + cond_resched(); + start = end + 1; } if (err) werr = err; @@ -683,7 +631,12 @@ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, ret = btrfs_write_marked_extents(root, dirty_pages, mark); ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark); - return ret || ret2; + + if (ret) + return ret; + if (ret2) + return ret2; + return 0; } int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, @@ -826,6 +779,10 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, btrfs_save_ino_cache(root, trans); + /* see comments in should_cow_block() */ + root->force_cow = 0; + smp_wmb(); + if (root->commit_root != root->node) { mutex_lock(&root->fs_commit_mutex); switch_commit_root(root); @@ -894,6 +851,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *root = pending->root; struct btrfs_root *parent_root; + struct btrfs_block_rsv *rsv; struct inode *parent_inode; struct dentry *parent; struct dentry *dentry; @@ -905,6 +863,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, u64 objectid; u64 root_flags; + rsv = trans->block_rsv; + new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); if (!new_root_item) { pending->error = -ENOMEM; @@ -918,11 +878,10 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, } btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); - btrfs_orphan_pre_snapshot(trans, pending, &to_reserve); if (to_reserve > 0) { - ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv, - to_reserve); + ret = btrfs_block_rsv_add_noflush(root, &pending->block_rsv, + to_reserve); if (ret) { pending->error = ret; goto fail; @@ -986,6 +945,10 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_tree_unlock(old); free_extent_buffer(old); + /* see comments in should_cow_block() */ + root->force_cow = 1; + smp_wmb(); + btrfs_set_root_node(new_root_item, tmp); /* record when the snapshot was created in key.offset */ key.offset = trans->transid; @@ -1009,9 +972,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, BUG_ON(IS_ERR(pending->snap)); btrfs_reloc_post_snapshot(trans, pending); - btrfs_orphan_post_snapshot(trans, pending); fail: kfree(new_root_item); + trans->block_rsv = rsv; btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1); return 0; } @@ -1038,7 +1001,7 @@ static void update_super_roots(struct btrfs_root *root) struct btrfs_root_item *root_item; struct btrfs_super_block *super; - super = &root->fs_info->super_copy; + super = root->fs_info->super_copy; root_item = &root->fs_info->chunk_root->root_item; super->chunk_root = root_item->bytenr; @@ -1049,7 +1012,7 @@ static void update_super_roots(struct btrfs_root *root) super->root = root_item->bytenr; super->generation = root_item->generation; super->root_level = root_item->level; - if (super->cache_generation != 0 || btrfs_test_opt(root, SPACE_CACHE)) + if (btrfs_test_opt(root, SPACE_CACHE)) super->cache_generation = root_item->generation; } @@ -1080,22 +1043,7 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info) static void wait_current_trans_commit_start(struct btrfs_root *root, struct btrfs_transaction *trans) { - DEFINE_WAIT(wait); - - if (trans->in_commit) - return; - - while (1) { - prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait, - TASK_UNINTERRUPTIBLE); - if (trans->in_commit) { - finish_wait(&root->fs_info->transaction_blocked_wait, - &wait); - break; - } - schedule(); - finish_wait(&root->fs_info->transaction_blocked_wait, &wait); - } + wait_event(root->fs_info->transaction_blocked_wait, trans->in_commit); } /* @@ -1105,24 +1053,8 @@ static void wait_current_trans_commit_start(struct btrfs_root *root, static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root, struct btrfs_transaction *trans) { - DEFINE_WAIT(wait); - - if (trans->commit_done || (trans->in_commit && !trans->blocked)) - return; - - while (1) { - prepare_to_wait(&root->fs_info->transaction_wait, &wait, - TASK_UNINTERRUPTIBLE); - if (trans->commit_done || - (trans->in_commit && !trans->blocked)) { - finish_wait(&root->fs_info->transaction_wait, - &wait); - break; - } - schedule(); - finish_wait(&root->fs_info->transaction_wait, - &wait); - } + wait_event(root->fs_info->transaction_wait, + trans->commit_done || (trans->in_commit && !trans->blocked)); } /* @@ -1205,14 +1137,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_run_ordered_operations(root, 0); + btrfs_trans_release_metadata(trans, root); + trans->block_rsv = NULL; + /* make a pass through all the delayed refs we have so far * any runnings procs may add more while we are here */ ret = btrfs_run_delayed_refs(trans, root, 0); BUG_ON(ret); - btrfs_trans_release_metadata(trans, root); - cur_trans = trans->transaction; /* * set the flushing flag so procs in this transaction have to @@ -1229,8 +1162,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, atomic_inc(&cur_trans->use_count); btrfs_end_transaction(trans, root); - ret = wait_for_commit(root, cur_trans); - BUG_ON(ret); + wait_for_commit(root, cur_trans); put_transaction(cur_trans); @@ -1379,12 +1311,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, update_super_roots(root); if (!root->fs_info->log_root_recovering) { - btrfs_set_super_log_root(&root->fs_info->super_copy, 0); - btrfs_set_super_log_root_level(&root->fs_info->super_copy, 0); + btrfs_set_super_log_root(root->fs_info->super_copy, 0); + btrfs_set_super_log_root_level(root->fs_info->super_copy, 0); } - memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy, - sizeof(root->fs_info->super_copy)); + memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy, + sizeof(*root->fs_info->super_copy)); trans->transaction->blocked = 0; spin_lock(&root->fs_info->trans_lock); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 88dec16..21faa12 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -276,8 +276,9 @@ static int process_one_buffer(struct btrfs_root *log, struct walk_control *wc, u64 gen) { if (wc->pin) - btrfs_pin_extent(log->fs_info->extent_root, - eb->start, eb->len, 0); + btrfs_pin_extent_for_log_replay(wc->trans, + log->fs_info->extent_root, + eb->start, eb->len); if (btrfs_buffer_uptodate(eb, gen)) { if (wc->write) @@ -1069,7 +1070,7 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, } btrfs_release_path(path); if (nlink != inode->i_nlink) { - inode->i_nlink = nlink; + set_nlink(inode, nlink); btrfs_update_inode(trans, root, inode); } BTRFS_I(inode)->index_cnt = (u64)-1; @@ -1679,7 +1680,8 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, return 0; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; nritems = btrfs_header_nritems(eb); for (i = 0; i < nritems; i++) { @@ -1785,21 +1787,23 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, return -ENOMEM; if (*level == 1) { - wc->process_func(root, next, wc, ptr_gen); + ret = wc->process_func(root, next, wc, ptr_gen); + if (ret) + return ret; path->slots[*level]++; if (wc->free) { btrfs_read_buffer(next, ptr_gen); btrfs_tree_lock(next); - clean_tree_block(trans, root, next); btrfs_set_lock_blocking(next); + clean_tree_block(trans, root, next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); - ret = btrfs_free_reserved_extent(root, + ret = btrfs_free_and_pin_reserved_extent(root, bytenr, blocksize); BUG_ON(ret); } @@ -1850,21 +1854,24 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, parent = path->nodes[*level + 1]; root_owner = btrfs_header_owner(parent); - wc->process_func(root, path->nodes[*level], wc, + ret = wc->process_func(root, path->nodes[*level], wc, btrfs_header_generation(path->nodes[*level])); + if (ret) + return ret; + if (wc->free) { struct extent_buffer *next; next = path->nodes[*level]; btrfs_tree_lock(next); - clean_tree_block(trans, root, next); btrfs_set_lock_blocking(next); + clean_tree_block(trans, root, next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); - ret = btrfs_free_reserved_extent(root, + ret = btrfs_free_and_pin_reserved_extent(root, path->nodes[*level]->start, path->nodes[*level]->len); BUG_ON(ret); @@ -1926,14 +1933,14 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, next = path->nodes[orig_level]; btrfs_tree_lock(next); - clean_tree_block(trans, log, next); btrfs_set_lock_blocking(next); + clean_tree_block(trans, log, next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); WARN_ON(log->root_key.objectid != BTRFS_TREE_LOG_OBJECTID); - ret = btrfs_free_reserved_extent(log, next->start, + ret = btrfs_free_and_pin_reserved_extent(log, next->start, next->len); BUG_ON(ret); } @@ -2049,10 +2056,10 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, /* wait for previous tree log sync to complete */ if (atomic_read(&root->log_commit[(index1 + 1) % 2])) wait_log_commit(trans, root, root->log_transid - 1); - while (1) { unsigned long batch = root->log_batch; - if (root->log_multiple_pids) { + /* when we're on an ssd, just kick the log commit out */ + if (!btrfs_test_opt(root, SSD) && root->log_multiple_pids) { mutex_unlock(&root->log_mutex); schedule_timeout_uninterruptible(1); mutex_lock(&root->log_mutex); @@ -2153,9 +2160,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, BUG_ON(ret); btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); - btrfs_set_super_log_root(&root->fs_info->super_for_commit, + btrfs_set_super_log_root(root->fs_info->super_for_commit, log_root_tree->node->start); - btrfs_set_super_log_root_level(&root->fs_info->super_for_commit, + btrfs_set_super_log_root_level(root->fs_info->super_for_commit, btrfs_header_level(log_root_tree->node)); log_root_tree->log_batch = 0; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 7745ad5..9899205 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -142,6 +142,7 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) unsigned long limit; unsigned long last_waited = 0; int force_reg = 0; + int sync_pending = 0; struct blk_plug plug; /* @@ -229,6 +230,22 @@ loop_lock: BUG_ON(atomic_read(&cur->bi_cnt) == 0); + /* + * if we're doing the sync list, record that our + * plug has some sync requests on it + * + * If we're doing the regular list and there are + * sync requests sitting around, unplug before + * we add more + */ + if (pending_bios == &device->pending_sync_bios) { + sync_pending = 1; + } else if (sync_pending) { + blk_finish_plug(&plug); + blk_start_plug(&plug); + sync_pending = 0; + } + submit_bio(cur->bi_rw, cur); num_run++; batch_run++; @@ -278,6 +295,12 @@ loop_lock: btrfs_requeue_work(&device->work); goto done; } + /* unplug every 64 requests just for good measure */ + if (batch_run % 64 == 0) { + blk_finish_plug(&plug); + blk_start_plug(&plug); + sync_pending = 0; + } } cond_resched(); @@ -349,6 +372,14 @@ static noinline int device_list_add(const char *path, } INIT_LIST_HEAD(&device->dev_alloc_list); + /* init readahead state */ + spin_lock_init(&device->reada_lock); + device->reada_curr_zone = NULL; + atomic_set(&device->reada_in_flight, 0); + device->reada_next = 0; + INIT_RADIX_TREE(&device->reada_zones, GFP_NOFS & ~__GFP_WAIT); + INIT_RADIX_TREE(&device->reada_extents, GFP_NOFS & ~__GFP_WAIT); + mutex_lock(&fs_devices->device_list_mutex); list_add_rcu(&device->dev_list, &fs_devices->devices); mutex_unlock(&fs_devices->device_list_mutex); @@ -587,10 +618,8 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, set_blocksize(bdev, 4096); bh = btrfs_read_dev_super(bdev); - if (!bh) { - ret = -EINVAL; + if (!bh) goto error_close; - } disk_super = (struct btrfs_super_block *)bh->b_data; devid = btrfs_stack_device_id(&disk_super->dev_item); @@ -645,7 +674,7 @@ error: continue; } if (fs_devices->open_devices == 0) { - ret = -EIO; + ret = -EINVAL; goto out; } fs_devices->seeding = seeding; @@ -853,6 +882,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, max_hole_start = search_start; max_hole_size = 0; + hole_size = 0; if (search_start >= search_end) { ret = -ENOSPC; @@ -935,7 +965,14 @@ next: cond_resched(); } - hole_size = search_end- search_start; + /* + * At this point, search_start should be the end of + * allocated dev extents, and when shrinking the device, + * search_end may be smaller than search_start. + */ + if (search_end > search_start) + hole_size = search_end - search_start; + if (hole_size > max_hole_size) { max_hole_start = search_start; max_hole_size = hole_size; @@ -975,7 +1012,7 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, key.objectid = device->devid; key.offset = start; key.type = BTRFS_DEV_EXTENT_KEY; - +again: ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret > 0) { ret = btrfs_previous_item(root, path, key.objectid, @@ -988,6 +1025,9 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_dev_extent); BUG_ON(found_key.offset > start || found_key.offset + btrfs_dev_extent_length(leaf, extent) < start); + key = found_key; + btrfs_release_path(path); + goto again; } else if (ret == 0) { leaf = path->nodes[0]; extent = btrfs_item_ptr(leaf, path->slots[0], @@ -995,8 +1035,13 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, } BUG_ON(ret); - if (device->bytes_used > 0) - device->bytes_used -= btrfs_dev_extent_length(leaf, extent); + if (device->bytes_used > 0) { + u64 len = btrfs_dev_extent_length(leaf, extent); + device->bytes_used -= len; + spin_lock(&root->fs_info->free_chunk_lock); + root->fs_info->free_chunk_space += len; + spin_unlock(&root->fs_info->free_chunk_lock); + } ret = btrfs_del_item(trans, root, path); out: @@ -1055,7 +1100,8 @@ static noinline int find_next_chunk(struct btrfs_root *root, struct btrfs_key found_key; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; key.objectid = objectid; key.offset = (u64)-1; @@ -1337,6 +1383,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) if (ret) goto error_undo; + spin_lock(&root->fs_info->free_chunk_lock); + root->fs_info->free_chunk_space = device->total_bytes - + device->bytes_used; + spin_unlock(&root->fs_info->free_chunk_lock); + device->in_fs_metadata = 0; btrfs_scrub_cancel_dev(root, device); @@ -1368,8 +1419,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) call_rcu(&device->rcu, free_device); mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); - num_devices = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; - btrfs_set_super_num_devices(&root->fs_info->super_copy, num_devices); + num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1; + btrfs_set_super_num_devices(root->fs_info->super_copy, num_devices); if (cur_devices->open_devices == 0) { struct btrfs_fs_devices *fs_devices; @@ -1431,7 +1482,7 @@ static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans, struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; struct btrfs_fs_devices *old_devices; struct btrfs_fs_devices *seed_devices; - struct btrfs_super_block *disk_super = &root->fs_info->super_copy; + struct btrfs_super_block *disk_super = root->fs_info->super_copy; struct btrfs_device *device; u64 super_flags; @@ -1573,7 +1624,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) return -EINVAL; - bdev = blkdev_get_by_path(device_path, FMODE_EXCL, + bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, root->fs_info->bdev_holder); if (IS_ERR(bdev)) return PTR_ERR(bdev); @@ -1672,15 +1723,19 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) root->fs_info->fs_devices->num_can_discard++; root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; + spin_lock(&root->fs_info->free_chunk_lock); + root->fs_info->free_chunk_space += device->total_bytes; + spin_unlock(&root->fs_info->free_chunk_lock); + if (!blk_queue_nonrot(bdev_get_queue(bdev))) root->fs_info->fs_devices->rotating = 1; - total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); - btrfs_set_super_total_bytes(&root->fs_info->super_copy, + total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy); + btrfs_set_super_total_bytes(root->fs_info->super_copy, total_bytes + device->total_bytes); - total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy); - btrfs_set_super_num_devices(&root->fs_info->super_copy, + total_bytes = btrfs_super_num_devices(root->fs_info->super_copy); + btrfs_set_super_num_devices(root->fs_info->super_copy, total_bytes + 1); mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); @@ -1771,7 +1826,7 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 new_size) { struct btrfs_super_block *super_copy = - &device->dev_root->fs_info->super_copy; + device->dev_root->fs_info->super_copy; u64 old_total = btrfs_super_total_bytes(super_copy); u64 diff = new_size - device->total_bytes; @@ -1830,7 +1885,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans, static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64 chunk_offset) { - struct btrfs_super_block *super_copy = &root->fs_info->super_copy; + struct btrfs_super_block *super_copy = root->fs_info->super_copy; struct btrfs_disk_key *disk_key; struct btrfs_chunk *chunk; u8 *ptr; @@ -2085,8 +2140,10 @@ int btrfs_balance(struct btrfs_root *dev_root) /* step two, relocate all the chunks */ path = btrfs_alloc_path(); - BUG_ON(!path); - + if (!path) { + ret = -ENOMEM; + goto error; + } key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; key.offset = (u64)-1; key.type = BTRFS_CHUNK_ITEM_KEY; @@ -2154,7 +2211,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) bool retried = false; struct extent_buffer *l; struct btrfs_key key; - struct btrfs_super_block *super_copy = &root->fs_info->super_copy; + struct btrfs_super_block *super_copy = root->fs_info->super_copy; u64 old_total = btrfs_super_total_bytes(super_copy); u64 old_size = device->total_bytes; u64 diff = device->total_bytes - new_size; @@ -2171,8 +2228,12 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) lock_chunks(root); device->total_bytes = new_size; - if (device->writeable) + if (device->writeable) { device->fs_devices->total_rw_bytes -= diff; + spin_lock(&root->fs_info->free_chunk_lock); + root->fs_info->free_chunk_space -= diff; + spin_unlock(&root->fs_info->free_chunk_lock); + } unlock_chunks(root); again: @@ -2236,6 +2297,9 @@ again: device->total_bytes = old_size; if (device->writeable) device->fs_devices->total_rw_bytes += diff; + spin_lock(&root->fs_info->free_chunk_lock); + root->fs_info->free_chunk_space += diff; + spin_unlock(&root->fs_info->free_chunk_lock); unlock_chunks(root); goto done; } @@ -2271,7 +2335,7 @@ static int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, struct btrfs_key *key, struct btrfs_chunk *chunk, int item_size) { - struct btrfs_super_block *super_copy = &root->fs_info->super_copy; + struct btrfs_super_block *super_copy = root->fs_info->super_copy; struct btrfs_disk_key disk_key; u32 array_size; u8 *ptr; @@ -2434,9 +2498,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, total_avail = device->total_bytes - device->bytes_used; else total_avail = 0; - /* avail is off by max(alloc_start, 1MB), but that is the same - * for all devices, so it doesn't hurt the sorting later on - */ + + /* If there is no space on this device, skip it. */ + if (total_avail == 0) + continue; ret = find_free_dev_extent(trans, device, max_stripe_size * dev_stripes, @@ -2593,6 +2658,11 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, index++; } + spin_lock(&extent_root->fs_info->free_chunk_lock); + extent_root->fs_info->free_chunk_space -= (stripe_size * + map->num_stripes); + spin_unlock(&extent_root->fs_info->free_chunk_lock); + index = 0; stripe = &chunk->stripe; while (index < map->num_stripes) { @@ -2685,7 +2755,8 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, ret = find_next_chunk(fs_info->chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset); - BUG_ON(ret); + if (ret) + return ret; alloc_profile = BTRFS_BLOCK_GROUP_METADATA | (fs_info->metadata_alloc_profile & @@ -2825,7 +2896,7 @@ static int find_live_mirror(struct map_lookup *map, int first, int num, static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, u64 logical, u64 *length, - struct btrfs_multi_bio **multi_ret, + struct btrfs_bio **bbio_ret, int mirror_num) { struct extent_map *em; @@ -2843,18 +2914,18 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, int i; int num_stripes; int max_errors = 0; - struct btrfs_multi_bio *multi = NULL; + struct btrfs_bio *bbio = NULL; - if (multi_ret && !(rw & (REQ_WRITE | REQ_DISCARD))) + if (bbio_ret && !(rw & (REQ_WRITE | REQ_DISCARD))) stripes_allocated = 1; again: - if (multi_ret) { - multi = kzalloc(btrfs_multi_bio_size(stripes_allocated), + if (bbio_ret) { + bbio = kzalloc(btrfs_bio_size(stripes_allocated), GFP_NOFS); - if (!multi) + if (!bbio) return -ENOMEM; - atomic_set(&multi->error, 0); + atomic_set(&bbio->error, 0); } read_lock(&em_tree->lock); @@ -2875,7 +2946,7 @@ again: if (mirror_num > map->num_stripes) mirror_num = 0; - /* if our multi bio struct is too small, back off and try again */ + /* if our btrfs_bio struct is too small, back off and try again */ if (rw & REQ_WRITE) { if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP)) { @@ -2894,11 +2965,11 @@ again: stripes_required = map->num_stripes; } } - if (multi_ret && (rw & (REQ_WRITE | REQ_DISCARD)) && + if (bbio_ret && (rw & (REQ_WRITE | REQ_DISCARD)) && stripes_allocated < stripes_required) { stripes_allocated = map->num_stripes; free_extent_map(em); - kfree(multi); + kfree(bbio); goto again; } stripe_nr = offset; @@ -2927,7 +2998,7 @@ again: *length = em->len - offset; } - if (!multi_ret) + if (!bbio_ret) goto out; num_stripes = 1; @@ -2952,13 +3023,17 @@ again: stripe_index = find_live_mirror(map, 0, map->num_stripes, current->pid % map->num_stripes); + mirror_num = stripe_index + 1; } } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { - if (rw & (REQ_WRITE | REQ_DISCARD)) + if (rw & (REQ_WRITE | REQ_DISCARD)) { num_stripes = map->num_stripes; - else if (mirror_num) + } else if (mirror_num) { stripe_index = mirror_num - 1; + } else { + mirror_num = 1; + } } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { int factor = map->num_stripes / map->sub_stripes; @@ -2978,6 +3053,7 @@ again: stripe_index = find_live_mirror(map, stripe_index, map->sub_stripes, stripe_index + current->pid % map->sub_stripes); + mirror_num = stripe_index + 1; } } else { /* @@ -2986,15 +3062,16 @@ again: * stripe_index is the number of our device in the stripe array */ stripe_index = do_div(stripe_nr, map->num_stripes); + mirror_num = stripe_index + 1; } BUG_ON(stripe_index >= map->num_stripes); if (rw & REQ_DISCARD) { for (i = 0; i < num_stripes; i++) { - multi->stripes[i].physical = + bbio->stripes[i].physical = map->stripes[stripe_index].physical + stripe_offset + stripe_nr * map->stripe_len; - multi->stripes[i].dev = map->stripes[stripe_index].dev; + bbio->stripes[i].dev = map->stripes[stripe_index].dev; if (map->type & BTRFS_BLOCK_GROUP_RAID0) { u64 stripes; @@ -3015,16 +3092,16 @@ again: } stripes = stripe_nr_end - 1 - j; do_div(stripes, map->num_stripes); - multi->stripes[i].length = map->stripe_len * + bbio->stripes[i].length = map->stripe_len * (stripes - stripe_nr + 1); if (i == 0) { - multi->stripes[i].length -= + bbio->stripes[i].length -= stripe_offset; stripe_offset = 0; } if (stripe_index == last_stripe) - multi->stripes[i].length -= + bbio->stripes[i].length -= stripe_end_offset; } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { u64 stripes; @@ -3049,11 +3126,11 @@ again: } stripes = stripe_nr_end - 1 - j; do_div(stripes, factor); - multi->stripes[i].length = map->stripe_len * + bbio->stripes[i].length = map->stripe_len * (stripes - stripe_nr + 1); if (i < map->sub_stripes) { - multi->stripes[i].length -= + bbio->stripes[i].length -= stripe_offset; if (i == map->sub_stripes - 1) stripe_offset = 0; @@ -3061,11 +3138,11 @@ again: if (stripe_index >= last_stripe && stripe_index <= (last_stripe + map->sub_stripes - 1)) { - multi->stripes[i].length -= + bbio->stripes[i].length -= stripe_end_offset; } } else - multi->stripes[i].length = *length; + bbio->stripes[i].length = *length; stripe_index++; if (stripe_index == map->num_stripes) { @@ -3076,19 +3153,20 @@ again: } } else { for (i = 0; i < num_stripes; i++) { - multi->stripes[i].physical = + bbio->stripes[i].physical = map->stripes[stripe_index].physical + stripe_offset + stripe_nr * map->stripe_len; - multi->stripes[i].dev = + bbio->stripes[i].dev = map->stripes[stripe_index].dev; stripe_index++; } } - if (multi_ret) { - *multi_ret = multi; - multi->num_stripes = num_stripes; - multi->max_errors = max_errors; + if (bbio_ret) { + *bbio_ret = bbio; + bbio->num_stripes = num_stripes; + bbio->max_errors = max_errors; + bbio->mirror_num = mirror_num; } out: free_extent_map(em); @@ -3097,9 +3175,9 @@ out: int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, u64 logical, u64 *length, - struct btrfs_multi_bio **multi_ret, int mirror_num) + struct btrfs_bio **bbio_ret, int mirror_num) { - return __btrfs_map_block(map_tree, rw, logical, length, multi_ret, + return __btrfs_map_block(map_tree, rw, logical, length, bbio_ret, mirror_num); } @@ -3168,30 +3246,32 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, return 0; } -static void end_bio_multi_stripe(struct bio *bio, int err) +static void btrfs_end_bio(struct bio *bio, int err) { - struct btrfs_multi_bio *multi = bio->bi_private; + struct btrfs_bio *bbio = bio->bi_private; int is_orig_bio = 0; if (err) - atomic_inc(&multi->error); + atomic_inc(&bbio->error); - if (bio == multi->orig_bio) + if (bio == bbio->orig_bio) is_orig_bio = 1; - if (atomic_dec_and_test(&multi->stripes_pending)) { + if (atomic_dec_and_test(&bbio->stripes_pending)) { if (!is_orig_bio) { bio_put(bio); - bio = multi->orig_bio; + bio = bbio->orig_bio; } - bio->bi_private = multi->private; - bio->bi_end_io = multi->end_io; + bio->bi_private = bbio->private; + bio->bi_end_io = bbio->end_io; + bio->bi_bdev = (struct block_device *) + (unsigned long)bbio->mirror_num; /* only send an error to the higher layers if it is * beyond the tolerance of the multi-bio */ - if (atomic_read(&multi->error) > multi->max_errors) { + if (atomic_read(&bbio->error) > bbio->max_errors) { err = -EIO; - } else if (err) { + } else { /* * this bio is actually up to date, we didn't * go over the max number of errors @@ -3199,7 +3279,7 @@ static void end_bio_multi_stripe(struct bio *bio, int err) set_bit(BIO_UPTODATE, &bio->bi_flags); err = 0; } - kfree(multi); + kfree(bbio); bio_endio(bio, err); } else if (!is_orig_bio) { @@ -3279,20 +3359,20 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, u64 logical = (u64)bio->bi_sector << 9; u64 length = 0; u64 map_length; - struct btrfs_multi_bio *multi = NULL; int ret; int dev_nr = 0; int total_devs = 1; + struct btrfs_bio *bbio = NULL; length = bio->bi_size; map_tree = &root->fs_info->mapping_tree; map_length = length; - ret = btrfs_map_block(map_tree, rw, logical, &map_length, &multi, + ret = btrfs_map_block(map_tree, rw, logical, &map_length, &bbio, mirror_num); BUG_ON(ret); - total_devs = multi->num_stripes; + total_devs = bbio->num_stripes; if (map_length < length) { printk(KERN_CRIT "mapping failed logical %llu bio len %llu " "len %llu\n", (unsigned long long)logical, @@ -3300,25 +3380,28 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, (unsigned long long)map_length); BUG(); } - multi->end_io = first_bio->bi_end_io; - multi->private = first_bio->bi_private; - multi->orig_bio = first_bio; - atomic_set(&multi->stripes_pending, multi->num_stripes); + + bbio->orig_bio = first_bio; + bbio->private = first_bio->bi_private; + bbio->end_io = first_bio->bi_end_io; + atomic_set(&bbio->stripes_pending, bbio->num_stripes); while (dev_nr < total_devs) { - if (total_devs > 1) { - if (dev_nr < total_devs - 1) { - bio = bio_clone(first_bio, GFP_NOFS); - BUG_ON(!bio); - } else { - bio = first_bio; - } - bio->bi_private = multi; - bio->bi_end_io = end_bio_multi_stripe; + if (dev_nr < total_devs - 1) { + bio = bio_clone(first_bio, GFP_NOFS); + BUG_ON(!bio); + } else { + bio = first_bio; } - bio->bi_sector = multi->stripes[dev_nr].physical >> 9; - dev = multi->stripes[dev_nr].dev; + bio->bi_private = bbio; + bio->bi_end_io = btrfs_end_bio; + bio->bi_sector = bbio->stripes[dev_nr].physical >> 9; + dev = bbio->stripes[dev_nr].dev; if (dev && dev->bdev && (rw != WRITE || dev->writeable)) { + pr_debug("btrfs_map_bio: rw %d, secor=%llu, dev=%lu " + "(%s id %llu), size=%u\n", rw, + (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev, + dev->name, dev->devid, bio->bi_size); bio->bi_bdev = dev->bdev; if (async_submit) schedule_bio(root, dev, rw, bio); @@ -3331,8 +3414,6 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, } dev_nr++; } - if (total_devs == 1) - kfree(multi); return 0; } @@ -3593,15 +3674,20 @@ static int read_one_dev(struct btrfs_root *root, fill_device_from_item(leaf, dev_item, device); device->dev_root = root->fs_info->dev_root; device->in_fs_metadata = 1; - if (device->writeable) + if (device->writeable) { device->fs_devices->total_rw_bytes += device->total_bytes; + spin_lock(&root->fs_info->free_chunk_lock); + root->fs_info->free_chunk_space += device->total_bytes - + device->bytes_used; + spin_unlock(&root->fs_info->free_chunk_lock); + } ret = 0; return ret; } int btrfs_read_sys_array(struct btrfs_root *root) { - struct btrfs_super_block *super_copy = &root->fs_info->super_copy; + struct btrfs_super_block *super_copy = root->fs_info->super_copy; struct extent_buffer *sb; struct btrfs_disk_key *disk_key; struct btrfs_chunk *chunk; @@ -3619,7 +3705,7 @@ int btrfs_read_sys_array(struct btrfs_root *root) if (!sb) return -ENOMEM; btrfs_set_buffer_uptodate(sb); - btrfs_set_buffer_lockdep_class(sb, 0); + btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0); write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); array_size = btrfs_super_sys_array_size(super_copy); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 6d866db..78f2d4d 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -92,6 +92,20 @@ struct btrfs_device { struct btrfs_work work; struct rcu_head rcu; struct work_struct rcu_work; + + /* readahead state */ + spinlock_t reada_lock; + atomic_t reada_in_flight; + u64 reada_next; + struct reada_zone *reada_curr_zone; + struct radix_tree_root reada_zones; + struct radix_tree_root reada_extents; + + /* for sending down flush barriers */ + struct bio *flush_bio; + struct completion flush_wait; + int nobarriers; + }; struct btrfs_fs_devices { @@ -136,7 +150,10 @@ struct btrfs_bio_stripe { u64 length; /* only used for discard mappings */ }; -struct btrfs_multi_bio { +struct btrfs_bio; +typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err); + +struct btrfs_bio { atomic_t stripes_pending; bio_end_io_t *end_io; struct bio *orig_bio; @@ -144,6 +161,7 @@ struct btrfs_multi_bio { atomic_t error; int max_errors; int num_stripes; + int mirror_num; struct btrfs_bio_stripe stripes[]; }; @@ -171,7 +189,7 @@ struct map_lookup { int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, u64 end, u64 *length); -#define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \ +#define btrfs_bio_size(n) (sizeof(struct btrfs_bio) + \ (sizeof(struct btrfs_bio_stripe) * (n))) int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, @@ -180,7 +198,7 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, u64 chunk_offset, u64 start, u64 num_bytes); int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, u64 logical, u64 *length, - struct btrfs_multi_bio **multi_ret, int mirror_num); + struct btrfs_bio **bbio_ret, int mirror_num); int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, u64 chunk_start, u64 physical, u64 devid, u64 **logical, int *naddrs, int *stripe_len); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 5366fe4..60ff45e 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -102,48 +102,82 @@ static int do_setxattr(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - /* first lets see if we already have this xattr */ - di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name, - strlen(name), -1); - if (IS_ERR(di)) { - ret = PTR_ERR(di); - goto out; - } - - /* ok we already have this xattr, lets remove it */ - if (di) { - /* if we want create only exit */ - if (flags & XATTR_CREATE) { - ret = -EEXIST; + if (flags & XATTR_REPLACE) { + di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name, + name_len, -1); + if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto out; + } else if (!di) { + ret = -ENODATA; goto out; } - ret = btrfs_delete_one_dir_name(trans, root, path, di); - BUG_ON(ret); + if (ret) + goto out; btrfs_release_path(path); - /* if we don't have a value then we are removing the xattr */ + /* + * remove the attribute + */ if (!value) goto out; - } else { + } + +again: + ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode), + name, name_len, value, size); + /* + * If we're setting an xattr to a new value but the new value is say + * exactly BTRFS_MAX_XATTR_SIZE, we could end up with EOVERFLOW getting + * back from split_leaf. This is because it thinks we'll be extending + * the existing item size, but we're asking for enough space to add the + * item itself. So if we get EOVERFLOW just set ret to EEXIST and let + * the rest of the function figure it out. + */ + if (ret == -EOVERFLOW) + ret = -EEXIST; + + if (ret == -EEXIST) { + if (flags & XATTR_CREATE) + goto out; + /* + * We can't use the path we already have since we won't have the + * proper locking for a delete, so release the path and + * re-lookup to delete the thing. + */ btrfs_release_path(path); + di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), + name, name_len, -1); + if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto out; + } else if (!di) { + /* Shouldn't happen but just in case... */ + btrfs_release_path(path); + goto again; + } - if (flags & XATTR_REPLACE) { - /* we couldn't find the attr to replace */ - ret = -ENODATA; + ret = btrfs_delete_one_dir_name(trans, root, path, di); + if (ret) goto out; + + /* + * We have a value to set, so go back and try to insert it now. + */ + if (value) { + btrfs_release_path(path); + goto again; } } - - /* ok we have to create a completely new xattr */ - ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode), - name, name_len, value, size); - BUG_ON(ret); out: btrfs_free_path(path); return ret; } +/* + * @value: "" makes the attribute to empty, NULL removes it + */ int __btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode, const char *name, const void *value, size_t size, int flags) @@ -276,21 +310,40 @@ const struct xattr_handler *btrfs_xattr_handlers[] = { /* * Check if the attribute is in a supported namespace. * - * This applied after the check for the synthetic attributes in the system + * This is applied after the check for the synthetic attributes in the system * namespace. */ -static bool btrfs_is_valid_xattr(const char *name) +static int btrfs_is_valid_xattr(const char *name) { - return !strncmp(name, XATTR_SECURITY_PREFIX, - XATTR_SECURITY_PREFIX_LEN) || - !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) || - !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || - !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); + int len = strlen(name); + int prefixlen = 0; + + if (!strncmp(name, XATTR_SECURITY_PREFIX, + XATTR_SECURITY_PREFIX_LEN)) + prefixlen = XATTR_SECURITY_PREFIX_LEN; + else if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) + prefixlen = XATTR_SYSTEM_PREFIX_LEN; + else if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) + prefixlen = XATTR_TRUSTED_PREFIX_LEN; + else if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) + prefixlen = XATTR_USER_PREFIX_LEN; + else + return -EOPNOTSUPP; + + /* + * The name cannot consist of just prefix + */ + if (len <= prefixlen) + return -EINVAL; + + return 0; } ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size) { + int ret; + /* * If this is a request for a synthetic attribute in the system.* * namespace use the generic infrastructure to resolve a handler @@ -299,8 +352,9 @@ ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) return generic_getxattr(dentry, name, buffer, size); - if (!btrfs_is_valid_xattr(name)) - return -EOPNOTSUPP; + ret = btrfs_is_valid_xattr(name); + if (ret) + return ret; return __btrfs_getxattr(dentry->d_inode, name, buffer, size); } @@ -308,6 +362,7 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; + int ret; /* * The permission on security.* and system.* is not checked @@ -324,8 +379,9 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) return generic_setxattr(dentry, name, value, size, flags); - if (!btrfs_is_valid_xattr(name)) - return -EOPNOTSUPP; + ret = btrfs_is_valid_xattr(name); + if (ret) + return ret; if (size == 0) value = ""; /* empty EA, do not remove */ @@ -337,6 +393,7 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, int btrfs_removexattr(struct dentry *dentry, const char *name) { struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; + int ret; /* * The permission on security.* and system.* is not checked @@ -353,43 +410,44 @@ int btrfs_removexattr(struct dentry *dentry, const char *name) if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) return generic_removexattr(dentry, name); - if (!btrfs_is_valid_xattr(name)) - return -EOPNOTSUPP; + ret = btrfs_is_valid_xattr(name); + if (ret) + return ret; return __btrfs_setxattr(NULL, dentry->d_inode, name, NULL, 0, XATTR_REPLACE); } -int btrfs_xattr_security_init(struct btrfs_trans_handle *trans, - struct inode *inode, struct inode *dir, - const struct qstr *qstr) +int btrfs_initxattrs(struct inode *inode, const struct xattr *xattr_array, + void *fs_info) { - int err; - size_t len; - void *value; - char *suffix; + const struct xattr *xattr; + struct btrfs_trans_handle *trans = fs_info; char *name; + int err = 0; - err = security_inode_init_security(inode, dir, qstr, &suffix, &value, - &len); - if (err) { - if (err == -EOPNOTSUPP) - return 0; - return err; - } - - name = kmalloc(XATTR_SECURITY_PREFIX_LEN + strlen(suffix) + 1, - GFP_NOFS); - if (!name) { - err = -ENOMEM; - } else { + for (xattr = xattr_array; xattr->name != NULL; xattr++) { + name = kmalloc(XATTR_SECURITY_PREFIX_LEN + + strlen(xattr->name) + 1, GFP_NOFS); + if (!name) { + err = -ENOMEM; + break; + } strcpy(name, XATTR_SECURITY_PREFIX); - strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix); - err = __btrfs_setxattr(trans, inode, name, value, len, 0); + strcpy(name + XATTR_SECURITY_PREFIX_LEN, xattr->name); + err = __btrfs_setxattr(trans, inode, name, + xattr->value, xattr->value_len, 0); kfree(name); + if (err < 0) + break; } - - kfree(suffix); - kfree(value); return err; } + +int btrfs_xattr_security_init(struct btrfs_trans_handle *trans, + struct inode *inode, struct inode *dir, + const struct qstr *qstr) +{ + return security_inode_init_security(inode, dir, qstr, + &btrfs_initxattrs, trans); +} diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c index cdb41a1..8daea16 100644 --- a/fs/hfs/bnode.c +++ b/fs/hfs/bnode.c @@ -287,7 +287,6 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) page_cache_release(page); goto fail; } - page_cache_release(page); node->page[i] = page; } @@ -397,11 +396,11 @@ node_error: void hfs_bnode_free(struct hfs_bnode *node) { - //int i; + int i; - //for (i = 0; i < node->tree->pages_per_bnode; i++) - // if (node->page[i]) - // page_cache_release(node->page[i]); + for (i = 0; i < node->tree->pages_per_bnode; i++) + if (node->page[i]) + page_cache_release(node->page[i]); kfree(node); } diff --git a/fs/hfs/brec.c b/fs/hfs/brec.c index 92fb358..db240c5 100644 --- a/fs/hfs/brec.c +++ b/fs/hfs/brec.c @@ -132,13 +132,16 @@ skip: hfs_bnode_write(node, entry, data_off + key_len, entry_len); hfs_bnode_dump(node); - if (new_node) { - /* update parent key if we inserted a key - * at the start of the first node - */ - if (!rec && new_node != node) - hfs_brec_update_parent(fd); + /* + * update parent key if we inserted a key + * at the start of the node and it is not the new node + */ + if (!rec && new_node != node) { + hfs_bnode_read_key(node, fd->search_key, data_off + size); + hfs_brec_update_parent(fd); + } + if (new_node) { hfs_bnode_put(fd->bnode); if (!new_node->parent) { hfs_btree_inc_height(tree); @@ -167,9 +170,6 @@ skip: goto again; } - if (!rec) - hfs_brec_update_parent(fd); - return 0; } @@ -366,6 +366,8 @@ again: if (IS_ERR(parent)) return PTR_ERR(parent); __hfs_brec_find(parent, fd); + if (fd->record < 0) + return -ENOENT; hfs_bnode_dump(parent); rec = fd->record; diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index b4d70b1..bce4eef 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -198,7 +198,7 @@ static int hfs_create(struct inode *dir, struct dentry *dentry, int mode, res = hfs_cat_create(inode->i_ino, dir, &dentry->d_name, inode); if (res) { - inode->i_nlink = 0; + clear_nlink(inode); hfs_delete_inode(inode); iput(inode); return res; @@ -227,7 +227,7 @@ static int hfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) res = hfs_cat_create(inode->i_ino, dir, &dentry->d_name, inode); if (res) { - inode->i_nlink = 0; + clear_nlink(inode); hfs_delete_inode(inode); iput(inode); return res; diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index fff16c9..a1a9fdc 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -123,8 +123,8 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, - offset, nr_segs, hfs_get_block, NULL); + ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, + hfs_get_block); /* * In case of error extending write may have instantiated a few @@ -183,7 +183,7 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode) inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); - inode->i_nlink = 1; + set_nlink(inode, 1); inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; HFS_I(inode)->flags = 0; HFS_I(inode)->rsrc_inode = NULL; @@ -313,7 +313,7 @@ static int hfs_read_inode(struct inode *inode, void *data) /* Initialize the inode */ inode->i_uid = hsb->s_uid; inode->i_gid = hsb->s_gid; - inode->i_nlink = 1; + set_nlink(inode, 1); if (idata->key) HFS_I(inode)->cat_key = *idata->key; @@ -615,6 +615,8 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr) if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size != i_size_read(inode)) { + inode_dio_wait(inode); + error = vmtruncate(inode, attr->ia_size); if (error) return error; @@ -625,12 +627,18 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr) return 0; } -static int hfs_file_fsync(struct file *filp, int datasync) +static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end, + int datasync) { struct inode *inode = filp->f_mapping->host; struct super_block * sb; int ret, err; + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (ret) + return ret; + mutex_lock(&inode->i_mutex); + /* sync the inode to buffers */ ret = write_inode_now(inode, 0); @@ -647,6 +655,7 @@ static int hfs_file_fsync(struct file *filp, int datasync) err = sync_blockdev(sb->s_bdev); if (!ret) ret = err; + mutex_unlock(&inode->i_mutex); return ret; } diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 1b55f70..cac813d 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -138,9 +138,9 @@ static int hfs_show_options(struct seq_file *seq, struct vfsmount *mnt) struct hfs_sb_info *sbi = HFS_SB(mnt->mnt_sb); if (sbi->s_creator != cpu_to_be32(0x3f3f3f3f)) - seq_printf(seq, ",creator=%.4s", (char *)&sbi->s_creator); + seq_show_option_n(seq, "creator", (char *)&sbi->s_creator, 4); if (sbi->s_type != cpu_to_be32(0x3f3f3f3f)) - seq_printf(seq, ",type=%.4s", (char *)&sbi->s_type); + seq_show_option_n(seq, "type", (char *)&sbi->s_type, 4); seq_printf(seq, ",uid=%u,gid=%u", sbi->s_uid, sbi->s_gid); if (sbi->s_file_umask != 0133) seq_printf(seq, ",file_umask=%o", sbi->s_file_umask); diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c index 1c42cc5..a1e9109 100644 --- a/fs/hfsplus/bnode.c +++ b/fs/hfsplus/bnode.c @@ -454,7 +454,6 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) page_cache_release(page); goto fail; } - page_cache_release(page); node->page[i] = page; } @@ -566,13 +565,11 @@ node_error: void hfs_bnode_free(struct hfs_bnode *node) { -#if 0 int i; for (i = 0; i < node->tree->pages_per_bnode; i++) if (node->page[i]) page_cache_release(node->page[i]); -#endif kfree(node); } diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c index 2312de3..7429c40 100644 --- a/fs/hfsplus/brec.c +++ b/fs/hfsplus/brec.c @@ -43,6 +43,10 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec) node->tree->node_size - (rec + 1) * 2); if (!recoff) return 0; + if (recoff > node->tree->node_size - 2) { + printk(KERN_ERR "hfs: recoff %d too large\n", recoff); + return 0; + } retval = hfs_bnode_read_u16(node, recoff) + 2; if (retval > node->tree->max_key_len + 2) { @@ -126,13 +130,16 @@ skip: hfs_bnode_write(node, entry, data_off + key_len, entry_len); hfs_bnode_dump(node); - if (new_node) { - /* update parent key if we inserted a key - * at the start of the first node - */ - if (!rec && new_node != node) - hfs_brec_update_parent(fd); + /* + * update parent key if we inserted a key + * at the start of the node and it is not the new node + */ + if (!rec && new_node != node) { + hfs_bnode_read_key(node, fd->search_key, data_off + size); + hfs_brec_update_parent(fd); + } + if (new_node) { hfs_bnode_put(fd->bnode); if (!new_node->parent) { hfs_btree_inc_height(tree); @@ -162,9 +169,6 @@ skip: goto again; } - if (!rec) - hfs_brec_update_parent(fd); - return 0; } @@ -364,6 +368,8 @@ again: if (IS_ERR(parent)) return PTR_ERR(parent); __hfs_brec_find(parent, fd); + if (fd->record < 0) + return -ENOENT; hfs_bnode_dump(parent); rec = fd->record; diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index 408073a..ec2a9c2 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c @@ -212,7 +212,9 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n", str->name, cnid, inode->i_nlink); - hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); + err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); + if (err) + return err; hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); entry_size = hfsplus_fill_cat_thread(sb, &entry, @@ -269,7 +271,9 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n", str ? str->name : NULL, cnid); - hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); + err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); + if (err) + return err; if (!str) { int len; @@ -347,12 +351,14 @@ int hfsplus_rename_cat(u32 cnid, struct hfs_find_data src_fd, dst_fd; hfsplus_cat_entry entry; int entry_size, type; - int err = 0; + int err; dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", cnid, src_dir->i_ino, src_name->name, dst_dir->i_ino, dst_name->name); - hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd); + err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd); + if (err) + return err; dst_fd = src_fd; /* find the old dir entry and read the data */ diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 159f5eb..5adb740 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -38,7 +38,9 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry, sb = dir->i_sb; dentry->d_fsdata = NULL; - hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); + err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); + if (err) + return ERR_PTR(err); hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); again: err = hfs_brec_read(&fd, &entry, sizeof(entry)); @@ -132,7 +134,9 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) if (filp->f_pos >= inode->i_size) return 0; - hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); + err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); + if (err) + return err; hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL); err = hfs_brec_find(&fd); if (err) @@ -422,7 +426,7 @@ static int hfsplus_symlink(struct inode *dir, struct dentry *dentry, goto out; out_err: - inode->i_nlink = 0; + clear_nlink(inode); hfsplus_delete_inode(inode); iput(inode); out: @@ -447,7 +451,7 @@ static int hfsplus_mknod(struct inode *dir, struct dentry *dentry, res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); if (res) { - inode->i_nlink = 0; + clear_nlink(inode); hfsplus_delete_inode(inode); iput(inode); goto out; diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index 9d8c087..32b12e5 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -119,22 +119,31 @@ static void __hfsplus_ext_write_extent(struct inode *inode, set_bit(HFSPLUS_I_EXT_DIRTY, &hip->flags); } -static void hfsplus_ext_write_extent_locked(struct inode *inode) +static int hfsplus_ext_write_extent_locked(struct inode *inode) { + int res; + if (HFSPLUS_I(inode)->extent_state & HFSPLUS_EXT_DIRTY) { struct hfs_find_data fd; - hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); + res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); + if (res) + return res; __hfsplus_ext_write_extent(inode, &fd); hfs_find_exit(&fd); } + return 0; } -void hfsplus_ext_write_extent(struct inode *inode) +int hfsplus_ext_write_extent(struct inode *inode) { + int res; + mutex_lock(&HFSPLUS_I(inode)->extents_lock); - hfsplus_ext_write_extent_locked(inode); + res = hfsplus_ext_write_extent_locked(inode); mutex_unlock(&HFSPLUS_I(inode)->extents_lock); + + return res; } static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd, @@ -194,9 +203,11 @@ static int hfsplus_ext_read_extent(struct inode *inode, u32 block) block < hip->cached_start + hip->cached_blocks) return 0; - hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); - res = __hfsplus_ext_cache_extent(&fd, inode, block); - hfs_find_exit(&fd); + res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); + if (!res) { + res = __hfsplus_ext_cache_extent(&fd, inode, block); + hfs_find_exit(&fd); + } return res; } @@ -209,6 +220,7 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock, struct hfsplus_inode_info *hip = HFSPLUS_I(inode); int res = -EIO; u32 ablock, dblock, mask; + sector_t sector; int was_dirty = 0; int shift; @@ -255,10 +267,12 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock, done: dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock); + mask = (1 << sbi->fs_shift) - 1; - map_bh(bh_result, sb, - (dblock << sbi->fs_shift) + sbi->blockoffset + - (iblock & mask)); + sector = ((sector_t)dblock << sbi->fs_shift) + + sbi->blockoffset + (iblock & mask); + map_bh(bh_result, sb, sector); + if (create) { set_buffer_new(bh_result); hip->phys_size += sb->s_blocksize; @@ -371,7 +385,9 @@ int hfsplus_free_fork(struct super_block *sb, u32 cnid, if (total_blocks == blocks) return 0; - hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); + res = hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); + if (res) + return res; do { res = __hfsplus_ext_read_extent(&fd, ext_entry, cnid, total_blocks, type); @@ -469,7 +485,9 @@ out: insert_extent: dprint(DBG_EXTENT, "insert new extent\n"); - hfsplus_ext_write_extent_locked(inode); + res = hfsplus_ext_write_extent_locked(inode); + if (res) + goto out; memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); hip->cached_extents[0].start_block = cpu_to_be32(start); @@ -500,7 +518,6 @@ void hfsplus_file_truncate(struct inode *inode) struct page *page; void *fsdata; loff_t size = inode->i_size; - int res; res = pagecache_write_begin(NULL, mapping, size, 0, AOP_FLAG_UNINTERRUPTIBLE, @@ -523,7 +540,12 @@ void hfsplus_file_truncate(struct inode *inode) goto out; mutex_lock(&hip->extents_lock); - hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); + res = hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); + if (res) { + mutex_unlock(&hip->extents_lock); + /* XXX: We lack error handling of hfsplus_file_truncate() */ + return; + } while (1) { if (alloc_cnt == hip->first_blocks) { hfsplus_free_extents(sb, hip->first_extents, diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 4e7f64b..d7674d0 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -386,7 +386,7 @@ extern const struct file_operations hfsplus_dir_operations; /* extents.c */ int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *); -void hfsplus_ext_write_extent(struct inode *); +int hfsplus_ext_write_extent(struct inode *); int hfsplus_get_block(struct inode *, sector_t, struct buffer_head *, int); int hfsplus_free_fork(struct super_block *, u32, struct hfsplus_fork_raw *, int); @@ -404,7 +404,8 @@ int hfsplus_cat_read_inode(struct inode *, struct hfs_find_data *); int hfsplus_cat_write_inode(struct inode *); struct inode *hfsplus_new_inode(struct super_block *, int); void hfsplus_delete_inode(struct inode *); -int hfsplus_file_fsync(struct file *file, int datasync); +int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end, + int datasync); /* ioctl.c */ long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index b248a6cf..40e1413 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -119,8 +119,8 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb, struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, - offset, nr_segs, hfsplus_get_block, NULL); + ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, + hfsplus_get_block); /* * In case of error extending write may have instantiated a few @@ -195,11 +195,13 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, hip->flags = 0; set_bit(HFSPLUS_I_RSRC, &hip->flags); - hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); - err = hfsplus_find_cat(sb, dir->i_ino, &fd); - if (!err) - err = hfsplus_cat_read_inode(inode, &fd); - hfs_find_exit(&fd); + err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); + if (!err) { + err = hfsplus_find_cat(sb, dir->i_ino, &fd); + if (!err) + err = hfsplus_cat_read_inode(inode, &fd); + hfs_find_exit(&fd); + } if (err) { iput(inode); return ERR_PTR(err); @@ -296,6 +298,8 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr) if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size != i_size_read(inode)) { + inode_dio_wait(inode); + error = vmtruncate(inode, attr->ia_size); if (error) return error; @@ -306,13 +310,19 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr) return 0; } -int hfsplus_file_fsync(struct file *file, int datasync) +int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end, + int datasync) { struct inode *inode = file->f_mapping->host; struct hfsplus_inode_info *hip = HFSPLUS_I(inode); struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); int error = 0, error2; + error = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (error) + return error; + mutex_lock(&inode->i_mutex); + /* * Sync inode metadata into the catalog and extent trees. */ @@ -340,6 +350,8 @@ int hfsplus_file_fsync(struct file *file, int datasync) if (!test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags)) blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); + mutex_unlock(&inode->i_mutex); + return error; } @@ -379,7 +391,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode) inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); - inode->i_nlink = 1; + set_nlink(inode, 1); inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; hip = HFSPLUS_I(inode); @@ -500,7 +512,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) hfs_bnode_read(fd->bnode, &entry, fd->entryoffset, sizeof(struct hfsplus_cat_folder)); hfsplus_get_perms(inode, &folder->permissions, 1); - inode->i_nlink = 1; + set_nlink(inode, 1); inode->i_size = 2 + be32_to_cpu(folder->valence); inode->i_atime = hfsp_mt2ut(folder->access_date); inode->i_mtime = hfsp_mt2ut(folder->content_mod_date); @@ -520,11 +532,11 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) hfsplus_inode_read_fork(inode, HFSPLUS_IS_RSRC(inode) ? &file->rsrc_fork : &file->data_fork); hfsplus_get_perms(inode, &file->permissions, 0); - inode->i_nlink = 1; + set_nlink(inode, 1); if (S_ISREG(inode->i_mode)) { if (file->permissions.dev) - inode->i_nlink = - be32_to_cpu(file->permissions.dev); + set_nlink(inode, + be32_to_cpu(file->permissions.dev)); inode->i_op = &hfsplus_file_inode_operations; inode->i_fop = &hfsplus_file_operations; inode->i_mapping->a_ops = &hfsplus_aops; diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index bb62a5882..c8d6b4f 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c @@ -211,9 +211,9 @@ int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt) struct hfsplus_sb_info *sbi = HFSPLUS_SB(mnt->mnt_sb); if (sbi->creator != HFSPLUS_DEF_CR_TYPE) - seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator); + seq_show_option_n(seq, "creator", (char *)&sbi->creator, 4); if (sbi->type != HFSPLUS_DEF_CR_TYPE) - seq_printf(seq, ",type=%.4s", (char *)&sbi->type); + seq_show_option_n(seq, "type", (char *)&sbi->type, 4); seq_printf(seq, ",umask=%o,uid=%u,gid=%u", sbi->umask, sbi->uid, sbi->gid); if (sbi->part >= 0) diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index c3a76fd..d24a9b6 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -73,11 +73,13 @@ struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino) if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID || inode->i_ino == HFSPLUS_ROOT_CNID) { - hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); - err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); - if (!err) - err = hfsplus_cat_read_inode(inode, &fd); - hfs_find_exit(&fd); + err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); + if (!err) { + err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); + if (!err) + err = hfsplus_cat_read_inode(inode, &fd); + hfs_find_exit(&fd); + } } else { err = hfsplus_system_read_inode(inode); } @@ -133,9 +135,13 @@ static int hfsplus_system_write_inode(struct inode *inode) static int hfsplus_write_inode(struct inode *inode, struct writeback_control *wbc) { + int err; + dprint(DBG_INODE, "hfsplus_write_inode: %lu\n", inode->i_ino); - hfsplus_ext_write_extent(inode); + err = hfsplus_ext_write_extent(inode); + if (err) + return err; if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID || inode->i_ino == HFSPLUS_ROOT_CNID) @@ -338,6 +344,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) struct inode *root, *inode; struct qstr str; struct nls_table *nls = NULL; + u64 last_fs_block, last_fs_page; int err; err = -EINVAL; @@ -393,6 +400,17 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) if (!sbi->rsrc_clump_blocks) sbi->rsrc_clump_blocks = 1; + err = -EFBIG; + last_fs_block = sbi->total_blocks - 1; + last_fs_page = (last_fs_block << sbi->alloc_blksz_shift) >> + PAGE_CACHE_SHIFT; + + if ((last_fs_block > (sector_t)(~0ULL) >> (sbi->alloc_blksz_shift - 9)) || + (last_fs_page > (pgoff_t)(~0ULL))) { + printk(KERN_ERR "hfs: filesystem size too large.\n"); + goto out_free_vhdr; + } + /* Set up operations so we can load metadata */ sb->s_op = &hfsplus_sops; sb->s_maxbytes = MAX_LFS_FILESIZE; @@ -417,6 +435,8 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) sb->s_flags |= MS_RDONLY; } + err = -EINVAL; + /* Load metadata objects (B*Trees) */ sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID); if (!sbi->ext_tree) { @@ -447,7 +467,9 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; str.name = HFSP_HIDDENDIR_NAME; - hfs_find_init(sbi->cat_tree, &fd); + err = hfs_find_init(sbi->cat_tree, &fd); + if (err) + goto out_put_root; hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str); if (!hfs_brec_read(&fd, &entry, sizeof(entry))) { hfs_find_exit(&fd); diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c index a3f0bfc..a32998f 100644 --- a/fs/hfsplus/unicode.c +++ b/fs/hfsplus/unicode.c @@ -142,7 +142,11 @@ int hfsplus_uni2asc(struct super_block *sb, /* search for single decomposed char */ if (likely(compose)) ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0); - if (ce1 && (cc = ce1[0])) { + if (ce1) + cc = ce1[0]; + else + cc = 0; + if (cc) { /* start of a possibly decomposed Hangul char */ if (cc != 0xffff) goto done; @@ -209,7 +213,8 @@ int hfsplus_uni2asc(struct super_block *sb, i++; ce2 = ce1; } - if ((cc = ce2[0])) { + cc = ce2[0]; + if (cc) { ip += i; ustrlen -= i; goto done; @@ -301,7 +306,11 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, while (outlen < HFSPLUS_MAX_STRLEN && len > 0) { size = asc2unichar(sb, astr, len, &c); - if (decompose && (dstr = decompose_unichar(c, &dsize))) { + if (decompose) + dstr = decompose_unichar(c, &dsize); + else + dstr = NULL; + if (dstr) { if (outlen + dsize > HFSPLUS_MAX_STRLEN) break; do { @@ -346,15 +355,23 @@ int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode, astr += size; len -= size; - if (decompose && (dstr = decompose_unichar(c, &dsize))) { + if (decompose) + dstr = decompose_unichar(c, &dsize); + else + dstr = NULL; + if (dstr) { do { c2 = *dstr++; - if (!casefold || (c2 = case_fold(c2))) + if (casefold) + c2 = case_fold(c2); + if (!casefold || c2) hash = partial_name_hash(c2, hash); } while (--dsize > 0); } else { c2 = c; - if (!casefold || (c2 = case_fold(c2))) + if (casefold) + c2 = case_fold(c2); + if (!casefold || c2) hash = partial_name_hash(c2, hash); } } @@ -422,12 +439,14 @@ int hfsplus_compare_dentry(const struct dentry *parent, c1 = *dstr1; c2 = *dstr2; if (casefold) { - if (!(c1 = case_fold(c1))) { + c1 = case_fold(c1); + if (!c1) { dstr1++; dsize1--; continue; } - if (!(c2 = case_fold(c2))) { + c2 = case_fold(c2); + if (!c2) { dstr2++; dsize2--; continue; diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index aac1563..90effcc 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -184,10 +184,6 @@ int hfsplus_read_wrapper(struct super_block *sb) if (hfsplus_get_last_session(sb, &part_start, &part_size)) goto out; - if ((u64)part_start + part_size > 0x100000000ULL) { - pr_err("hfs: volumes larger than 2TB are not supported yet\n"); - goto out; - } error = -ENOMEM; sbi->s_vhdr_buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL); @@ -215,8 +211,9 @@ reread: if (!hfsplus_read_mdb(sbi->s_vhdr, &wd)) goto out_free_backup_vhdr; wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT; - part_start += wd.ablk_start + wd.embed_start * wd.ablk_size; - part_size = wd.embed_count * wd.ablk_size; + part_start += (sector_t)wd.ablk_start + + (sector_t)wd.embed_start * wd.ablk_size; + part_size = (sector_t)wd.embed_count * wd.ablk_size; goto reread; default: /* diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 2638c834e..104e4d9 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -265,7 +265,7 @@ static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs) size_t offset = strlen(root_ino) + 1; if (strlen(root_path) > offset) - seq_printf(seq, ",%s", root_path + offset); + seq_show_option(seq, root_path + offset, NULL); return 0; } @@ -362,9 +362,20 @@ retry: return 0; } -int hostfs_fsync(struct file *file, int datasync) +int hostfs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - return fsync_file(HOSTFS_I(file->f_mapping->host)->fd, datasync); + struct inode *inode = file->f_mapping->host; + int ret; + + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (ret) + return ret; + + mutex_lock(&inode->i_mutex); + ret = fsync_file(HOSTFS_I(inode)->fd, datasync); + mutex_unlock(&inode->i_mutex); + + return ret; } static const struct file_operations hostfs_file_fops = { @@ -530,7 +541,7 @@ static int read_name(struct inode *ino, char *name) ino->i_ino = st.ino; ino->i_mode = st.mode; - ino->i_nlink = st.nlink; + set_nlink(ino, st.nlink); ino->i_uid = st.uid; ino->i_gid = st.gid; ino->i_atime = st.atime; @@ -748,12 +759,12 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from, return err; } -int hostfs_permission(struct inode *ino, int desired, unsigned int flags) +int hostfs_permission(struct inode *ino, int desired) { char *name; int r = 0, w = 0, x = 0, err; - if (flags & IPERM_FLAG_RCU) + if (desired & MAY_NOT_BLOCK) return -ECHILD; if (desired & MAY_READ) r = 1; @@ -770,7 +781,7 @@ int hostfs_permission(struct inode *ino, int desired, unsigned int flags) err = access_file(name, r, w, x); __putname(name); if (!err) - err = generic_permission(ino, desired, flags, NULL); + err = generic_permission(ino, desired); return err; } diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c index d51a983..dd7bc38 100644 --- a/fs/hostfs/hostfs_user.c +++ b/fs/hostfs/hostfs_user.c @@ -16,7 +16,6 @@ #include #include "hostfs.h" #include "os.h" -#include "user.h" #include static void stat64_to_hostfs(const struct stat64 *buf, struct hostfs_stat *p) diff --git a/fs/hpfs/alloc.c b/fs/hpfs/alloc.c index 7a5eb2c..e0d57c0 100644 --- a/fs/hpfs/alloc.c +++ b/fs/hpfs/alloc.c @@ -8,6 +8,58 @@ #include "hpfs_fn.h" +static void hpfs_claim_alloc(struct super_block *s, secno sec) +{ + struct hpfs_sb_info *sbi = hpfs_sb(s); + if (sbi->sb_n_free != (unsigned)-1) { + if (unlikely(!sbi->sb_n_free)) { + hpfs_error(s, "free count underflow, allocating sector %08x", sec); + sbi->sb_n_free = -1; + return; + } + sbi->sb_n_free--; + } +} + +static void hpfs_claim_free(struct super_block *s, secno sec) +{ + struct hpfs_sb_info *sbi = hpfs_sb(s); + if (sbi->sb_n_free != (unsigned)-1) { + if (unlikely(sbi->sb_n_free >= sbi->sb_fs_size)) { + hpfs_error(s, "free count overflow, freeing sector %08x", sec); + sbi->sb_n_free = -1; + return; + } + sbi->sb_n_free++; + } +} + +static void hpfs_claim_dirband_alloc(struct super_block *s, secno sec) +{ + struct hpfs_sb_info *sbi = hpfs_sb(s); + if (sbi->sb_n_free_dnodes != (unsigned)-1) { + if (unlikely(!sbi->sb_n_free_dnodes)) { + hpfs_error(s, "dirband free count underflow, allocating sector %08x", sec); + sbi->sb_n_free_dnodes = -1; + return; + } + sbi->sb_n_free_dnodes--; + } +} + +static void hpfs_claim_dirband_free(struct super_block *s, secno sec) +{ + struct hpfs_sb_info *sbi = hpfs_sb(s); + if (sbi->sb_n_free_dnodes != (unsigned)-1) { + if (unlikely(sbi->sb_n_free_dnodes >= sbi->sb_dirband_size / 4)) { + hpfs_error(s, "dirband free count overflow, freeing sector %08x", sec); + sbi->sb_n_free_dnodes = -1; + return; + } + sbi->sb_n_free_dnodes++; + } +} + /* * Check if a sector is allocated in bitmap * This is really slow. Turned on only if chk==2 @@ -203,9 +255,15 @@ secno hpfs_alloc_sector(struct super_block *s, secno near, unsigned n, int forwa } sec = 0; ret: + if (sec) { + i = 0; + do + hpfs_claim_alloc(s, sec + i); + while (unlikely(++i < n)); + } if (sec && f_p) { for (i = 0; i < forward; i++) { - if (!hpfs_alloc_if_possible(s, sec + i + 1)) { + if (!hpfs_alloc_if_possible(s, sec + n + i)) { hpfs_error(s, "Prealloc doesn't work! Wanted %d, allocated at %08x, can't allocate %d", forward, sec, i); sec = 0; break; @@ -228,6 +286,7 @@ static secno alloc_in_dirband(struct super_block *s, secno near) nr >>= 2; sec = alloc_in_bmp(s, (~0x3fff) | nr, 1, 0); if (!sec) return 0; + hpfs_claim_dirband_alloc(s, sec); return ((sec & 0x3fff) << 2) + sbi->sb_dirband_start; } @@ -242,6 +301,7 @@ int hpfs_alloc_if_possible(struct super_block *s, secno sec) bmp[(sec & 0x3fff) >> 5] &= cpu_to_le32(~(1 << (sec & 0x1f))); hpfs_mark_4buffers_dirty(&qbh); hpfs_brelse4(&qbh); + hpfs_claim_alloc(s, sec); return 1; } hpfs_brelse4(&qbh); @@ -275,6 +335,7 @@ void hpfs_free_sectors(struct super_block *s, secno sec, unsigned n) return; } bmp[(sec & 0x3fff) >> 5] |= cpu_to_le32(1 << (sec & 0x1f)); + hpfs_claim_free(s, sec); if (!--n) { hpfs_mark_4buffers_dirty(&qbh); hpfs_brelse4(&qbh); @@ -359,6 +420,7 @@ void hpfs_free_dnode(struct super_block *s, dnode_secno dno) bmp[ssec >> 5] |= cpu_to_le32(1 << (ssec & 0x1f)); hpfs_mark_4buffers_dirty(&qbh); hpfs_brelse4(&qbh); + hpfs_claim_dirband_free(s, dno); } } @@ -366,7 +428,7 @@ struct dnode *hpfs_alloc_dnode(struct super_block *s, secno near, dnode_secno *dno, struct quad_buffer_head *qbh) { struct dnode *d; - if (hpfs_count_one_bitmap(s, hpfs_sb(s)->sb_dmap) > FREE_DNODES_ADD) { + if (hpfs_get_free_dnodes(s) > FREE_DNODES_ADD) { if (!(*dno = alloc_in_dirband(s, near))) if (!(*dno = hpfs_alloc_sector(s, near, 4, 0))) return NULL; } else { diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index f46ae02..46549c7 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -29,25 +29,31 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence) struct hpfs_inode_info *hpfs_inode = hpfs_i(i); struct super_block *s = i->i_sb; + /* Somebody else will have to figure out what to do here */ + if (whence == SEEK_DATA || whence == SEEK_HOLE) + return -EINVAL; + + mutex_lock(&i->i_mutex); hpfs_lock(s); /*printk("dir lseek\n");*/ if (new_off == 0 || new_off == 1 || new_off == 11 || new_off == 12 || new_off == 13) goto ok; - mutex_lock(&i->i_mutex); pos = ((loff_t) hpfs_de_as_down_as_possible(s, hpfs_inode->i_dno) << 4) + 1; while (pos != new_off) { if (map_pos_dirent(i, &pos, &qbh)) hpfs_brelse4(&qbh); else goto fail; if (pos == 12) goto fail; } - mutex_unlock(&i->i_mutex); + hpfs_add_pos(i, &filp->f_pos); ok: + filp->f_pos = new_off; hpfs_unlock(s); - return filp->f_pos = new_off; -fail: mutex_unlock(&i->i_mutex); + return new_off; +fail: /*printk("illegal lseek: %016llx\n", new_off);*/ hpfs_unlock(s); + mutex_unlock(&i->i_mutex); return -ESPIPE; } @@ -243,7 +249,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name result->i_mode &= ~0111; result->i_op = &hpfs_file_iops; result->i_fop = &hpfs_file_ops; - result->i_nlink = 1; + set_nlink(result, 1); } unlock_new_inode(result); } diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 89c500e..5ecfffe 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -18,9 +18,14 @@ static int hpfs_file_release(struct inode *inode, struct file *file) return 0; } -int hpfs_file_fsync(struct file *file, int datasync) +int hpfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; + int ret; + + ret = filemap_write_and_wait_range(file->f_mapping, start, end); + if (ret) + return ret; return sync_blockdev(inode->i_sb->s_bdev); } @@ -111,9 +116,12 @@ static int hpfs_write_begin(struct file *file, struct address_space *mapping, hpfs_get_block, &hpfs_i(mapping->host)->mmu_private); if (unlikely(ret)) { - loff_t isize = mapping->host->i_size; + loff_t isize; + hpfs_lock(mapping->host->i_sb); + isize = mapping->host->i_size; if (pos + len > isize) vmtruncate(mapping->host, isize); + hpfs_unlock(mapping->host->i_sb); } return ret; diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index dd552f8..f1f2ca7 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -258,7 +258,7 @@ void hpfs_set_ea(struct inode *, struct fnode *, const char *, /* file.c */ -int hpfs_file_fsync(struct file *, int); +int hpfs_file_fsync(struct file *, loff_t, loff_t, int); extern const struct file_operations hpfs_file_ops; extern const struct inode_operations hpfs_file_iops; extern const struct address_space_operations hpfs_aops; @@ -311,10 +311,10 @@ static inline struct hpfs_sb_info *hpfs_sb(struct super_block *sb) /* super.c */ -void hpfs_error(struct super_block *, const char *, ...) - __attribute__((format (printf, 2, 3))); +__printf(2, 3) +void hpfs_error(struct super_block *, const char *, ...); int hpfs_stop_cycles(struct super_block *, int, int *, int *, char *); -unsigned hpfs_count_one_bitmap(struct super_block *, secno); +unsigned hpfs_get_free_dnodes(struct super_block *); /* * local time (HPFS) to GMT (Unix) diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 338cd83..3b2cec2 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -53,7 +53,7 @@ void hpfs_read_inode(struct inode *i) i->i_mode &= ~0111; i->i_op = &hpfs_file_iops; i->i_fop = &hpfs_file_ops; - i->i_nlink = 0;*/ + clear_nlink(i);*/ make_bad_inode(i); return; } @@ -77,7 +77,7 @@ void hpfs_read_inode(struct inode *i) i->i_mode = S_IFLNK | 0777; i->i_op = &page_symlink_inode_operations; i->i_data.a_ops = &hpfs_symlink_aops; - i->i_nlink = 1; + set_nlink(i, 1); i->i_size = ea_size; i->i_blocks = 1; brelse(bh); @@ -101,7 +101,7 @@ void hpfs_read_inode(struct inode *i) } if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) { brelse(bh); - i->i_nlink = 1; + set_nlink(i, 1); i->i_size = 0; i->i_blocks = 1; init_special_inode(i, mode, @@ -125,13 +125,13 @@ void hpfs_read_inode(struct inode *i) hpfs_count_dnodes(i->i_sb, hpfs_inode->i_dno, &n_dnodes, &n_subdirs, NULL); i->i_blocks = 4 * n_dnodes; i->i_size = 2048 * n_dnodes; - i->i_nlink = 2 + n_subdirs; + set_nlink(i, 2 + n_subdirs); } else { i->i_mode |= S_IFREG; if (!hpfs_inode->i_ea_mode) i->i_mode &= ~0111; i->i_op = &hpfs_file_iops; i->i_fop = &hpfs_file_ops; - i->i_nlink = 1; + set_nlink(i, 1); i->i_size = le32_to_cpu(fnode->file_size); i->i_blocks = ((i->i_size + 511) >> 9) + 1; i->i_data.a_ops = &hpfs_aops; diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index acf95da..ea91fcb 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -56,7 +56,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) result->i_fop = &hpfs_dir_ops; result->i_blocks = 4; result->i_size = 2048; - result->i_nlink = 2; + set_nlink(result, 2); if (dee.read_only) result->i_mode &= ~0222; @@ -150,7 +150,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc result->i_mode &= ~0111; result->i_op = &hpfs_file_iops; result->i_fop = &hpfs_file_ops; - result->i_nlink = 1; + set_nlink(result, 1); hpfs_i(result)->i_parent_dir = dir->i_ino; result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date)); result->i_ctime.tv_nsec = 0; @@ -242,7 +242,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t hpfs_i(result)->i_ea_size = 0; result->i_uid = current_fsuid(); result->i_gid = current_fsgid(); - result->i_nlink = 1; + set_nlink(result, 1); result->i_size = 0; result->i_blocks = 1; init_special_inode(result, mode, rdev); @@ -318,7 +318,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy result->i_uid = current_fsuid(); result->i_gid = current_fsgid(); result->i_blocks = 1; - result->i_nlink = 1; + set_nlink(result, 1); result->i_size = strlen(symlink); result->i_op = &page_symlink_inode_operations; result->i_data.a_ops = &hpfs_symlink_aops; @@ -398,7 +398,7 @@ again: hpfs_unlock(dir->i_sb); return -ENOSPC; } - if (generic_permission(inode, MAY_WRITE, 0, NULL) || + if (generic_permission(inode, MAY_WRITE) || !S_ISREG(inode->i_mode) || get_write_access(inode)) { d_rehash(dentry); diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index f760c15..b03e766 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -115,7 +115,7 @@ static void hpfs_put_super(struct super_block *s) kfree(sbi); } -unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno) +static unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno) { struct quad_buffer_head qbh; unsigned long *bits; @@ -123,7 +123,7 @@ unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno) bits = hpfs_map_4sectors(s, secno, &qbh, 4); if (!bits) - return 0; + return (unsigned)-1; count = bitmap_weight(bits, 2048 * BITS_PER_BYTE); hpfs_brelse4(&qbh); return count; @@ -134,29 +134,45 @@ static unsigned count_bitmaps(struct super_block *s) unsigned n, count, n_bands; n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14; count = 0; - for (n = 0; n < n_bands; n++) - count += hpfs_count_one_bitmap(s, le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[n])); + for (n = 0; n < n_bands; n++) { + unsigned c; + c = hpfs_count_one_bitmap(s, le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[n])); + if (c != (unsigned)-1) + count += c; + } return count; } +unsigned hpfs_get_free_dnodes(struct super_block *s) +{ + struct hpfs_sb_info *sbi = hpfs_sb(s); + if (sbi->sb_n_free_dnodes == (unsigned)-1) { + unsigned c = hpfs_count_one_bitmap(s, sbi->sb_dmap); + if (c == (unsigned)-1) + return 0; + sbi->sb_n_free_dnodes = c; + } + return sbi->sb_n_free_dnodes; +} + static int hpfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *s = dentry->d_sb; struct hpfs_sb_info *sbi = hpfs_sb(s); u64 id = huge_encode_dev(s->s_bdev->bd_dev); + hpfs_lock(s); - /*if (sbi->sb_n_free == -1) {*/ + if (sbi->sb_n_free == (unsigned)-1) sbi->sb_n_free = count_bitmaps(s); - sbi->sb_n_free_dnodes = hpfs_count_one_bitmap(s, sbi->sb_dmap); - /*}*/ + buf->f_type = s->s_magic; buf->f_bsize = 512; buf->f_blocks = sbi->sb_fs_size; buf->f_bfree = sbi->sb_n_free; buf->f_bavail = sbi->sb_n_free; buf->f_files = sbi->sb_dirband_size / 4; - buf->f_ffree = sbi->sb_n_free_dnodes; + buf->f_ffree = hpfs_get_free_dnodes(s); buf->f_fsid.val[0] = (u32)id; buf->f_fsid.val[1] = (u32)(id >> 32); buf->f_namelen = 254; diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 9d71c95..f590b11 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -574,9 +574,10 @@ static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir) return err; } -static int hppfs_fsync(struct file *file, int datasync) +static int hppfs_fsync(struct file *file, loff_t start, loff_t end, + int datasync) { - return 0; + return filemap_write_and_wait_range(file->f_mapping, start, end); } static const struct file_operations hppfs_dir_fops = { @@ -701,7 +702,7 @@ static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) inode->i_ctime = proc_ino->i_ctime; inode->i_ino = proc_ino->i_ino; inode->i_mode = proc_ino->i_mode; - inode->i_nlink = proc_ino->i_nlink; + set_nlink(inode, proc_ino->i_nlink); inode->i_size = proc_ino->i_size; inode->i_blocks = proc_ino->i_blocks; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 6327a06..0aa424a 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -94,7 +94,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_flags |= VM_HUGETLB | VM_RESERVED; vma->vm_ops = &hugetlb_vm_ops; - if (vma->vm_pgoff & ~(huge_page_mask(h) >> PAGE_SHIFT)) + if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) return -EINVAL; vma_len = (loff_t)(vma->vm_end - vma->vm_start); @@ -484,6 +484,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, inode->i_op = &page_symlink_inode_operations; break; } + lockdep_annotate_inode_mutex_key(inode); } return inode; } @@ -592,9 +593,15 @@ static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) spin_lock(&sbinfo->stat_lock); /* If no limits set, just report 0 for max/free/used * blocks, like simple_statfs() */ - if (sbinfo->max_blocks >= 0) { - buf->f_blocks = sbinfo->max_blocks; - buf->f_bavail = buf->f_bfree = sbinfo->free_blocks; + if (sbinfo->spool) { + long free_pages; + + spin_lock(&sbinfo->spool->lock); + buf->f_blocks = sbinfo->spool->max_hpages; + free_pages = sbinfo->spool->max_hpages + - sbinfo->spool->used_hpages; + buf->f_bavail = buf->f_bfree = free_pages; + spin_unlock(&sbinfo->spool->lock); buf->f_files = sbinfo->max_inodes; buf->f_ffree = sbinfo->free_inodes; } @@ -610,6 +617,10 @@ static void hugetlbfs_put_super(struct super_block *sb) if (sbi) { sb->s_fs_info = NULL; + + if (sbi->spool) + hugepage_put_subpool(sbi->spool); + kfree(sbi); } } @@ -841,10 +852,14 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_fs_info = sbinfo; sbinfo->hstate = config.hstate; spin_lock_init(&sbinfo->stat_lock); - sbinfo->max_blocks = config.nr_blocks; - sbinfo->free_blocks = config.nr_blocks; sbinfo->max_inodes = config.nr_inodes; sbinfo->free_inodes = config.nr_inodes; + sbinfo->spool = NULL; + if (config.nr_blocks != -1) { + sbinfo->spool = hugepage_new_subpool(config.nr_blocks); + if (!sbinfo->spool) + goto out_free; + } sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_blocksize = huge_page_size(config.hstate); sb->s_blocksize_bits = huge_page_shift(config.hstate); @@ -864,38 +879,12 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_root = root; return 0; out_free: + if (sbinfo->spool) + kfree(sbinfo->spool); kfree(sbinfo); return -ENOMEM; } -int hugetlb_get_quota(struct address_space *mapping, long delta) -{ - int ret = 0; - struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb); - - if (sbinfo->free_blocks > -1) { - spin_lock(&sbinfo->stat_lock); - if (sbinfo->free_blocks - delta >= 0) - sbinfo->free_blocks -= delta; - else - ret = -ENOMEM; - spin_unlock(&sbinfo->stat_lock); - } - - return ret; -} - -void hugetlb_put_quota(struct address_space *mapping, long delta) -{ - struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb); - - if (sbinfo->free_blocks > -1) { - spin_lock(&sbinfo->stat_lock); - sbinfo->free_blocks += delta; - spin_unlock(&sbinfo->stat_lock); - } -} - static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { @@ -963,7 +952,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, d_instantiate(path.dentry, inode); inode->i_size = size; - inode->i_nlink = 0; + clear_nlink(inode); error = -ENFILE; file = alloc_file(&path, FMODE_WRITE | FMODE_READ, @@ -1024,6 +1013,7 @@ static int __init init_hugetlbfs_fs(void) static void __exit exit_hugetlbfs_fs(void) { kmem_cache_destroy(hugetlbfs_inode_cachep); + kern_unmount(hugetlbfs_vfsmount); unregister_filesystem(&hugetlbfs_fs_type); bdi_destroy(&hugetlbfs_backing_dev_info); } diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index 0542b6e..f20437c 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c @@ -254,19 +254,16 @@ static int isofs_readdir(struct file *filp, char *tmpname; struct iso_directory_record *tmpde; struct inode *inode = filp->f_path.dentry->d_inode; - struct isofs_sb_info *sbi = ISOFS_SB(inode->i_sb); tmpname = (char *)__get_free_page(GFP_KERNEL); if (tmpname == NULL) return -ENOMEM; - mutex_lock(&sbi->s_mutex); tmpde = (struct iso_directory_record *) (tmpname+1024); result = do_isofs_readdir(inode, filp, dirent, filldir, tmpname, tmpde); free_page((unsigned long) tmpname); - mutex_unlock(&sbi->s_mutex); return result; } diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 26f6364..2f9197f 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "isofs.h" #include "zisofs.h" @@ -67,7 +68,7 @@ static void isofs_put_super(struct super_block *sb) return; } -static int isofs_read_inode(struct inode *); +static int isofs_read_inode(struct inode *, int relocated); static int isofs_statfs (struct dentry *, struct kstatfs *); static struct kmem_cache *isofs_inode_cachep; @@ -854,7 +855,6 @@ root_found: sbi->s_utf8 = opt.utf8; sbi->s_nocompress = opt.nocompress; sbi->s_overriderockperm = opt.overriderockperm; - mutex_init(&sbi->s_mutex); /* * It would be incredibly stupid to allow people to mark every file * on the disk as suid, so we merely allow them to set the default @@ -1140,7 +1140,13 @@ struct buffer_head *isofs_bread(struct inode *inode, sector_t block) static int isofs_readpage(struct file *file, struct page *page) { - return block_read_full_page(page,isofs_get_block); + return mpage_readpage(page, isofs_get_block); +} + +static int isofs_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, isofs_get_block); } static sector_t _isofs_bmap(struct address_space *mapping, sector_t block) @@ -1150,6 +1156,7 @@ static sector_t _isofs_bmap(struct address_space *mapping, sector_t block) static const struct address_space_operations isofs_aops = { .readpage = isofs_readpage, + .readpages = isofs_readpages, .bmap = _isofs_bmap }; @@ -1256,7 +1263,7 @@ out_toomany: goto out; } -static int isofs_read_inode(struct inode *inode) +static int isofs_read_inode(struct inode *inode, int relocated) { struct super_block *sb = inode->i_sb; struct isofs_sb_info *sbi = ISOFS_SB(sb); @@ -1311,7 +1318,7 @@ static int isofs_read_inode(struct inode *inode) inode->i_mode = S_IFDIR | sbi->s_dmode; else inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; - inode->i_nlink = 1; /* + set_nlink(inode, 1); /* * Set to 1. We know there are 2, but * the find utility tries to optimize * if it is 2, and it screws up. It is @@ -1329,7 +1336,7 @@ static int isofs_read_inode(struct inode *inode) */ inode->i_mode = S_IFREG | S_IRUGO | S_IXUGO; } - inode->i_nlink = 1; + set_nlink(inode, 1); } inode->i_uid = sbi->s_uid; inode->i_gid = sbi->s_gid; @@ -1401,7 +1408,7 @@ static int isofs_read_inode(struct inode *inode) */ if (!high_sierra) { - parse_rock_ridge_inode(de, inode); + parse_rock_ridge_inode(de, inode, relocated); /* if we want uid/gid set, override the rock ridge setting */ if (sbi->s_uid_set) inode->i_uid = sbi->s_uid; @@ -1480,9 +1487,10 @@ static int isofs_iget5_set(struct inode *ino, void *data) * offset that point to the underlying meta-data for the inode. The * code below is otherwise similar to the iget() code in * include/linux/fs.h */ -struct inode *isofs_iget(struct super_block *sb, - unsigned long block, - unsigned long offset) +struct inode *__isofs_iget(struct super_block *sb, + unsigned long block, + unsigned long offset, + int relocated) { unsigned long hashval; struct inode *inode; @@ -1504,7 +1512,7 @@ struct inode *isofs_iget(struct super_block *sb, return ERR_PTR(-ENOMEM); if (inode->i_state & I_NEW) { - ret = isofs_read_inode(inode); + ret = isofs_read_inode(inode, relocated); if (ret < 0) { iget_failed(inode); inode = ERR_PTR(ret); diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index 2882dc0..f9c9793 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h @@ -55,7 +55,6 @@ struct isofs_sb_info { gid_t s_gid; uid_t s_uid; struct nls_table *s_nls_iocharset; /* Native language support table */ - struct mutex s_mutex; /* replaces BKL, please remove if possible */ }; #define ISOFS_INVALID_MODE ((mode_t) -1) @@ -108,7 +107,7 @@ extern int iso_date(char *, int); struct inode; /* To make gcc happy */ -extern int parse_rock_ridge_inode(struct iso_directory_record *, struct inode *); +extern int parse_rock_ridge_inode(struct iso_directory_record *, struct inode *, int relocated); extern int get_rock_ridge_filename(struct iso_directory_record *, char *, struct inode *); extern int isofs_name_translate(struct iso_directory_record *, char *, struct inode *); @@ -119,9 +118,24 @@ extern struct dentry *isofs_lookup(struct inode *, struct dentry *, struct namei extern struct buffer_head *isofs_bread(struct inode *, sector_t); extern int isofs_get_blocks(struct inode *, sector_t, struct buffer_head **, unsigned long); -extern struct inode *isofs_iget(struct super_block *sb, - unsigned long block, - unsigned long offset); +struct inode *__isofs_iget(struct super_block *sb, + unsigned long block, + unsigned long offset, + int relocated); + +static inline struct inode *isofs_iget(struct super_block *sb, + unsigned long block, + unsigned long offset) +{ + return __isofs_iget(sb, block, offset, 0); +} + +static inline struct inode *isofs_iget_reloc(struct super_block *sb, + unsigned long block, + unsigned long offset) +{ + return __isofs_iget(sb, block, offset, 1); +} /* Because the inode number is no longer relevant to finding the * underlying meta-data for an inode, we are free to choose a more diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index 4fb3e80..1e2946f 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c @@ -168,7 +168,6 @@ struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nam int found; unsigned long uninitialized_var(block); unsigned long uninitialized_var(offset); - struct isofs_sb_info *sbi = ISOFS_SB(dir->i_sb); struct inode *inode; struct page *page; @@ -176,21 +175,13 @@ struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nam if (!page) return ERR_PTR(-ENOMEM); - mutex_lock(&sbi->s_mutex); found = isofs_find_entry(dir, dentry, &block, &offset, page_address(page), 1024 + page_address(page)); __free_page(page); - inode = NULL; - if (found) { - inode = isofs_iget(dir->i_sb, block, offset); - if (IS_ERR(inode)) { - mutex_unlock(&sbi->s_mutex); - return ERR_CAST(inode); - } - } - mutex_unlock(&sbi->s_mutex); + inode = found ? isofs_iget(dir->i_sb, block, offset) : NULL; + return d_splice_alias(inode, dentry); } diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c index f9cd04d..1780949 100644 --- a/fs/isofs/rock.c +++ b/fs/isofs/rock.c @@ -30,6 +30,7 @@ struct rock_state { int cont_size; int cont_extent; int cont_offset; + int cont_loops; struct inode *inode; }; @@ -73,6 +74,9 @@ static void init_rock_state(struct rock_state *rs, struct inode *inode) rs->inode = inode; } +/* Maximum number of Rock Ridge continuation entries */ +#define RR_MAX_CE_ENTRIES 32 + /* * Returns 0 if the caller should continue scanning, 1 if the scan must end * and -ve on error. @@ -105,6 +109,8 @@ static int rock_continue(struct rock_state *rs) goto out; } ret = -EIO; + if (++rs->cont_loops >= RR_MAX_CE_ENTRIES) + goto out; bh = sb_bread(rs->inode->i_sb, rs->cont_extent); if (bh) { memcpy(rs->buffer, bh->b_data + rs->cont_offset, @@ -288,12 +294,16 @@ eio: goto out; } +#define RR_REGARD_XA 1 +#define RR_RELOC_DE 2 + static int parse_rock_ridge_inode_internal(struct iso_directory_record *de, - struct inode *inode, int regard_xa) + struct inode *inode, int flags) { int symlink_len = 0; int cnt, sig; + unsigned int reloc_block; struct inode *reloc; struct rock_ridge *rr; int rootflag; @@ -305,7 +315,7 @@ parse_rock_ridge_inode_internal(struct iso_directory_record *de, init_rock_state(&rs, inode); setup_rock_ridge(de, inode, &rs); - if (regard_xa) { + if (flags & RR_REGARD_XA) { rs.chr += 14; rs.len -= 14; if (rs.len < 0) @@ -352,6 +362,9 @@ repeat: rs.cont_size = isonum_733(rr->u.CE.size); break; case SIG('E', 'R'): + /* Invalid length of ER tag id? */ + if (rr->u.ER.len_id + offsetof(struct rock_ridge, u.ER.data) > rr->len) + goto out; ISOFS_SB(inode->i_sb)->s_rock = 1; printk(KERN_DEBUG "ISO 9660 Extensions: "); { @@ -363,7 +376,7 @@ repeat: break; case SIG('P', 'X'): inode->i_mode = isonum_733(rr->u.PX.mode); - inode->i_nlink = isonum_733(rr->u.PX.n_links); + set_nlink(inode, isonum_733(rr->u.PX.n_links)); inode->i_uid = isonum_733(rr->u.PX.uid); inode->i_gid = isonum_733(rr->u.PX.gid); break; @@ -485,18 +498,28 @@ repeat: "relocated directory\n"); goto out; case SIG('C', 'L'): - ISOFS_I(inode)->i_first_extent = - isonum_733(rr->u.CL.location); - reloc = - isofs_iget(inode->i_sb, - ISOFS_I(inode)->i_first_extent, - 0); + if (flags & RR_RELOC_DE) { + printk(KERN_ERR + "ISOFS: Recursive directory relocation " + "is not supported\n"); + goto eio; + } + reloc_block = isonum_733(rr->u.CL.location); + if (reloc_block == ISOFS_I(inode)->i_iget5_block && + ISOFS_I(inode)->i_iget5_offset == 0) { + printk(KERN_ERR + "ISOFS: Directory relocation points to " + "itself\n"); + goto eio; + } + ISOFS_I(inode)->i_first_extent = reloc_block; + reloc = isofs_iget_reloc(inode->i_sb, reloc_block, 0); if (IS_ERR(reloc)) { ret = PTR_ERR(reloc); goto out; } inode->i_mode = reloc->i_mode; - inode->i_nlink = reloc->i_nlink; + set_nlink(inode, reloc->i_nlink); inode->i_uid = reloc->i_uid; inode->i_gid = reloc->i_gid; inode->i_rdev = reloc->i_rdev; @@ -637,9 +660,11 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit) return rpnt; } -int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode) +int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode, + int relocated) { - int result = parse_rock_ridge_inode_internal(de, inode, 0); + int flags = relocated ? RR_RELOC_DE : 0; + int result = parse_rock_ridge_inode_internal(de, inode, flags); /* * if rockridge flag was reset and we didn't look for attributes @@ -647,7 +672,8 @@ int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode) */ if ((ISOFS_SB(inode->i_sb)->s_rock_offset == -1) && (ISOFS_SB(inode->i_sb)->s_rock == 2)) { - result = parse_rock_ridge_inode_internal(de, inode, 14); + result = parse_rock_ridge_inode_internal(de, inode, + flags | RR_REGARD_XA); } return result; } @@ -678,7 +704,6 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page) init_rock_state(&rs, inode); block = ei->i_iget5_block; - mutex_lock(&sbi->s_mutex); bh = sb_bread(inode->i_sb, block); if (!bh) goto out_noread; @@ -748,7 +773,6 @@ repeat: goto fail; brelse(bh); *rpnt = '\0'; - mutex_unlock(&sbi->s_mutex); SetPageUptodate(page); kunmap(page); unlock_page(page); @@ -765,7 +789,6 @@ out_bad_span: printk("symlink spans iso9660 blocks\n"); fail: brelse(bh); - mutex_unlock(&sbi->s_mutex); error: SetPageError(page); kunmap(page); diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index e4b87bc..5c93ffc 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include /* * Unlink a buffer from a transaction checkpoint list. @@ -95,10 +97,14 @@ static int __try_to_free_cp_buf(struct journal_head *jh) if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh) && !buffer_write_io_error(bh)) { + /* + * Get our reference so that bh cannot be freed before + * we unlock it + */ + get_bh(bh); JBUFFER_TRACE(jh, "remove from checkpoint list"); ret = __journal_remove_checkpoint(jh) + 1; jbd_unlock_bh_state(bh); - journal_remove_journal_head(bh); BUFFER_TRACE(bh, "release"); __brelse(bh); } else { @@ -220,8 +226,8 @@ restart: spin_lock(&journal->j_list_lock); goto restart; } + get_bh(bh); if (buffer_locked(bh)) { - get_bh(bh); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); wait_on_buffer(bh); @@ -240,7 +246,6 @@ restart: */ released = __journal_remove_checkpoint(jh); jbd_unlock_bh_state(bh); - journal_remove_journal_head(bh); __brelse(bh); } @@ -253,9 +258,12 @@ static void __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) { int i; + struct blk_plug plug; + blk_start_plug(&plug); for (i = 0; i < *batch_count; i++) - write_dirty_buffer(bhs[i], WRITE); + write_dirty_buffer(bhs[i], WRITE_SYNC); + blk_finish_plug(&plug); for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = bhs[i]; @@ -304,12 +312,12 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, ret = 1; if (unlikely(buffer_write_io_error(bh))) ret = -EIO; + get_bh(bh); J_ASSERT_JH(jh, !buffer_jbddirty(bh)); BUFFER_TRACE(bh, "remove from checkpoint"); __journal_remove_checkpoint(jh); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); - journal_remove_journal_head(bh); __brelse(bh); } else { /* @@ -358,6 +366,7 @@ int log_do_checkpoint(journal_t *journal) * journal straight away. */ result = cleanup_journal_tail(journal); + trace_jbd_checkpoint(journal, result); jbd_debug(1, "cleanup_journal_tail returned %d\n", result); if (result <= 0) return result; @@ -444,8 +453,6 @@ out: * * Return <0 on error, 0 on success, 1 if there was nothing to clean up. * - * Called with the journal lock held. - * * This is the only part of the journaling code which really needs to be * aware of transaction aborts. Checkpointing involves writing to the * main filesystem area rather than to the journal, so it can proceed @@ -463,13 +470,14 @@ int cleanup_journal_tail(journal_t *journal) if (is_journal_aborted(journal)) return 1; - /* OK, work out the oldest transaction remaining in the log, and + /* + * OK, work out the oldest transaction remaining in the log, and * the log block it starts at. * * If the log is now empty, we need to work out which is the * next transaction ID we will write, and where it will - * start. */ - + * start. + */ spin_lock(&journal->j_state_lock); spin_lock(&journal->j_list_lock); transaction = journal->j_checkpoint_transactions; @@ -495,7 +503,25 @@ int cleanup_journal_tail(journal_t *journal) spin_unlock(&journal->j_state_lock); return 1; } + spin_unlock(&journal->j_state_lock); + /* + * We need to make sure that any blocks that were recently written out + * --- perhaps by log_do_checkpoint() --- are flushed out before we + * drop the transactions from the journal. It's unlikely this will be + * necessary, especially with an appropriately sized journal, but we + * need this to guarantee correctness. Fortunately + * cleanup_journal_tail() doesn't get called all that often. + */ + if (journal->j_flags & JFS_BARRIER) + blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); + + spin_lock(&journal->j_state_lock); + if (!tid_gt(first_tid, journal->j_tail_sequence)) { + spin_unlock(&journal->j_state_lock); + /* Someone else cleaned up journal so return 0 */ + return 0; + } /* OK, update the superblock to recover the freed space. * Physical blocks come first: have we wrapped beyond the end of * the log? */ @@ -503,6 +529,7 @@ int cleanup_journal_tail(journal_t *journal) if (blocknr < journal->j_tail) freed = freed + journal->j_last - journal->j_first; + trace_jbd_cleanup_journal_tail(journal, first_tid, blocknr, freed); jbd_debug(1, "Cleaning journal tail from %d to %d (offset %u), " "freeing %u\n", @@ -523,9 +550,9 @@ int cleanup_journal_tail(journal_t *journal) /* * journal_clean_one_cp_list * - * Find all the written-back checkpoint buffers in the given list and release them. + * Find all the written-back checkpoint buffers in the given list and release + * them. * - * Called with the journal locked. * Called with j_list_lock held. * Returns number of bufers reaped (for debug) */ @@ -632,8 +659,8 @@ out: * checkpoint lists. * * The function returns 1 if it frees the transaction, 0 otherwise. + * The function can free jh and bh. * - * This function is called with the journal locked. * This function is called with j_list_lock held. * This function is called with jbd_lock_bh_state(jh2bh(jh)) */ @@ -652,13 +679,14 @@ int __journal_remove_checkpoint(struct journal_head *jh) } journal = transaction->t_journal; + JBUFFER_TRACE(jh, "removing from transaction"); __buffer_unlink(jh); jh->b_cp_transaction = NULL; + journal_put_journal_head(jh); if (transaction->t_checkpoint_list != NULL || transaction->t_checkpoint_io_list != NULL) goto out; - JBUFFER_TRACE(jh, "transaction has no more buffers"); /* * There is one special case to worry about: if we have just pulled the @@ -669,10 +697,8 @@ int __journal_remove_checkpoint(struct journal_head *jh) * The locking here around t_state is a bit sleazy. * See the comment at the end of journal_commit_transaction(). */ - if (transaction->t_state != T_FINISHED) { - JBUFFER_TRACE(jh, "belongs to running/committing transaction"); + if (transaction->t_state != T_FINISHED) goto out; - } /* OK, that was the last buffer for the transaction: we can now safely remove this transaction from the log */ @@ -684,7 +710,6 @@ int __journal_remove_checkpoint(struct journal_head *jh) wake_up(&journal->j_wait_logspace); ret = 1; out: - JBUFFER_TRACE(jh, "exit"); return ret; } @@ -703,6 +728,8 @@ void __journal_insert_checkpoint(struct journal_head *jh, J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh))); J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); + /* Get reference for checkpointing transaction */ + journal_grab_journal_head(jh2bh(jh)); jh->b_cp_transaction = transaction; if (!transaction->t_checkpoint_list) { @@ -752,6 +779,7 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction) J_ASSERT(journal->j_committing_transaction != transaction); J_ASSERT(journal->j_running_transaction != transaction); + trace_jbd_drop_transaction(journal, transaction); jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); kfree(transaction); } diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index dcd23f8..931bf95 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -21,6 +21,7 @@ #include #include #include +#include /* * Default IO end handler for temporary BJ_IO buffer_heads. @@ -209,6 +210,8 @@ write_out_data: if (!trylock_buffer(bh)) { BUFFER_TRACE(bh, "needs blocking lock"); spin_unlock(&journal->j_list_lock); + trace_jbd_do_submit_data(journal, + commit_transaction); /* Write out all data to prevent deadlocks */ journal_do_submit_data(wbuf, bufs, write_op); bufs = 0; @@ -241,6 +244,8 @@ write_out_data: jbd_unlock_bh_state(bh); if (bufs == journal->j_wbufsize) { spin_unlock(&journal->j_list_lock); + trace_jbd_do_submit_data(journal, + commit_transaction); journal_do_submit_data(wbuf, bufs, write_op); bufs = 0; goto write_out_data; @@ -258,10 +263,6 @@ write_out_data: jbd_unlock_bh_state(bh); if (locked) unlock_buffer(bh); - journal_remove_journal_head(bh); - /* One for our safety reference, other for - * journal_remove_journal_head() */ - put_bh(bh); release_data_buffer(bh); } @@ -271,6 +272,7 @@ write_out_data: } } spin_unlock(&journal->j_list_lock); + trace_jbd_do_submit_data(journal, commit_transaction); journal_do_submit_data(wbuf, bufs, write_op); return err; @@ -321,12 +323,14 @@ void journal_commit_transaction(journal_t *journal) commit_transaction = journal->j_running_transaction; J_ASSERT(commit_transaction->t_state == T_RUNNING); + trace_jbd_start_commit(journal, commit_transaction); jbd_debug(1, "JBD: starting commit of transaction %d\n", commit_transaction->t_tid); spin_lock(&journal->j_state_lock); commit_transaction->t_state = T_LOCKED; + trace_jbd_commit_locking(journal, commit_transaction); spin_lock(&commit_transaction->t_handle_lock); while (commit_transaction->t_updates) { DEFINE_WAIT(wait); @@ -397,6 +401,7 @@ void journal_commit_transaction(journal_t *journal) */ journal_switch_revoke_table(journal); + trace_jbd_commit_flushing(journal, commit_transaction); commit_transaction->t_state = T_FLUSH; journal->j_committing_transaction = commit_transaction; journal->j_running_transaction = NULL; @@ -451,14 +456,9 @@ void journal_commit_transaction(journal_t *journal) } if (buffer_jbd(bh) && bh2jh(bh) == jh && jh->b_transaction == commit_transaction && - jh->b_jlist == BJ_Locked) { + jh->b_jlist == BJ_Locked) __journal_unfile_buffer(jh); - jbd_unlock_bh_state(bh); - journal_remove_journal_head(bh); - put_bh(bh); - } else { - jbd_unlock_bh_state(bh); - } + jbd_unlock_bh_state(bh); release_data_buffer(bh); cond_resched_lock(&journal->j_list_lock); } @@ -498,6 +498,7 @@ void journal_commit_transaction(journal_t *journal) commit_transaction->t_state = T_COMMIT; spin_unlock(&journal->j_state_lock); + trace_jbd_commit_logging(journal, commit_transaction); J_ASSERT(commit_transaction->t_nr_buffers <= commit_transaction->t_outstanding_credits); @@ -802,10 +803,16 @@ restart_loop: while (commit_transaction->t_forget) { transaction_t *cp_transaction; struct buffer_head *bh; + int try_to_free = 0; jh = commit_transaction->t_forget; spin_unlock(&journal->j_list_lock); bh = jh2bh(jh); + /* + * Get a reference so that bh cannot be freed before we are + * done with it. + */ + get_bh(bh); jbd_lock_bh_state(bh); J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || jh->b_transaction == journal->j_running_transaction); @@ -881,28 +888,27 @@ restart_loop: __journal_insert_checkpoint(jh, commit_transaction); if (is_journal_aborted(journal)) clear_buffer_jbddirty(bh); - JBUFFER_TRACE(jh, "refile for checkpoint writeback"); - __journal_refile_buffer(jh); - jbd_unlock_bh_state(bh); } else { J_ASSERT_BH(bh, !buffer_dirty(bh)); - /* The buffer on BJ_Forget list and not jbddirty means + /* + * The buffer on BJ_Forget list and not jbddirty means * it has been freed by this transaction and hence it * could not have been reallocated until this * transaction has committed. *BUT* it could be * reallocated once we have written all the data to * disk and before we process the buffer on BJ_Forget - * list. */ - JBUFFER_TRACE(jh, "refile or unfile freed buffer"); - __journal_refile_buffer(jh); - if (!jh->b_transaction) { - jbd_unlock_bh_state(bh); - /* needs a brelse */ - journal_remove_journal_head(bh); - release_buffer_page(bh); - } else - jbd_unlock_bh_state(bh); + * list. + */ + if (!jh->b_next_transaction) + try_to_free = 1; } + JBUFFER_TRACE(jh, "refile or unfile freed buffer"); + __journal_refile_buffer(jh); + jbd_unlock_bh_state(bh); + if (try_to_free) + release_buffer_page(bh); + else + __brelse(bh); cond_resched_lock(&journal->j_list_lock); } spin_unlock(&journal->j_list_lock); @@ -969,6 +975,7 @@ restart_loop: } spin_unlock(&journal->j_list_lock); + trace_jbd_end_commit(journal, commit_transaction); jbd_debug(1, "JBD: commit %d complete, head %d\n", journal->j_commit_sequence, journal->j_tail_sequence); diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 9f36384..fea8dd6 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -38,6 +38,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include + #include #include @@ -1065,6 +1068,7 @@ void journal_update_superblock(journal_t *journal, int wait) } else write_dirty_buffer(bh, WRITE); + trace_jbd_update_superblock_end(journal, wait); out: /* If we have just flushed the log (by marking s_start==0), then * any future commit will have to be careful to update the @@ -1807,10 +1811,9 @@ static void journal_free_journal_head(struct journal_head *jh) * When a buffer has its BH_JBD bit set it is immune from being released by * core kernel code, mainly via ->b_count. * - * A journal_head may be detached from its buffer_head when the journal_head's - * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL. - * Various places in JBD call journal_remove_journal_head() to indicate that the - * journal_head can be dropped if needed. + * A journal_head is detached from its buffer_head when the journal_head's + * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint + * transaction (b_cp_transaction) hold their references to b_jcount. * * Various places in the kernel want to attach a journal_head to a buffer_head * _before_ attaching the journal_head to a transaction. To protect the @@ -1823,17 +1826,16 @@ static void journal_free_journal_head(struct journal_head *jh) * (Attach a journal_head if needed. Increments b_jcount) * struct journal_head *jh = journal_add_journal_head(bh); * ... - * jh->b_transaction = xxx; - * journal_put_journal_head(jh); - * - * Now, the journal_head's b_jcount is zero, but it is safe from being released - * because it has a non-zero b_transaction. + * (Get another reference for transaction) + * journal_grab_journal_head(bh); + * jh->b_transaction = xxx; + * (Put original reference) + * journal_put_journal_head(jh); */ /* * Give a buffer_head a journal_head. * - * Doesn't need the journal lock. * May sleep. */ struct journal_head *journal_add_journal_head(struct buffer_head *bh) @@ -1897,61 +1899,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh) struct journal_head *jh = bh2jh(bh); J_ASSERT_JH(jh, jh->b_jcount >= 0); - - get_bh(bh); - if (jh->b_jcount == 0) { - if (jh->b_transaction == NULL && - jh->b_next_transaction == NULL && - jh->b_cp_transaction == NULL) { - J_ASSERT_JH(jh, jh->b_jlist == BJ_None); - J_ASSERT_BH(bh, buffer_jbd(bh)); - J_ASSERT_BH(bh, jh2bh(jh) == bh); - BUFFER_TRACE(bh, "remove journal_head"); - if (jh->b_frozen_data) { - printk(KERN_WARNING "%s: freeing " - "b_frozen_data\n", - __func__); - jbd_free(jh->b_frozen_data, bh->b_size); - } - if (jh->b_committed_data) { - printk(KERN_WARNING "%s: freeing " - "b_committed_data\n", - __func__); - jbd_free(jh->b_committed_data, bh->b_size); - } - bh->b_private = NULL; - jh->b_bh = NULL; /* debug, really */ - clear_buffer_jbd(bh); - __brelse(bh); - journal_free_journal_head(jh); - } else { - BUFFER_TRACE(bh, "journal_head was locked"); - } + J_ASSERT_JH(jh, jh->b_transaction == NULL); + J_ASSERT_JH(jh, jh->b_next_transaction == NULL); + J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); + J_ASSERT_JH(jh, jh->b_jlist == BJ_None); + J_ASSERT_BH(bh, buffer_jbd(bh)); + J_ASSERT_BH(bh, jh2bh(jh) == bh); + BUFFER_TRACE(bh, "remove journal_head"); + if (jh->b_frozen_data) { + printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__); + jbd_free(jh->b_frozen_data, bh->b_size); } + if (jh->b_committed_data) { + printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__); + jbd_free(jh->b_committed_data, bh->b_size); + } + bh->b_private = NULL; + jh->b_bh = NULL; /* debug, really */ + clear_buffer_jbd(bh); + journal_free_journal_head(jh); } /* - * journal_remove_journal_head(): if the buffer isn't attached to a transaction - * and has a zero b_jcount then remove and release its journal_head. If we did - * see that the buffer is not used by any transaction we also "logically" - * decrement ->b_count. - * - * We in fact take an additional increment on ->b_count as a convenience, - * because the caller usually wants to do additional things with the bh - * after calling here. - * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some - * time. Once the caller has run __brelse(), the buffer is eligible for - * reaping by try_to_free_buffers(). - */ -void journal_remove_journal_head(struct buffer_head *bh) -{ - jbd_lock_bh_journal_head(bh); - __journal_remove_journal_head(bh); - jbd_unlock_bh_journal_head(bh); -} - -/* - * Drop a reference on the passed journal_head. If it fell to zero then try to + * Drop a reference on the passed journal_head. If it fell to zero then * release the journal_head from the buffer_head. */ void journal_put_journal_head(struct journal_head *jh) @@ -1961,11 +1931,12 @@ void journal_put_journal_head(struct journal_head *jh) jbd_lock_bh_journal_head(bh); J_ASSERT_JH(jh, jh->b_jcount > 0); --jh->b_jcount; - if (!jh->b_jcount && !jh->b_transaction) { + if (!jh->b_jcount) { __journal_remove_journal_head(bh); + jbd_unlock_bh_journal_head(bh); __brelse(bh); - } - jbd_unlock_bh_journal_head(bh); + } else + jbd_unlock_bh_journal_head(bh); } /* diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c index 5b43e96..008bf06 100644 --- a/fs/jbd/recovery.c +++ b/fs/jbd/recovery.c @@ -20,6 +20,7 @@ #include #include #include +#include #endif /* @@ -263,6 +264,9 @@ int journal_recover(journal_t *journal) err2 = sync_blockdev(journal->j_fs_dev); if (!err) err = err2; + /* Flush disk caches to get replayed data on the permanent storage */ + if (journal->j_flags & JFS_BARRIER) + blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); return err; } diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index d7ab092..7c86b37 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -26,6 +26,7 @@ #include #include #include +#include static void __journal_temp_unlink_buffer(struct journal_head *jh); @@ -99,11 +100,10 @@ static int start_this_handle(journal_t *journal, handle_t *handle) alloc_transaction: if (!journal->j_running_transaction) { - new_transaction = kzalloc(sizeof(*new_transaction), - GFP_NOFS|__GFP_NOFAIL); + new_transaction = kzalloc(sizeof(*new_transaction), GFP_NOFS); if (!new_transaction) { - ret = -ENOMEM; - goto out; + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto alloc_transaction; } } @@ -696,7 +696,6 @@ repeat: if (!jh->b_transaction) { JBUFFER_TRACE(jh, "no transaction"); J_ASSERT_JH(jh, !jh->b_next_transaction); - jh->b_transaction = transaction; JBUFFER_TRACE(jh, "file as BJ_Reserved"); spin_lock(&journal->j_list_lock); __journal_file_buffer(jh, transaction, BJ_Reserved); @@ -818,7 +817,6 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh) * committed and so it's safe to clear the dirty bit. */ clear_buffer_dirty(jh2bh(jh)); - jh->b_transaction = transaction; /* first access by this transaction */ jh->b_modified = 0; @@ -844,8 +842,8 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh) */ JBUFFER_TRACE(jh, "cancelling revoke"); journal_cancel_revoke(handle, jh); - journal_put_journal_head(jh); out: + journal_put_journal_head(jh); return err; } @@ -1069,8 +1067,9 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) ret = -EIO; goto no_journal; } - - if (jh->b_transaction != NULL) { + /* We might have slept so buffer could be refiled now */ + if (jh->b_transaction != NULL && + jh->b_transaction != handle->h_transaction) { JBUFFER_TRACE(jh, "unfile from commit"); __journal_temp_unlink_buffer(jh); /* It still points to the committing @@ -1091,8 +1090,6 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) { JBUFFER_TRACE(jh, "not on correct data list: unfile"); J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow); - __journal_temp_unlink_buffer(jh); - jh->b_transaction = handle->h_transaction; JBUFFER_TRACE(jh, "file as data"); __journal_file_buffer(jh, handle->h_transaction, BJ_SyncData); @@ -1300,8 +1297,6 @@ int journal_forget (handle_t *handle, struct buffer_head *bh) __journal_file_buffer(jh, transaction, BJ_Forget); } else { __journal_unfile_buffer(jh); - journal_remove_journal_head(bh); - __brelse(bh); if (!buffer_jbd(bh)) { spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); @@ -1622,19 +1617,32 @@ static void __journal_temp_unlink_buffer(struct journal_head *jh) mark_buffer_dirty(bh); /* Expose it to the VM */ } +/* + * Remove buffer from all transactions. + * + * Called with bh_state lock and j_list_lock + * + * jh and bh may be already freed when this function returns. + */ void __journal_unfile_buffer(struct journal_head *jh) { __journal_temp_unlink_buffer(jh); jh->b_transaction = NULL; + journal_put_journal_head(jh); } void journal_unfile_buffer(journal_t *journal, struct journal_head *jh) { - jbd_lock_bh_state(jh2bh(jh)); + struct buffer_head *bh = jh2bh(jh); + + /* Get reference so that buffer cannot be freed before we unlock it */ + get_bh(bh); + jbd_lock_bh_state(bh); spin_lock(&journal->j_list_lock); __journal_unfile_buffer(jh); spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(jh2bh(jh)); + jbd_unlock_bh_state(bh); + __brelse(bh); } /* @@ -1661,16 +1669,12 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) /* A written-back ordered data buffer */ JBUFFER_TRACE(jh, "release data"); __journal_unfile_buffer(jh); - journal_remove_journal_head(bh); - __brelse(bh); } } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { /* written-back checkpointed metadata buffer */ if (jh->b_jlist == BJ_None) { JBUFFER_TRACE(jh, "remove from checkpoint list"); __journal_remove_checkpoint(jh); - journal_remove_journal_head(bh); - __brelse(bh); } } spin_unlock(&journal->j_list_lock); @@ -1733,7 +1737,7 @@ int journal_try_to_free_buffers(journal_t *journal, /* * We take our own ref against the journal_head here to avoid * having to add tons of locking around each instance of - * journal_remove_journal_head() and journal_put_journal_head(). + * journal_put_journal_head(). */ jh = journal_grab_journal_head(bh); if (!jh) @@ -1770,10 +1774,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) int may_free = 1; struct buffer_head *bh = jh2bh(jh); - __journal_unfile_buffer(jh); - if (jh->b_cp_transaction) { JBUFFER_TRACE(jh, "on running+cp transaction"); + __journal_temp_unlink_buffer(jh); /* * We don't want to write the buffer anymore, clear the * bit so that we don't confuse checks in @@ -1784,8 +1787,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) may_free = 0; } else { JBUFFER_TRACE(jh, "on running transaction"); - journal_remove_journal_head(bh); - __brelse(bh); + __journal_unfile_buffer(jh); } return may_free; } @@ -2096,6 +2098,8 @@ void __journal_file_buffer(struct journal_head *jh, if (jh->b_transaction) __journal_temp_unlink_buffer(jh); + else + journal_grab_journal_head(bh); jh->b_transaction = transaction; switch (jlist) { @@ -2153,9 +2157,10 @@ void journal_file_buffer(struct journal_head *jh, * already started to be used by a subsequent transaction, refile the * buffer on that transaction's metadata list. * - * Called under journal->j_list_lock - * + * Called under j_list_lock * Called under jbd_lock_bh_state(jh2bh(jh)) + * + * jh and bh may be already free when this function returns */ void __journal_refile_buffer(struct journal_head *jh) { @@ -2179,6 +2184,11 @@ void __journal_refile_buffer(struct journal_head *jh) was_dirty = test_clear_buffer_jbddirty(bh); __journal_temp_unlink_buffer(jh); + /* + * We set b_transaction here because b_next_transaction will inherit + * our jh reference and thus __journal_file_buffer() must not take a + * new one. + */ jh->b_transaction = jh->b_next_transaction; jh->b_next_transaction = NULL; if (buffer_freed(bh)) @@ -2195,30 +2205,21 @@ void __journal_refile_buffer(struct journal_head *jh) } /* - * For the unlocked version of this call, also make sure that any - * hanging journal_head is cleaned up if necessary. - * - * __journal_refile_buffer is usually called as part of a single locked - * operation on a buffer_head, in which the caller is probably going to - * be hooking the journal_head onto other lists. In that case it is up - * to the caller to remove the journal_head if necessary. For the - * unlocked journal_refile_buffer call, the caller isn't going to be - * doing anything else to the buffer so we need to do the cleanup - * ourselves to avoid a jh leak. - * - * *** The journal_head may be freed by this call! *** + * __journal_refile_buffer() with necessary locking added. We take our bh + * reference so that we can safely unlock bh. + * + * The jh and bh may be freed by this call. */ void journal_refile_buffer(journal_t *journal, struct journal_head *jh) { struct buffer_head *bh = jh2bh(jh); + /* Get reference so that buffer cannot be freed before we unlock it */ + get_bh(bh); jbd_lock_bh_state(bh); spin_lock(&journal->j_list_lock); - __journal_refile_buffer(jh); jbd_unlock_bh_state(bh); - journal_remove_journal_head(bh); - spin_unlock(&journal->j_list_lock); __brelse(bh); } diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index e5de942..45559dc 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -27,7 +27,7 @@ #include "jfs_xattr.h" #include "jfs_acl.h" -static struct posix_acl *jfs_get_acl(struct inode *inode, int type) +struct posix_acl *jfs_get_acl(struct inode *inode, int type) { struct posix_acl *acl; char *ea_name; @@ -114,30 +114,9 @@ out: return rc; } -int jfs_check_acl(struct inode *inode, int mask, unsigned int flags) -{ - struct posix_acl *acl; - - if (flags & IPERM_FLAG_RCU) - return -ECHILD; - - acl = jfs_get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl) { - int error = posix_acl_permission(inode, acl, mask); - posix_acl_release(acl); - return error; - } - - return -EAGAIN; -} - int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir) { struct posix_acl *acl = NULL; - struct posix_acl *clone; - mode_t mode; int rc = 0; if (S_ISLNK(inode->i_mode)) @@ -153,20 +132,11 @@ int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir) if (rc) goto cleanup; } - clone = posix_acl_clone(acl, GFP_KERNEL); - if (!clone) { - rc = -ENOMEM; - goto cleanup; - } - mode = inode->i_mode; - rc = posix_acl_create_masq(clone, &mode); - if (rc >= 0) { - inode->i_mode = mode; - if (rc > 0) - rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, - clone); - } - posix_acl_release(clone); + rc = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode); + if (rc < 0) + goto cleanup; /* posix_acl_release(NULL) is no-op */ + if (rc > 0) + rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, acl); cleanup: posix_acl_release(acl); } else @@ -180,8 +150,9 @@ cleanup: int jfs_acl_chmod(struct inode *inode) { - struct posix_acl *acl, *clone; + struct posix_acl *acl; int rc; + tid_t tid; if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; @@ -190,22 +161,18 @@ int jfs_acl_chmod(struct inode *inode) if (IS_ERR(acl) || !acl) return PTR_ERR(acl); - clone = posix_acl_clone(acl, GFP_KERNEL); - posix_acl_release(acl); - if (!clone) - return -ENOMEM; - - rc = posix_acl_chmod_masq(clone, inode->i_mode); - if (!rc) { - tid_t tid = txBegin(inode->i_sb, 0); - mutex_lock(&JFS_IP(inode)->commit_mutex); - rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, clone); - if (!rc) - rc = txCommit(tid, 1, &inode, 0); - txEnd(tid); - mutex_unlock(&JFS_IP(inode)->commit_mutex); - } + rc = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); + if (rc) + return rc; - posix_acl_release(clone); + tid = txBegin(inode->i_sb, 0); + mutex_lock(&JFS_IP(inode)->commit_mutex); + rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, acl); + if (!rc) + rc = txCommit(tid, 1, &inode, 0); + txEnd(tid); + mutex_unlock(&JFS_IP(inode)->commit_mutex); + + posix_acl_release(acl); return rc; } diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 2f3f531..844f946 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -28,19 +28,26 @@ #include "jfs_acl.h" #include "jfs_debug.h" -int jfs_fsync(struct file *file, int datasync) +int jfs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; int rc = 0; + rc = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (rc) + return rc; + + mutex_lock(&inode->i_mutex); if (!(inode->i_state & I_DIRTY) || (datasync && !(inode->i_state & I_DIRTY_DATASYNC))) { /* Make sure committed changes hit the disk */ jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1); + mutex_unlock(&inode->i_mutex); return rc; } rc |= jfs_commit_inode(inode, 1); + mutex_unlock(&inode->i_mutex); return rc ? -EIO : 0; } @@ -110,6 +117,8 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr) if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size != i_size_read(inode)) { + inode_dio_wait(inode); + rc = vmtruncate(inode, iattr->ia_size); if (rc) return rc; @@ -131,7 +140,7 @@ const struct inode_operations jfs_file_inode_operations = { .removexattr = jfs_removexattr, .setattr = jfs_setattr, #ifdef CONFIG_JFS_POSIX_ACL - .check_acl = jfs_check_acl, + .get_acl = jfs_get_acl, #endif }; diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 09100b4..13fc885 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -329,8 +329,8 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, struct inode *inode = file->f_mapping->host; ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, - offset, nr_segs, jfs_get_block, NULL); + ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, + jfs_get_block); /* * In case of error extending write may have instantiated a few diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h index f9285c4..ad84fe5 100644 --- a/fs/jfs/jfs_acl.h +++ b/fs/jfs/jfs_acl.h @@ -20,7 +20,7 @@ #ifdef CONFIG_JFS_POSIX_ACL -int jfs_check_acl(struct inode *, int, unsigned int flags); +struct posix_acl *jfs_get_acl(struct inode *inode, int type); int jfs_init_acl(tid_t, struct inode *, struct inode *); int jfs_acl_chmod(struct inode *inode); diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 4496872..9cbd11a 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -3161,7 +3161,7 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, { int rc; int dbitno, word, rembits, nb, nwords, wbitno, agno; - s8 oldroot, *leaf; + s8 oldroot; struct dmaptree *tp = (struct dmaptree *) & dp->tree; /* save the current value of the root (i.e. maximum free string) @@ -3169,9 +3169,6 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, */ oldroot = tp->stree[ROOT]; - /* pick up a pointer to the leaves of the dmap tree */ - leaf = tp->stree + LEAFIND; - /* determine the bit number and word within the dmap of the * starting block. */ diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index b6f17c0..f6f32fa 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c @@ -3048,9 +3048,9 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) dir_index = (u32) filp->f_pos; /* - * NFSv4 reserves cookies 1 and 2 for . and .. so we add - * the value we return to the vfs is one greater than the - * one we use internally. + * NFSv4 reserves cookies 1 and 2 for . and .. so the value + * we return to the vfs is one greater than the one we use + * internally. */ if (dir_index) dir_index--; @@ -3103,7 +3103,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) * self "." */ filp->f_pos = 1; - if (filldir(dirent, ".", 1, 0, ip->i_ino, + if (filldir(dirent, ".", 1, 1, ip->i_ino, DT_DIR)) return 0; } @@ -3111,7 +3111,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) * parent ".." */ filp->f_pos = 2; - if (filldir(dirent, "..", 2, 1, PARENT(ip), DT_DIR)) + if (filldir(dirent, "..", 2, 2, PARENT(ip), DT_DIR)) return 0; /* diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index b78b2f9..1b6f15f 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -457,7 +457,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary) /* read the page of fixed disk inode (AIT) in raw mode */ mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1); if (mp == NULL) { - ip->i_nlink = 1; /* Don't want iput() deleting it */ + set_nlink(ip, 1); /* Don't want iput() deleting it */ iput(ip); return (NULL); } @@ -469,7 +469,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary) /* copy on-disk inode to in-memory inode */ if ((copy_from_dinode(dp, ip)) != 0) { /* handle bad return by returning NULL for ip */ - ip->i_nlink = 1; /* Don't want iput() deleting it */ + set_nlink(ip, 1); /* Don't want iput() deleting it */ iput(ip); /* release the page */ release_metapage(mp); @@ -3076,7 +3076,7 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip) ip->i_mode |= 0001; } } - ip->i_nlink = le32_to_cpu(dip->di_nlink); + set_nlink(ip, le32_to_cpu(dip->di_nlink)); jfs_ip->saved_uid = le32_to_cpu(dip->di_uid); if (sbi->uid == -1) diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index 2686531..7f464c5 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -95,7 +95,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode) if (insert_inode_locked(inode) < 0) { rc = -EINVAL; - goto fail_unlock; + goto fail_put; } inode_init_owner(inode, parent, mode); @@ -156,8 +156,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode) fail_drop: dquot_drop(inode); inode->i_flags |= S_NOQUOTA; -fail_unlock: - inode->i_nlink = 0; + clear_nlink(inode); unlock_new_inode(inode); fail_put: iput(inode); diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index ec2fb8b..9271cfe 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -21,7 +21,7 @@ struct fid; extern struct inode *ialloc(struct inode *, umode_t); -extern int jfs_fsync(struct file *, int); +extern int jfs_fsync(struct file *, loff_t, loff_t, int); extern long jfs_ioctl(struct file *, unsigned int, unsigned long); extern long jfs_compat_ioctl(struct file *, unsigned int, unsigned long); extern struct inode *jfs_iget(struct super_block *, unsigned long); diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index ee55e45..bfb2a91 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -67,6 +67,7 @@ #include /* for sync_blockdev() */ #include #include +#include #include #include #include diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index f6cc0c0..af96060 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -1143,7 +1143,6 @@ int txCommit(tid_t tid, /* transaction identifier */ struct jfs_log *log; struct tblock *tblk; struct lrd *lrd; - int lsn; struct inode *ip; struct jfs_inode_info *jfs_ip; int k, n; @@ -1310,7 +1309,7 @@ int txCommit(tid_t tid, /* transaction identifier */ */ lrd->type = cpu_to_le16(LOG_COMMIT); lrd->length = 0; - lsn = lmLog(log, tblk, lrd, NULL); + lmLog(log, tblk, lrd, NULL); lmGroupCommit(log, tblk); @@ -2935,7 +2934,6 @@ int jfs_sync(void *arg) { struct inode *ip; struct jfs_inode_info *jfs_ip; - int rc; tid_t tid; do { @@ -2961,7 +2959,7 @@ int jfs_sync(void *arg) */ TXN_UNLOCK(); tid = txBegin(ip->i_sb, COMMIT_INODE); - rc = txCommit(tid, 1, &ip, 0); + txCommit(tid, 1, &ip, 0); txEnd(tid); mutex_unlock(&jfs_ip->commit_mutex); diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c index adcf92d..7971f37 100644 --- a/fs/jfs/jfs_umount.c +++ b/fs/jfs/jfs_umount.c @@ -68,7 +68,7 @@ int jfs_umount(struct super_block *sb) /* * Wait for outstanding transactions to be written to log: */ - jfs_flush_journal(log, 1); + jfs_flush_journal(log, 2); /* * close fileset inode allocation map (aka fileset inode) @@ -146,7 +146,7 @@ int jfs_umount_rw(struct super_block *sb) * * remove file system from log active file system list. */ - jfs_flush_journal(log, 1); + jfs_flush_journal(log, 2); /* * Make sure all metadata makes it to disk diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index eaaf2b5..a112ad9 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -172,7 +172,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, mutex_unlock(&JFS_IP(dip)->commit_mutex); if (rc) { free_ea_wmap(ip); - ip->i_nlink = 0; + clear_nlink(ip); unlock_new_inode(ip); iput(ip); } else { @@ -292,7 +292,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) goto out3; } - ip->i_nlink = 2; /* for '.' */ + set_nlink(ip, 2); /* for '.' */ ip->i_op = &jfs_dir_inode_operations; ip->i_fop = &jfs_dir_operations; @@ -311,7 +311,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) mutex_unlock(&JFS_IP(dip)->commit_mutex); if (rc) { free_ea_wmap(ip); - ip->i_nlink = 0; + clear_nlink(ip); unlock_new_inode(ip); iput(ip); } else { @@ -844,7 +844,7 @@ static int jfs_link(struct dentry *old_dentry, rc = txCommit(tid, 2, &iplist[0], 0); if (rc) { - ip->i_nlink--; /* never instantiated */ + drop_nlink(ip); /* never instantiated */ iput(ip); } else d_instantiate(dentry, ip); @@ -893,7 +893,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, unchar *i_fastsymlink; s64 xlen = 0; int bmask = 0, xsize; - s64 extent = 0, xaddr; + s64 xaddr; struct metapage *mp; struct super_block *sb; struct tblock *tblk; @@ -993,7 +993,6 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, txAbort(tid, 0); goto out3; } - extent = xaddr; ip->i_size = ssize - 1; while (ssize) { /* This is kind of silly since PATH_MAX == 4K */ @@ -1049,7 +1048,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, mutex_unlock(&JFS_IP(dip)->commit_mutex); if (rc) { free_ea_wmap(ip); - ip->i_nlink = 0; + clear_nlink(ip); unlock_new_inode(ip); iput(ip); } else { @@ -1434,7 +1433,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, mutex_unlock(&JFS_IP(dir)->commit_mutex); if (rc) { free_ea_wmap(ip); - ip->i_nlink = 0; + clear_nlink(ip); unlock_new_inode(ip); iput(ip); } else { @@ -1456,34 +1455,23 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc ino_t inum; struct inode *ip; struct component_name key; - const char *name = dentry->d_name.name; - int len = dentry->d_name.len; int rc; - jfs_info("jfs_lookup: name = %s", name); - - if ((name[0] == '.') && (len == 1)) - inum = dip->i_ino; - else if (strcmp(name, "..") == 0) - inum = PARENT(dip); - else { - if ((rc = get_UCSname(&key, dentry))) - return ERR_PTR(rc); - rc = dtSearch(dip, &key, &inum, &btstack, JFS_LOOKUP); - free_UCSname(&key); - if (rc == -ENOENT) { - d_add(dentry, NULL); - return NULL; - } else if (rc) { - jfs_err("jfs_lookup: dtSearch returned %d", rc); - return ERR_PTR(rc); - } - } - - ip = jfs_iget(dip->i_sb, inum); - if (IS_ERR(ip)) { - jfs_err("jfs_lookup: iget failed on inum %d", (uint) inum); - return ERR_CAST(ip); + jfs_info("jfs_lookup: name = %s", dentry->d_name.name); + + if ((rc = get_UCSname(&key, dentry))) + return ERR_PTR(rc); + rc = dtSearch(dip, &key, &inum, &btstack, JFS_LOOKUP); + free_UCSname(&key); + if (rc == -ENOENT) { + ip = NULL; + } else if (rc) { + jfs_err("jfs_lookup: dtSearch returned %d", rc); + ip = ERR_PTR(rc); + } else { + ip = jfs_iget(dip->i_sb, inum); + if (IS_ERR(ip)) + jfs_err("jfs_lookup: iget failed on inum %d", (uint)inum); } return d_splice_alias(ip, dentry); @@ -1548,7 +1536,7 @@ const struct inode_operations jfs_dir_inode_operations = { .removexattr = jfs_removexattr, .setattr = jfs_setattr, #ifdef CONFIG_JFS_POSIX_ACL - .check_acl = jfs_check_acl, + .get_acl = jfs_get_acl, #endif }; @@ -1597,8 +1585,6 @@ out: static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd) { - if (nd && nd->flags & LOOKUP_RCU) - return -ECHILD; /* * This is not negative dentry. Always valid. * @@ -1624,10 +1610,8 @@ static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd) * case sensitive name which is specified by user if this is * for creation. */ - if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) { - if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) - return 0; - } + if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) + return 0; return 1; } diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 06c8a67..a44eff0 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -485,7 +485,6 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) goto out_unload; } inode->i_ino = 0; - inode->i_nlink = 1; inode->i_size = sb->s_bdev->bd_inode->i_size; inode->i_mapping->a_ops = &jfs_metapage_aops; insert_inode_hash(inode); diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 24838f1..26683e1 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -693,8 +693,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name, return rc; } if (acl) { - mode_t mode = inode->i_mode; - rc = posix_acl_equiv_mode(acl, &mode); + rc = posix_acl_equiv_mode(acl, &inode->i_mode); posix_acl_release(acl); if (rc < 0) { printk(KERN_ERR @@ -702,7 +701,6 @@ static int can_set_system_xattr(struct inode *inode, const char *name, rc); return rc; } - inode->i_mode = mode; mark_inode_dirty(inode); } /* @@ -1091,38 +1089,37 @@ int jfs_removexattr(struct dentry *dentry, const char *name) } #ifdef CONFIG_JFS_SECURITY -int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir, - const struct qstr *qstr) +int jfs_initxattrs(struct inode *inode, const struct xattr *xattr_array, + void *fs_info) { - int rc; - size_t len; - void *value; - char *suffix; + const struct xattr *xattr; + tid_t *tid = fs_info; char *name; - - rc = security_inode_init_security(inode, dir, qstr, &suffix, &value, - &len); - if (rc) { - if (rc == -EOPNOTSUPP) - return 0; - return rc; - } - name = kmalloc(XATTR_SECURITY_PREFIX_LEN + 1 + strlen(suffix), - GFP_NOFS); - if (!name) { - rc = -ENOMEM; - goto kmalloc_failed; + int err = 0; + + for (xattr = xattr_array; xattr->name != NULL; xattr++) { + name = kmalloc(XATTR_SECURITY_PREFIX_LEN + + strlen(xattr->name) + 1, GFP_NOFS); + if (!name) { + err = -ENOMEM; + break; + } + strcpy(name, XATTR_SECURITY_PREFIX); + strcpy(name + XATTR_SECURITY_PREFIX_LEN, xattr->name); + + err = __jfs_setxattr(*tid, inode, name, + xattr->value, xattr->value_len, 0); + kfree(name); + if (err < 0) + break; } - strcpy(name, XATTR_SECURITY_PREFIX); - strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix); - - rc = __jfs_setxattr(tid, inode, name, value, len, 0); - - kfree(name); -kmalloc_failed: - kfree(suffix); - kfree(value); + return err; +} - return rc; +int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir, + const struct qstr *qstr) +{ + return security_inode_init_security(inode, dir, qstr, + &jfs_initxattrs, &tid); } #endif diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index fbb2a5e..10e6366 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -28,18 +28,6 @@ config NFSD If unsure, say N. -config NFSD_DEPRECATED - bool "Include support for deprecated syscall interface to NFSD" - depends on NFSD - default y - help - The syscall interface to nfsd was obsoleted in 2.6.0 by a new - filesystem based interface. The old interface is due for removal - in 2.6.40. If you wish to remove the interface before then - say N. - - In unsure, say Y. - config NFSD_V2_ACL bool depends on NFSD diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index d892be6..93cc9d3 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -69,7 +69,7 @@ enum { int nfsd_reply_cache_init(void); void nfsd_reply_cache_shutdown(void); -int nfsd_cache_lookup(struct svc_rqst *, int); +int nfsd_cache_lookup(struct svc_rqst *); void nfsd_cache_update(struct svc_rqst *, int, __be32 *); #ifdef CONFIG_NFSD_V4 diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index d7c4f02..a0205fc 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -16,7 +16,6 @@ #include #include -#include #include #include "nfsd.h" @@ -318,7 +317,6 @@ static void svc_export_put(struct kref *ref) struct svc_export *exp = container_of(ref, struct svc_export, h.ref); path_put(&exp->ex_path); auth_domain_put(exp->ex_client); - kfree(exp->ex_pathname); nfsd4_fslocs_free(&exp->ex_fslocs); kfree(exp); } @@ -528,11 +526,6 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) exp.ex_client = dom; - err = -ENOMEM; - exp.ex_pathname = kstrdup(buf, GFP_KERNEL); - if (!exp.ex_pathname) - goto out2; - /* expiry */ err = -EINVAL; exp.h.expiry_time = get_expiry(&mesg); @@ -613,8 +606,6 @@ out4: nfsd4_fslocs_free(&exp.ex_fslocs); kfree(exp.ex_uuid); out3: - kfree(exp.ex_pathname); -out2: path_put(&exp.ex_path); out1: auth_domain_put(dom); @@ -678,7 +669,6 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem) new->ex_client = item->ex_client; new->ex_path.dentry = dget(item->ex_path.dentry); new->ex_path.mnt = mntget(item->ex_path.mnt); - new->ex_pathname = NULL; new->ex_fslocs.locations = NULL; new->ex_fslocs.locations_count = 0; new->ex_fslocs.migrated = 0; @@ -696,8 +686,6 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem) new->ex_fsid = item->ex_fsid; new->ex_uuid = item->ex_uuid; item->ex_uuid = NULL; - new->ex_pathname = item->ex_pathname; - item->ex_pathname = NULL; new->ex_fslocs.locations = item->ex_fslocs.locations; item->ex_fslocs.locations = NULL; new->ex_fslocs.locations_count = item->ex_fslocs.locations_count; @@ -797,58 +785,6 @@ exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp) return ek; } -#ifdef CONFIG_NFSD_DEPRECATED -static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv, - struct svc_export *exp) -{ - struct svc_expkey key, *ek; - - key.ek_client = clp; - key.ek_fsidtype = fsid_type; - memcpy(key.ek_fsid, fsidv, key_len(fsid_type)); - key.ek_path = exp->ex_path; - key.h.expiry_time = NEVER; - key.h.flags = 0; - - ek = svc_expkey_lookup(&key); - if (ek) - ek = svc_expkey_update(&key,ek); - if (ek) { - cache_put(&ek->h, &svc_expkey_cache); - return 0; - } - return -ENOMEM; -} - -/* - * Find the client's export entry matching xdev/xino. - */ -static inline struct svc_expkey * -exp_get_key(svc_client *clp, dev_t dev, ino_t ino) -{ - u32 fsidv[3]; - - if (old_valid_dev(dev)) { - mk_fsid(FSID_DEV, fsidv, dev, ino, 0, NULL); - return exp_find_key(clp, FSID_DEV, fsidv, NULL); - } - mk_fsid(FSID_ENCODE_DEV, fsidv, dev, ino, 0, NULL); - return exp_find_key(clp, FSID_ENCODE_DEV, fsidv, NULL); -} - -/* - * Find the client's export entry matching fsid - */ -static inline struct svc_expkey * -exp_get_fsid_key(svc_client *clp, int fsid) -{ - u32 fsidv[2]; - - mk_fsid(FSID_NUM, fsidv, 0, 0, fsid, NULL); - - return exp_find_key(clp, FSID_NUM, fsidv, NULL); -} -#endif static svc_export *exp_get_by_name(svc_client *clp, const struct path *path, struct cache_req *reqp) @@ -890,275 +826,7 @@ static struct svc_export *exp_parent(svc_client *clp, struct path *path) return exp; } -#ifdef CONFIG_NFSD_DEPRECATED -/* - * Hashtable locking. Write locks are placed only by user processes - * wanting to modify export information. - * Write locking only done in this file. Read locking - * needed externally. - */ -static DECLARE_RWSEM(hash_sem); - -void -exp_readlock(void) -{ - down_read(&hash_sem); -} - -static inline void -exp_writelock(void) -{ - down_write(&hash_sem); -} - -void -exp_readunlock(void) -{ - up_read(&hash_sem); -} - -static inline void -exp_writeunlock(void) -{ - up_write(&hash_sem); -} -#else - -/* hash_sem not needed once deprecated interface is removed */ -void exp_readlock(void) {} -static inline void exp_writelock(void){} -void exp_readunlock(void) {} -static inline void exp_writeunlock(void){} - -#endif - -#ifdef CONFIG_NFSD_DEPRECATED -static void exp_do_unexport(svc_export *unexp); -static int exp_verify_string(char *cp, int max); - -static void exp_fsid_unhash(struct svc_export *exp) -{ - struct svc_expkey *ek; - - if ((exp->ex_flags & NFSEXP_FSID) == 0) - return; - - ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid); - if (!IS_ERR(ek)) { - sunrpc_invalidate(&ek->h, &svc_expkey_cache); - cache_put(&ek->h, &svc_expkey_cache); - } -} - -static int exp_fsid_hash(svc_client *clp, struct svc_export *exp) -{ - u32 fsid[2]; - - if ((exp->ex_flags & NFSEXP_FSID) == 0) - return 0; - - mk_fsid(FSID_NUM, fsid, 0, 0, exp->ex_fsid, NULL); - return exp_set_key(clp, FSID_NUM, fsid, exp); -} - -static int exp_hash(struct auth_domain *clp, struct svc_export *exp) -{ - u32 fsid[2]; - struct inode *inode = exp->ex_path.dentry->d_inode; - dev_t dev = inode->i_sb->s_dev; - - if (old_valid_dev(dev)) { - mk_fsid(FSID_DEV, fsid, dev, inode->i_ino, 0, NULL); - return exp_set_key(clp, FSID_DEV, fsid, exp); - } - mk_fsid(FSID_ENCODE_DEV, fsid, dev, inode->i_ino, 0, NULL); - return exp_set_key(clp, FSID_ENCODE_DEV, fsid, exp); -} - -static void exp_unhash(struct svc_export *exp) -{ - struct svc_expkey *ek; - struct inode *inode = exp->ex_path.dentry->d_inode; - - ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino); - if (!IS_ERR(ek)) { - sunrpc_invalidate(&ek->h, &svc_expkey_cache); - cache_put(&ek->h, &svc_expkey_cache); - } -} - -/* - * Export a file system. - */ -int -exp_export(struct nfsctl_export *nxp) -{ - svc_client *clp; - struct svc_export *exp = NULL; - struct svc_export new; - struct svc_expkey *fsid_key = NULL; - struct path path; - int err; - - /* Consistency check */ - err = -EINVAL; - if (!exp_verify_string(nxp->ex_path, NFS_MAXPATHLEN) || - !exp_verify_string(nxp->ex_client, NFSCLNT_IDMAX)) - goto out; - - dprintk("exp_export called for %s:%s (%x/%ld fl %x).\n", - nxp->ex_client, nxp->ex_path, - (unsigned)nxp->ex_dev, (long)nxp->ex_ino, - nxp->ex_flags); - - /* Try to lock the export table for update */ - exp_writelock(); - - /* Look up client info */ - if (!(clp = auth_domain_find(nxp->ex_client))) - goto out_unlock; - - - /* Look up the dentry */ - err = kern_path(nxp->ex_path, 0, &path); - if (err) - goto out_put_clp; - err = -EINVAL; - - exp = exp_get_by_name(clp, &path, NULL); - - memset(&new, 0, sizeof(new)); - - /* must make sure there won't be an ex_fsid clash */ - if ((nxp->ex_flags & NFSEXP_FSID) && - (!IS_ERR(fsid_key = exp_get_fsid_key(clp, nxp->ex_dev))) && - fsid_key->ek_path.mnt && - (fsid_key->ek_path.mnt != path.mnt || - fsid_key->ek_path.dentry != path.dentry)) - goto finish; - - if (!IS_ERR(exp)) { - /* just a flags/id/fsid update */ - - exp_fsid_unhash(exp); - exp->ex_flags = nxp->ex_flags; - exp->ex_anon_uid = nxp->ex_anon_uid; - exp->ex_anon_gid = nxp->ex_anon_gid; - exp->ex_fsid = nxp->ex_dev; - - err = exp_fsid_hash(clp, exp); - goto finish; - } - - err = check_export(path.dentry->d_inode, &nxp->ex_flags, NULL); - if (err) goto finish; - - err = -ENOMEM; - - dprintk("nfsd: creating export entry %p for client %p\n", exp, clp); - - new.h.expiry_time = NEVER; - new.h.flags = 0; - new.ex_pathname = kstrdup(nxp->ex_path, GFP_KERNEL); - if (!new.ex_pathname) - goto finish; - new.ex_client = clp; - new.ex_path = path; - new.ex_flags = nxp->ex_flags; - new.ex_anon_uid = nxp->ex_anon_uid; - new.ex_anon_gid = nxp->ex_anon_gid; - new.ex_fsid = nxp->ex_dev; - - exp = svc_export_lookup(&new); - if (exp) - exp = svc_export_update(&new, exp); - - if (!exp) - goto finish; - - if (exp_hash(clp, exp) || - exp_fsid_hash(clp, exp)) { - /* failed to create at least one index */ - exp_do_unexport(exp); - cache_flush(); - } else - err = 0; -finish: - kfree(new.ex_pathname); - if (!IS_ERR_OR_NULL(exp)) - exp_put(exp); - if (!IS_ERR_OR_NULL(fsid_key)) - cache_put(&fsid_key->h, &svc_expkey_cache); - path_put(&path); -out_put_clp: - auth_domain_put(clp); -out_unlock: - exp_writeunlock(); -out: - return err; -} - -/* - * Unexport a file system. The export entry has already - * been removed from the client's list of exported fs's. - */ -static void -exp_do_unexport(svc_export *unexp) -{ - sunrpc_invalidate(&unexp->h, &svc_export_cache); - exp_unhash(unexp); - exp_fsid_unhash(unexp); -} - - -/* - * unexport syscall. - */ -int -exp_unexport(struct nfsctl_export *nxp) -{ - struct auth_domain *dom; - svc_export *exp; - struct path path; - int err; - - /* Consistency check */ - if (!exp_verify_string(nxp->ex_path, NFS_MAXPATHLEN) || - !exp_verify_string(nxp->ex_client, NFSCLNT_IDMAX)) - return -EINVAL; - - exp_writelock(); - - err = -EINVAL; - dom = auth_domain_find(nxp->ex_client); - if (!dom) { - dprintk("nfsd: unexport couldn't find %s\n", nxp->ex_client); - goto out_unlock; - } - - err = kern_path(nxp->ex_path, 0, &path); - if (err) - goto out_domain; - - err = -EINVAL; - exp = exp_get_by_name(dom, &path, NULL); - path_put(&path); - if (IS_ERR(exp)) - goto out_domain; - - exp_do_unexport(exp); - exp_put(exp); - err = 0; - -out_domain: - auth_domain_put(dom); - cache_flush(); -out_unlock: - exp_writeunlock(); - return err; -} -#endif /* CONFIG_NFSD_DEPRECATED */ /* * Obtain the root fh on behalf of a client. @@ -1330,7 +998,7 @@ rqst_exp_parent(struct svc_rqst *rqstp, struct path *path) return exp; } -static struct svc_export *find_fsidzero_export(struct svc_rqst *rqstp) +struct svc_export *rqst_find_fsidzero_export(struct svc_rqst *rqstp) { u32 fsidv[2]; @@ -1350,7 +1018,7 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp) struct svc_export *exp; __be32 rv; - exp = find_fsidzero_export(rqstp); + exp = rqst_find_fsidzero_export(rqstp); if (IS_ERR(exp)) return nfserrno(PTR_ERR(exp)); rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL); @@ -1367,7 +1035,6 @@ static void *e_start(struct seq_file *m, loff_t *pos) unsigned hash, export; struct cache_head *ch; - exp_readlock(); read_lock(&svc_export_cache.hash_lock); if (!n--) return SEQ_START_TOKEN; @@ -1418,7 +1085,6 @@ static void e_stop(struct seq_file *m, void *p) __releases(svc_export_cache.hash_lock) { read_unlock(&svc_export_cache.hash_lock); - exp_readunlock(); } static struct flags { @@ -1550,97 +1216,6 @@ const struct seq_operations nfs_exports_op = { .show = e_show, }; -#ifdef CONFIG_NFSD_DEPRECATED -/* - * Add or modify a client. - * Change requests may involve the list of host addresses. The list of - * exports and possibly existing uid maps are left untouched. - */ -int -exp_addclient(struct nfsctl_client *ncp) -{ - struct auth_domain *dom; - int i, err; - struct in6_addr addr6; - - /* First, consistency check. */ - err = -EINVAL; - if (! exp_verify_string(ncp->cl_ident, NFSCLNT_IDMAX)) - goto out; - if (ncp->cl_naddr > NFSCLNT_ADDRMAX) - goto out; - - /* Lock the hashtable */ - exp_writelock(); - - dom = unix_domain_find(ncp->cl_ident); - - err = -ENOMEM; - if (!dom) - goto out_unlock; - - /* Insert client into hashtable. */ - for (i = 0; i < ncp->cl_naddr; i++) { - ipv6_addr_set_v4mapped(ncp->cl_addrlist[i].s_addr, &addr6); - auth_unix_add_addr(&init_net, &addr6, dom); - } - auth_unix_forget_old(dom); - auth_domain_put(dom); - - err = 0; - -out_unlock: - exp_writeunlock(); -out: - return err; -} - -/* - * Delete a client given an identifier. - */ -int -exp_delclient(struct nfsctl_client *ncp) -{ - int err; - struct auth_domain *dom; - - err = -EINVAL; - if (!exp_verify_string(ncp->cl_ident, NFSCLNT_IDMAX)) - goto out; - - /* Lock the hashtable */ - exp_writelock(); - - dom = auth_domain_find(ncp->cl_ident); - /* just make sure that no addresses work - * and that it will expire soon - */ - if (dom) { - err = auth_unix_forget_old(dom); - auth_domain_put(dom); - } - - exp_writeunlock(); -out: - return err; -} - -/* - * Verify that string is non-empty and does not exceed max length. - */ -static int -exp_verify_string(char *cp, int max) -{ - int i; - - for (i = 0; i < max; i++) - if (!cp[i]) - return i; - cp[i] = 0; - printk(KERN_NOTICE "nfsd: couldn't validate string %s\n", cp); - return 0; -} -#endif /* CONFIG_NFSD_DEPRECATED */ /* * Initialize the exports module. @@ -1667,10 +1242,8 @@ nfsd_export_init(void) void nfsd_export_flush(void) { - exp_writelock(); cache_purge(&svc_expkey_cache); cache_purge(&svc_export_cache); - exp_writeunlock(); } /* @@ -1682,12 +1255,9 @@ nfsd_export_shutdown(void) dprintk("nfsd: shutting down export module.\n"); - exp_writelock(); - cache_unregister(&svc_expkey_cache); cache_unregister(&svc_export_cache); svcauth_unix_purge(); - exp_writeunlock(); dprintk("nfsd: export shutdown complete.\n"); } diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index 7c831a2..77e7a5c 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c @@ -35,10 +35,8 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp) memcpy((char*)&fh.fh_handle.fh_base, f->data, f->size); fh.fh_export = NULL; - exp_readlock(); nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp); fh_put(&fh); - exp_readunlock(); /* We return nlm error codes as nlm doesn't know * about nfsd, but nfsd does know about nlm.. */ diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index ad88f1c..435a9be 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -36,6 +36,7 @@ #include #include +#include #include "acl.h" @@ -372,8 +373,10 @@ sort_pacl(struct posix_acl *pacl) * by uid/gid. */ int i, j; - if (pacl->a_count <= 4) - return; /* no users or groups */ + /* no users or groups */ + if (!pacl || pacl->a_count <= 4) + return; + i = 1; while (pacl->a_entries[i].e_tag == ACL_USER) i++; @@ -497,13 +500,12 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) /* * ACLs with no ACEs are treated differently in the inheritable - * and effective cases: when there are no inheritable ACEs, we - * set a zero-length default posix acl: + * and effective cases: when there are no inheritable ACEs, + * calls ->set_acl with a NULL ACL structure. */ - if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT)) { - pacl = posix_acl_alloc(0, GFP_KERNEL); - return pacl ? pacl : ERR_PTR(-ENOMEM); - } + if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT)) + return NULL; + /* * When there are no effective ACEs, the following will end * up setting a 3-element effective posix ACL with all diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 02eb4ed..51b4f43 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -39,6 +39,8 @@ #define NFSDDBG_FACILITY NFSDDBG_PROC +static void nfsd4_mark_cb_fault(struct nfs4_client *, int reason); + #define NFSPROC4_CB_NULL 0 #define NFSPROC4_CB_COMPOUND 1 @@ -351,7 +353,7 @@ static void encode_cb_recall4args(struct xdr_stream *xdr, __be32 *p; encode_nfs_cb_opnum4(xdr, OP_CB_RECALL); - encode_stateid4(xdr, &dp->dl_stateid); + encode_stateid4(xdr, &dp->dl_stid.sc_stateid); p = xdr_reserve_space(xdr, 4); *p++ = xdr_zero; /* truncate */ @@ -460,6 +462,8 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr, */ status = 0; out: + if (status) + nfsd4_mark_cb_fault(cb->cb_clp, status); return status; out_overflow: print_overflow_msg(__func__, xdr); @@ -629,9 +633,11 @@ static int max_cb_time(void) static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses) { + int maxtime = max_cb_time(); struct rpc_timeout timeparms = { - .to_initval = max_cb_time(), + .to_initval = maxtime, .to_retries = 0, + .to_maxval = maxtime, }; struct rpc_create_args args = { .net = &init_net, @@ -686,6 +692,12 @@ static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason) warn_no_callback_path(clp, reason); } +static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason) +{ + clp->cl_cb_state = NFSD4_CB_FAULT; + warn_no_callback_path(clp, reason); +} + static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) { struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null); @@ -773,8 +785,12 @@ static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task) { if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { rpc_sleep_on(&clp->cl_cb_waitq, task, NULL); - dprintk("%s slot is busy\n", __func__); - return false; + /* Race breaker */ + if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { + dprintk("%s slot is busy\n", __func__); + return false; + } + rpc_wake_up_queued_task(&clp->cl_cb_waitq, task); } return true; } @@ -787,7 +803,7 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) { struct nfsd4_callback *cb = calldata; struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); - struct nfs4_client *clp = dp->dl_client; + struct nfs4_client *clp = dp->dl_stid.sc_client; u32 minorversion = clp->cl_minorversion; cb->cb_minorversion = minorversion; @@ -809,7 +825,7 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) { struct nfsd4_callback *cb = calldata; struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); - struct nfs4_client *clp = dp->dl_client; + struct nfs4_client *clp = dp->dl_stid.sc_client; dprintk("%s: minorversion=%d\n", __func__, clp->cl_minorversion); @@ -832,7 +848,7 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) { struct nfsd4_callback *cb = calldata; struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); - struct nfs4_client *clp = dp->dl_client; + struct nfs4_client *clp = dp->dl_stid.sc_client; struct rpc_clnt *current_rpc_client = clp->cl_cb_client; nfsd4_cb_done(task, calldata); @@ -1006,7 +1022,7 @@ void nfsd4_do_callback_rpc(struct work_struct *w) void nfsd4_cb_recall(struct nfs4_delegation *dp) { struct nfsd4_callback *cb = &dp->dl_recall; - struct nfs4_client *clp = dp->dl_client; + struct nfs4_client *clp = dp->dl_stid.sc_client; dp->dl_retries = 1; cb->cb_op = dp; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d06a02c..9a959de 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -35,6 +35,7 @@ #include #include +#include "idmap.h" #include "cache.h" #include "xdr4.h" #include "vfs.h" @@ -170,12 +171,30 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs return status; } +static __be32 nfsd_check_obj_isreg(struct svc_fh *fh) +{ + umode_t mode = fh->fh_dentry->d_inode->i_mode; + + if (S_ISREG(mode)) + return nfs_ok; + if (S_ISDIR(mode)) + return nfserr_isdir; + /* + * Using err_symlink as our catch-all case may look odd; but + * there's no other obvious error for this case in 4.0, and we + * happen to know that it will cause the linux v4 client to do + * the right thing on attempts to open something other than a + * regular file. + */ + return nfserr_symlink; +} + static __be32 do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) { struct svc_fh resfh; + int accmode; __be32 status; - int created = 0; fh_init(&resfh, NFS4_FHSIZE); open->op_truncate = 0; @@ -204,7 +223,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o open->op_fname.len, &open->op_iattr, &resfh, open->op_createmode, (u32 *)open->op_verf.data, - &open->op_truncate, &created); + &open->op_truncate, &open->op_created); /* * Following rfc 3530 14.2.16, use the returned bitmask @@ -213,7 +232,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o */ if (open->op_createmode == NFS4_CREATE_EXCLUSIVE && status == 0) open->op_bmval[1] = (FATTR4_WORD1_TIME_ACCESS | - FATTR4_WORD1_TIME_MODIFY); + FATTR4_WORD1_TIME_MODIFY); } else { status = nfsd_lookup(rqstp, current_fh, open->op_fname.data, open->op_fname.len, &resfh); @@ -221,6 +240,9 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o } if (status) goto out; + status = nfsd_check_obj_isreg(&resfh); + if (status) + goto out; if (is_create_with_attrs(open) && open->op_acl != NULL) do_set_nfs4_acl(rqstp, &resfh, open->op_acl, open->op_bmval); @@ -229,11 +251,12 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o fh_dup2(current_fh, &resfh); /* set reply cache */ - fh_copy_shallow(&open->op_stateowner->so_replay.rp_openfh, + fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh, &resfh.fh_handle); - if (!created) - status = do_open_permission(rqstp, current_fh, open, - NFSD_MAY_NOP); + accmode = NFSD_MAY_NOP; + if (open->op_created) + accmode |= NFSD_MAY_OWNER_OVERRIDE; + status = do_open_permission(rqstp, current_fh, open, accmode); out: fh_put(&resfh); @@ -244,6 +267,7 @@ static __be32 do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) { __be32 status; + int accmode = 0; /* Only reclaims from previously confirmed clients are valid */ if ((status = nfs4_check_open_reclaim(&open->op_clientid))) @@ -256,14 +280,24 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_ memset(&open->op_cinfo, 0, sizeof(struct nfsd4_change_info)); /* set replay cache */ - fh_copy_shallow(&open->op_stateowner->so_replay.rp_openfh, + fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh, ¤t_fh->fh_handle); open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) && (open->op_iattr.ia_size == 0); + /* + * In the delegation case, the client is telling us about an + * open that it *already* performed locally, some time ago. We + * should let it succeed now if possible. + * + * In the case of a CLAIM_FH open, on the other hand, the client + * may be counting on us to enforce permissions (the Linux 4.1 + * client uses this for normal opens, for example). + */ + if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEG_CUR_FH) + accmode = NFSD_MAY_OWNER_OVERRIDE; - status = do_open_permission(rqstp, current_fh, open, - NFSD_MAY_OWNER_OVERRIDE); + status = do_open_permission(rqstp, current_fh, open, accmode); return status; } @@ -285,14 +319,27 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 status; struct nfsd4_compoundres *resp; - dprintk("NFSD: nfsd4_open filename %.*s op_stateowner %p\n", + dprintk("NFSD: nfsd4_open filename %.*s op_openowner %p\n", (int)open->op_fname.len, open->op_fname.data, - open->op_stateowner); + open->op_openowner); /* This check required by spec. */ if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) return nfserr_inval; + /* We don't yet support WANT bits: */ + open->op_share_access &= NFS4_SHARE_ACCESS_MASK; + + open->op_created = 0; + /* + * RFC5661 18.51.3 + * Before RECLAIM_COMPLETE done, server should deny new lock + */ + if (nfsd4_has_session(cstate) && + !cstate->session->se_client->cl_firststate && + open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) + return nfserr_grace; + if (nfsd4_has_session(cstate)) copy_clientid(&open->op_clientid, cstate->session); @@ -302,7 +349,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, resp = rqstp->rq_resp; status = nfsd4_process_open1(&resp->cstate, open); if (status == nfserr_replay_me) { - struct nfs4_replay *rp = &open->op_stateowner->so_replay; + struct nfs4_replay *rp = &open->op_openowner->oo_owner.so_replay; fh_put(&cstate->current_fh); fh_copy_shallow(&cstate->current_fh.fh_handle, &rp->rp_openfh); @@ -332,32 +379,23 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, switch (open->op_claim_type) { case NFS4_OPEN_CLAIM_DELEGATE_CUR: case NFS4_OPEN_CLAIM_NULL: - /* - * (1) set CURRENT_FH to the file being opened, - * creating it if necessary, (2) set open->op_cinfo, - * (3) set open->op_truncate if the file is to be - * truncated after opening, (4) do permission checking. - */ status = do_open_lookup(rqstp, &cstate->current_fh, open); if (status) goto out; break; case NFS4_OPEN_CLAIM_PREVIOUS: - open->op_stateowner->so_confirmed = 1; - /* - * The CURRENT_FH is already set to the file being - * opened. (1) set open->op_cinfo, (2) set - * open->op_truncate if the file is to be truncated - * after opening, (3) do permission checking. - */ + open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; + case NFS4_OPEN_CLAIM_FH: + case NFS4_OPEN_CLAIM_DELEG_CUR_FH: status = do_open_fhandle(rqstp, &cstate->current_fh, open); if (status) goto out; break; + case NFS4_OPEN_CLAIM_DELEG_PREV_FH: case NFS4_OPEN_CLAIM_DELEGATE_PREV: - open->op_stateowner->so_confirmed = 1; + open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; dprintk("NFSD: unsupported OPEN claim type %d\n", open->op_claim_type); status = nfserr_notsupp; @@ -374,12 +412,13 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * set, (2) sets open->op_stateid, (3) sets open->op_delegation. */ status = nfsd4_process_open2(rqstp, &cstate->current_fh, open); + WARN_ON(status && open->op_created); out: - if (open->op_stateowner) { - nfs4_get_stateowner(open->op_stateowner); - cstate->replay_owner = open->op_stateowner; - } - nfs4_unlock_state(); + nfsd4_cleanup_open_state(open, status); + if (open->op_openowner) + cstate->replay_owner = &open->op_openowner->oo_owner; + else + nfs4_unlock_state(); return status; } @@ -460,17 +499,12 @@ static __be32 nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_commit *commit) { - __be32 status; - u32 *p = (u32 *)commit->co_verf.data; *p++ = nfssvc_boot.tv_sec; *p++ = nfssvc_boot.tv_usec; - status = nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset, + return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset, commit->co_count); - if (status == nfserr_symlink) - status = nfserr_inval; - return status; } static __be32 @@ -485,8 +519,6 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, NFSD_MAY_CREATE); - if (status == nfserr_symlink) - status = nfserr_notdir; if (status) return status; @@ -497,15 +529,6 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, switch (create->cr_type) { case NF4LNK: - /* ugh! we have to null-terminate the linktext, or - * vfs_symlink() will choke. it is always safe to - * null-terminate by brute force, since at worst we - * will overwrite the first byte of the create namelen - * in the XDR buffer, which has already been extracted - * during XDR decode. - */ - create->cr_linkname[create->cr_linklen] = 0; - status = nfsd_symlink(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, create->cr_linkname, create->cr_linklen, @@ -712,8 +735,6 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_grace; status = nfsd_unlink(rqstp, &cstate->current_fh, 0, remove->rm_name, remove->rm_namelen); - if (status == nfserr_symlink) - return nfserr_notdir; if (!status) { fh_unlock(&cstate->current_fh); set_change_info(&remove->rm_cinfo, &cstate->current_fh); @@ -744,8 +765,6 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, (S_ISDIR(cstate->save_fh.fh_dentry->d_inode->i_mode) && S_ISDIR(cstate->current_fh.fh_dentry->d_inode->i_mode))) status = nfserr_exist; - else if (status == nfserr_symlink) - status = nfserr_notdir; if (!status) { set_change_info(&rename->rn_sinfo, &cstate->current_fh); @@ -863,14 +882,14 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); status = nfs4_preprocess_stateid_op(cstate, stateid, WR_STATE, &filp); - if (filp) - get_file(filp); - nfs4_unlock_state(); - if (status) { + nfs4_unlock_state(); dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); return status; } + if (filp) + get_file(filp); + nfs4_unlock_state(); cnt = write->wr_buflen; write->wr_how_written = write->wr_stable_how; @@ -886,8 +905,6 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, write->wr_bytes_written = cnt; - if (status == nfserr_symlink) - status = nfserr_inval; return status; } @@ -988,6 +1005,8 @@ static inline void nfsd4_increment_op_stats(u32 opnum) typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *, void *); +typedef u32(*nfsd4op_rsize)(struct svc_rqst *, struct nfsd4_op *op); + enum nfsd4_op_flags { ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */ @@ -995,12 +1014,31 @@ enum nfsd4_op_flags { /* For rfc 5661 section 2.6.3.1.1: */ OP_HANDLES_WRONGSEC = 1 << 3, OP_IS_PUTFH_LIKE = 1 << 4, + /* + * These are the ops whose result size we estimate before + * encoding, to avoid performing an op then not being able to + * respond or cache a response. This includes writes and setattrs + * as well as the operations usually called "nonidempotent": + */ + OP_MODIFIES_SOMETHING = 1 << 5, + /* + * Cache compounds containing these ops in the xid-based drc: + * We use the DRC for compounds containing non-idempotent + * operations, *except* those that are 4.1-specific (since + * sessions provide their own EOS), and except for stateful + * operations other than setclientid and setclientid_confirm + * (since sequence numbers provide EOS for open, lock, etc in + * the v4.0 case). + */ + OP_CACHEME = 1 << 6, }; struct nfsd4_operation { nfsd4op_func op_func; u32 op_flags; char *op_name; + /* Try to get response size before operation */ + nfsd4op_rsize op_rsize_bop; }; static struct nfsd4_operation nfsd4_ops[]; @@ -1045,6 +1083,11 @@ static inline struct nfsd4_operation *OPDESC(struct nfsd4_op *op) return &nfsd4_ops[op->opnum]; } +bool nfsd4_cache_this_op(struct nfsd4_op *op) +{ + return OPDESC(op)->op_flags & OP_CACHEME; +} + static bool need_wrongsec_check(struct svc_rqst *rqstp) { struct nfsd4_compoundres *resp = rqstp->rq_resp; @@ -1068,7 +1111,8 @@ static bool need_wrongsec_check(struct svc_rqst *rqstp) */ if (argp->opcnt == resp->opcnt) return false; - + if (next->opnum == OP_ILLEGAL) + return false; nextd = OPDESC(next); /* * Rest of 2.6.3.1.1: certain operations will return WRONGSEC @@ -1090,6 +1134,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, struct nfsd4_operation *opdesc; struct nfsd4_compound_state *cstate = &resp->cstate; int slack_bytes; + u32 plen = 0; __be32 status; resp->xbuf = &rqstp->rq_res; @@ -1168,6 +1213,21 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, goto encode_op; } + /* If op is non-idempotent */ + if (opdesc->op_flags & OP_MODIFIES_SOMETHING) { + plen = opdesc->op_rsize_bop(rqstp, op); + /* + * If there's still another operation, make sure + * we'll have space to at least encode an error: + */ + if (resp->opcnt < args->opcnt) + plen += COMPOUND_ERR_SLACK_SPACE; + op->status = nfsd4_check_resp_size(resp, plen); + } + + if (op->status) + goto encode_op; + if (opdesc->op_func) op->status = opdesc->op_func(rqstp, cstate, &op->u); else @@ -1197,7 +1257,7 @@ encode_op: be32_to_cpu(status)); if (cstate->replay_owner) { - nfs4_put_stateowner(cstate->replay_owner); + nfs4_unlock_state(); cstate->replay_owner = NULL; } /* XXX Ugh, we need to get rid of this kind of special case: */ @@ -1212,13 +1272,151 @@ encode_op: fh_put(&resp->cstate.save_fh); BUG_ON(resp->cstate.replay_owner); out: - nfsd4_release_compoundargs(args); /* Reset deferral mechanism for RPC deferrals */ rqstp->rq_usedeferral = 1; dprintk("nfsv4 compound returned %d\n", ntohl(status)); return status; } +#define op_encode_hdr_size (2) +#define op_encode_stateid_maxsz (XDR_QUADLEN(NFS4_STATEID_SIZE)) +#define op_encode_verifier_maxsz (XDR_QUADLEN(NFS4_VERIFIER_SIZE)) +#define op_encode_change_info_maxsz (5) +#define nfs4_fattr_bitmap_maxsz (4) + +#define op_encode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) +#define op_encode_lock_denied_maxsz (8 + op_encode_lockowner_maxsz) + +#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) + +#define op_encode_ace_maxsz (3 + nfs4_owner_maxsz) +#define op_encode_delegation_maxsz (1 + op_encode_stateid_maxsz + 1 + \ + op_encode_ace_maxsz) + +#define op_encode_channel_attrs_maxsz (6 + 1 + 1) + +static inline u32 nfsd4_only_status_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size) * sizeof(__be32); +} + +static inline u32 nfsd4_status_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_stateid_maxsz)* sizeof(__be32); +} + +static inline u32 nfsd4_commit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32); +} + +static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_change_info_maxsz + + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); +} + +static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_change_info_maxsz) + * sizeof(__be32); +} + +static inline u32 nfsd4_lock_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_lock_denied_maxsz) + * sizeof(__be32); +} + +static inline u32 nfsd4_open_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_stateid_maxsz + + op_encode_change_info_maxsz + 1 + + nfs4_fattr_bitmap_maxsz + + op_encode_delegation_maxsz) * sizeof(__be32); +} + +static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + u32 maxcount = 0, rlen = 0; + + maxcount = svc_max_payload(rqstp); + rlen = op->u.read.rd_length; + + if (rlen > maxcount) + rlen = maxcount; + + return (op_encode_hdr_size + 2) * sizeof(__be32) + rlen; +} + +static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + u32 rlen = op->u.readdir.rd_maxcount; + + if (rlen > PAGE_SIZE) + rlen = PAGE_SIZE; + + return (op_encode_hdr_size + op_encode_verifier_maxsz) + * sizeof(__be32) + rlen; +} + +static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_change_info_maxsz) + * sizeof(__be32); +} + +static inline u32 nfsd4_rename_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_change_info_maxsz + + op_encode_change_info_maxsz) * sizeof(__be32); +} + +static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); +} + +static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) * + sizeof(__be32); +} + +static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32); +} + +static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\ + 1 + 1 + 0 + /* eir_flags, spr_how, SP4_NONE (for now) */\ + 2 + /*eir_server_owner.so_minor_id */\ + /* eir_server_owner.so_major_id<> */\ + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\ + /* eir_server_scope<> */\ + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\ + 1 + /* eir_server_impl_id array length */\ + 0 /* ignored eir_server_impl_id contents */) * sizeof(__be32); +} + +static inline u32 nfsd4_bind_conn_to_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + \ + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* bctsr_sessid */\ + 2 /* bctsr_dir, use_conn_in_rdma_mode */) * sizeof(__be32); +} + +static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + \ + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* sessionid */\ + 2 + /* csr_sequence, csr_flags */\ + op_encode_channel_attrs_maxsz + \ + op_encode_channel_attrs_maxsz) * sizeof(__be32); +} + static struct nfsd4_operation nfsd4_ops[] = { [OP_ACCESS] = { .op_func = (nfsd4op_func)nfsd4_access, @@ -1226,19 +1424,27 @@ static struct nfsd4_operation nfsd4_ops[] = { }, [OP_CLOSE] = { .op_func = (nfsd4op_func)nfsd4_close, + .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_CLOSE", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize, }, [OP_COMMIT] = { .op_func = (nfsd4op_func)nfsd4_commit, + .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_COMMIT", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_commit_rsize, }, [OP_CREATE] = { .op_func = (nfsd4op_func)nfsd4_create, + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, .op_name = "OP_CREATE", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_create_rsize, }, [OP_DELEGRETURN] = { .op_func = (nfsd4op_func)nfsd4_delegreturn, + .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_DELEGRETURN", + .op_rsize_bop = nfsd4_only_status_rsize, }, [OP_GETATTR] = { .op_func = (nfsd4op_func)nfsd4_getattr, @@ -1251,11 +1457,16 @@ static struct nfsd4_operation nfsd4_ops[] = { }, [OP_LINK] = { .op_func = (nfsd4op_func)nfsd4_link, + .op_flags = ALLOWED_ON_ABSENT_FS | OP_MODIFIES_SOMETHING + | OP_CACHEME, .op_name = "OP_LINK", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_link_rsize, }, [OP_LOCK] = { .op_func = (nfsd4op_func)nfsd4_lock, + .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_LOCK", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize, }, [OP_LOCKT] = { .op_func = (nfsd4op_func)nfsd4_lockt, @@ -1263,7 +1474,9 @@ static struct nfsd4_operation nfsd4_ops[] = { }, [OP_LOCKU] = { .op_func = (nfsd4op_func)nfsd4_locku, + .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_LOCKU", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize, }, [OP_LOOKUP] = { .op_func = (nfsd4op_func)nfsd4_lookup, @@ -1281,42 +1494,54 @@ static struct nfsd4_operation nfsd4_ops[] = { }, [OP_OPEN] = { .op_func = (nfsd4op_func)nfsd4_open, - .op_flags = OP_HANDLES_WRONGSEC, + .op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING, .op_name = "OP_OPEN", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_open_rsize, }, [OP_OPEN_CONFIRM] = { .op_func = (nfsd4op_func)nfsd4_open_confirm, + .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_OPEN_CONFIRM", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize, }, [OP_OPEN_DOWNGRADE] = { .op_func = (nfsd4op_func)nfsd4_open_downgrade, + .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_OPEN_DOWNGRADE", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize, }, [OP_PUTFH] = { .op_func = (nfsd4op_func)nfsd4_putfh, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS - | OP_IS_PUTFH_LIKE, + | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING, .op_name = "OP_PUTFH", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_PUTPUBFH] = { .op_func = (nfsd4op_func)nfsd4_putrootfh, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS - | OP_IS_PUTFH_LIKE, + | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING, .op_name = "OP_PUTPUBFH", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_PUTROOTFH] = { .op_func = (nfsd4op_func)nfsd4_putrootfh, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS - | OP_IS_PUTFH_LIKE, + | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING, .op_name = "OP_PUTROOTFH", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_READ] = { .op_func = (nfsd4op_func)nfsd4_read, + .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_READ", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_read_rsize, }, [OP_READDIR] = { .op_func = (nfsd4op_func)nfsd4_readdir, + .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_READDIR", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_readdir_rsize, }, [OP_READLINK] = { .op_func = (nfsd4op_func)nfsd4_readlink, @@ -1324,27 +1549,36 @@ static struct nfsd4_operation nfsd4_ops[] = { }, [OP_REMOVE] = { .op_func = (nfsd4op_func)nfsd4_remove, + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, .op_name = "OP_REMOVE", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_remove_rsize, }, [OP_RENAME] = { - .op_name = "OP_RENAME", .op_func = (nfsd4op_func)nfsd4_rename, + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_name = "OP_RENAME", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_rename_rsize, }, [OP_RENEW] = { .op_func = (nfsd4op_func)nfsd4_renew, - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_MODIFIES_SOMETHING, .op_name = "OP_RENEW", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, + }, [OP_RESTOREFH] = { .op_func = (nfsd4op_func)nfsd4_restorefh, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS - | OP_IS_PUTFH_LIKE, + | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING, .op_name = "OP_RESTOREFH", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_SAVEFH] = { .op_func = (nfsd4op_func)nfsd4_savefh, - .op_flags = OP_HANDLES_WRONGSEC, + .op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING, .op_name = "OP_SAVEFH", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_SECINFO] = { .op_func = (nfsd4op_func)nfsd4_secinfo, @@ -1354,16 +1588,22 @@ static struct nfsd4_operation nfsd4_ops[] = { [OP_SETATTR] = { .op_func = (nfsd4op_func)nfsd4_setattr, .op_name = "OP_SETATTR", + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_rsize_bop = (nfsd4op_rsize)nfsd4_setattr_rsize, }, [OP_SETCLIENTID] = { .op_func = (nfsd4op_func)nfsd4_setclientid, - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_MODIFIES_SOMETHING | OP_CACHEME, .op_name = "OP_SETCLIENTID", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_setclientid_rsize, }, [OP_SETCLIENTID_CONFIRM] = { .op_func = (nfsd4op_func)nfsd4_setclientid_confirm, - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_MODIFIES_SOMETHING | OP_CACHEME, .op_name = "OP_SETCLIENTID_CONFIRM", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_VERIFY] = { .op_func = (nfsd4op_func)nfsd4_verify, @@ -1371,50 +1611,81 @@ static struct nfsd4_operation nfsd4_ops[] = { }, [OP_WRITE] = { .op_func = (nfsd4op_func)nfsd4_write, + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, .op_name = "OP_WRITE", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize, }, [OP_RELEASE_LOCKOWNER] = { .op_func = (nfsd4op_func)nfsd4_release_lockowner, - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_MODIFIES_SOMETHING, .op_name = "OP_RELEASE_LOCKOWNER", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, /* NFSv4.1 operations */ [OP_EXCHANGE_ID] = { .op_func = (nfsd4op_func)nfsd4_exchange_id, - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP + | OP_MODIFIES_SOMETHING, .op_name = "OP_EXCHANGE_ID", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_exchange_id_rsize, }, [OP_BIND_CONN_TO_SESSION] = { .op_func = (nfsd4op_func)nfsd4_bind_conn_to_session, - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP + | OP_MODIFIES_SOMETHING, .op_name = "OP_BIND_CONN_TO_SESSION", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_bind_conn_to_session_rsize, }, [OP_CREATE_SESSION] = { .op_func = (nfsd4op_func)nfsd4_create_session, - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP + | OP_MODIFIES_SOMETHING, .op_name = "OP_CREATE_SESSION", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_create_session_rsize, }, [OP_DESTROY_SESSION] = { .op_func = (nfsd4op_func)nfsd4_destroy_session, - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP + | OP_MODIFIES_SOMETHING, .op_name = "OP_DESTROY_SESSION", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_SEQUENCE] = { .op_func = (nfsd4op_func)nfsd4_sequence, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, .op_name = "OP_SEQUENCE", }, + [OP_DESTROY_CLIENTID] = { + .op_func = (nfsd4op_func)nfsd4_destroy_clientid, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP + | OP_MODIFIES_SOMETHING, + .op_name = "OP_DESTROY_CLIENTID", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, + }, [OP_RECLAIM_COMPLETE] = { .op_func = (nfsd4op_func)nfsd4_reclaim_complete, - .op_flags = ALLOWED_WITHOUT_FH, + .op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING, .op_name = "OP_RECLAIM_COMPLETE", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_SECINFO_NO_NAME] = { .op_func = (nfsd4op_func)nfsd4_secinfo_no_name, .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_SECINFO_NO_NAME", }, + [OP_TEST_STATEID] = { + .op_func = (nfsd4op_func)nfsd4_test_stateid, + .op_flags = ALLOWED_WITHOUT_FH, + .op_name = "OP_TEST_STATEID", + }, + [OP_FREE_STATEID] = { + .op_func = (nfsd4op_func)nfsd4_free_stateid, + .op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING, + .op_name = "OP_FREE_STATEID", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, + }, }; static const char *nfsd4_op_name(unsigned opnum) @@ -1427,16 +1698,6 @@ static const char *nfsd4_op_name(unsigned opnum) #define nfsd4_voidres nfsd4_voidargs struct nfsd4_voidargs { int dummy; }; -/* - * TODO: At the present time, the NFSv4 server does not do XID caching - * of requests. Implementing XID caching would not be a serious problem, - * although it would require a mild change in interfaces since one - * doesn't know whether an NFSv4 request is idempotent until after the - * XDR decode. However, XID caching totally confuses pynfs (Peter - * Astrand's regression testsuite for NFSv4 servers), which reuses - * XID's liberally, so I've left it unimplemented until pynfs generates - * better XID's. - */ static struct svc_procedure nfsd_procedures4[2] = { [NFSPROC4_NULL] = { .pc_func = (svc_procfunc) nfsd4_proc_null, @@ -1452,6 +1713,7 @@ static struct svc_procedure nfsd_procedures4[2] = { .pc_encode = (kxdrproc_t) nfs4svc_encode_compoundres, .pc_argsize = sizeof(struct nfsd4_compoundargs), .pc_ressize = sizeof(struct nfsd4_compoundres), + .pc_release = nfsd4_release_compoundargs, .pc_cachetype = RC_NOCACHE, .pc_xdrressize = NFSD_BUFSIZE/4, }, diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index be26814..ed083b9 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -45,6 +45,7 @@ /* Globals */ static struct file *rec_file; +static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery"; static int nfs4_save_creds(const struct cred **original_creds) @@ -129,6 +130,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) if (!rec_file || clp->cl_firststate) return 0; + clp->cl_firststate = 1; status = nfs4_save_creds(&original_cred); if (status < 0) return status; @@ -143,10 +145,8 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) goto out_unlock; } status = -EEXIST; - if (dentry->d_inode) { - dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n"); + if (dentry->d_inode) goto out_put; - } status = mnt_want_write(rec_file->f_path.mnt); if (status) goto out_put; @@ -156,12 +156,14 @@ out_put: dput(dentry); out_unlock: mutex_unlock(&dir->d_inode->i_mutex); - if (status == 0) { - clp->cl_firststate = 1; + if (status == 0) vfs_fsync(rec_file, 0); - } + else + printk(KERN_ERR "NFSD: failed to write recovery record" + " (err %d); please check that %s exists" + " and is writeable", status, + user_recovery_dirname); nfs4_reset_creds(original_cred); - dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status); return status; } @@ -191,52 +193,42 @@ nfsd4_build_namelist(void *arg, const char *name, int namlen, } static int -nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) +nfsd4_list_rec_dir(recdir_func *f) { const struct cred *original_cred; - struct file *filp; + struct dentry *dir = rec_file->f_path.dentry; LIST_HEAD(names); - struct name_list *entry; - struct dentry *dentry; int status; - if (!rec_file) - return 0; - status = nfs4_save_creds(&original_cred); if (status < 0) return status; - filp = dentry_open(dget(dir), mntget(rec_file->f_path.mnt), O_RDONLY, - current_cred()); - status = PTR_ERR(filp); - if (IS_ERR(filp)) - goto out; - status = vfs_readdir(filp, nfsd4_build_namelist, &names); - fput(filp); + status = vfs_llseek(rec_file, 0, SEEK_SET); + if (status < 0) { + nfs4_reset_creds(original_cred); + return status; + } + + status = vfs_readdir(rec_file, nfsd4_build_namelist, &names); mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); while (!list_empty(&names)) { + struct name_list *entry; entry = list_entry(names.next, struct name_list, list); - - dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); - if (IS_ERR(dentry)) { - status = PTR_ERR(dentry); - break; + if (!status) { + struct dentry *dentry; + dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); + if (IS_ERR(dentry)) { + status = PTR_ERR(dentry); + break; + } + status = f(dir, dentry); + dput(dentry); } - status = f(dir, dentry); - dput(dentry); - if (status) - break; list_del(&entry->list); kfree(entry); } mutex_unlock(&dir->d_inode->i_mutex); -out: - while (!list_empty(&names)) { - entry = list_entry(names.next, struct name_list, list); - list_del(&entry->list); - kfree(entry); - } nfs4_reset_creds(original_cred); return status; } @@ -322,7 +314,7 @@ nfsd4_recdir_purge_old(void) { status = mnt_want_write(rec_file->f_path.mnt); if (status) goto out; - status = nfsd4_list_rec_dir(rec_file->f_path.dentry, purge_old); + status = nfsd4_list_rec_dir(purge_old); if (status == 0) vfs_fsync(rec_file, 0); mnt_drop_write(rec_file->f_path.mnt); @@ -352,7 +344,7 @@ nfsd4_recdir_load(void) { if (!rec_file) return 0; - status = nfsd4_list_rec_dir(rec_file->f_path.dentry, load_recdir); + status = nfsd4_list_rec_dir(load_recdir); if (status) printk("nfsd4: failed loading clients from recovery" " directory %s\n", rec_file->f_path.dentry->d_name.name); @@ -364,13 +356,13 @@ nfsd4_recdir_load(void) { */ void -nfsd4_init_recdir(char *rec_dirname) +nfsd4_init_recdir() { const struct cred *original_cred; int status; printk("NFSD: Using %s as the NFSv4 state recovery directory\n", - rec_dirname); + user_recovery_dirname); BUG_ON(rec_file); @@ -382,10 +374,10 @@ nfsd4_init_recdir(char *rec_dirname) return; } - rec_file = filp_open(rec_dirname, O_RDONLY | O_DIRECTORY, 0); + rec_file = filp_open(user_recovery_dirname, O_RDONLY | O_DIRECTORY, 0); if (IS_ERR(rec_file)) { printk("NFSD: unable to find recovery directory %s\n", - rec_dirname); + user_recovery_dirname); rec_file = NULL; } @@ -400,3 +392,30 @@ nfsd4_shutdown_recdir(void) fput(rec_file); rec_file = NULL; } + +/* + * Change the NFSv4 recovery directory to recdir. + */ +int +nfs4_reset_recoverydir(char *recdir) +{ + int status; + struct path path; + + status = kern_path(recdir, LOOKUP_FOLLOW, &path); + if (status) + return status; + status = -ENOTDIR; + if (S_ISDIR(path.dentry->d_inode->i_mode)) { + strcpy(user_recovery_dirname, recdir); + status = 0; + } + path_put(&path); + return status; +} + +char * +nfs4_recoverydir(void) +{ + return user_recovery_dirname; +} diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 4ec38df..d455ea0 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include "xdr4.h" @@ -48,9 +49,6 @@ time_t nfsd4_lease = 90; /* default lease time */ time_t nfsd4_grace = 90; static time_t boot_time; -static u32 current_ownerid = 1; -static u32 current_fileid = 1; -static u32 current_delegid = 1; static stateid_t zerostateid; /* bits all 0 */ static stateid_t onestateid; /* bits all 1 */ static u64 current_sessionid = 1; @@ -59,10 +57,7 @@ static u64 current_sessionid = 1; #define ONE_STATEID(stateid) (!memcmp((stateid), &onestateid, sizeof(stateid_t))) /* forward declarations */ -static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags); -static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid); -static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery"; -static void nfs4_set_recdir(char *recdir); +static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner); /* Locking: */ @@ -76,7 +71,8 @@ static DEFINE_MUTEX(client_mutex); */ static DEFINE_SPINLOCK(recall_lock); -static struct kmem_cache *stateowner_slab = NULL; +static struct kmem_cache *openowner_slab = NULL; +static struct kmem_cache *lockowner_slab = NULL; static struct kmem_cache *file_slab = NULL; static struct kmem_cache *stateid_slab = NULL; static struct kmem_cache *deleg_slab = NULL; @@ -108,6 +104,11 @@ opaque_hashval(const void *ptr, int nbytes) static struct list_head del_recall_lru; +static void nfsd4_free_file(struct nfs4_file *f) +{ + kmem_cache_free(file_slab, f); +} + static inline void put_nfs4_file(struct nfs4_file *fi) { @@ -115,7 +116,7 @@ put_nfs4_file(struct nfs4_file *fi) list_del(&fi->fi_hash); spin_unlock(&recall_lock); iput(fi->fi_inode); - kmem_cache_free(file_slab, fi); + nfsd4_free_file(fi); } } @@ -132,35 +133,33 @@ unsigned int max_delegations; * Open owner state (share locks) */ -/* hash tables for nfs4_stateowner */ -#define OWNER_HASH_BITS 8 -#define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS) -#define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1) +/* hash tables for open owners */ +#define OPEN_OWNER_HASH_BITS 8 +#define OPEN_OWNER_HASH_SIZE (1 << OPEN_OWNER_HASH_BITS) +#define OPEN_OWNER_HASH_MASK (OPEN_OWNER_HASH_SIZE - 1) + +static unsigned int open_ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername) +{ + unsigned int ret; -#define ownerid_hashval(id) \ - ((id) & OWNER_HASH_MASK) -#define ownerstr_hashval(clientid, ownername) \ - (((clientid) + opaque_hashval((ownername.data), (ownername.len))) & OWNER_HASH_MASK) + ret = opaque_hashval(ownername->data, ownername->len); + ret += clientid; + return ret & OPEN_OWNER_HASH_MASK; +} -static struct list_head ownerid_hashtbl[OWNER_HASH_SIZE]; -static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE]; +static struct list_head open_ownerstr_hashtbl[OPEN_OWNER_HASH_SIZE]; /* hash table for nfs4_file */ #define FILE_HASH_BITS 8 #define FILE_HASH_SIZE (1 << FILE_HASH_BITS) -/* hash table for (open)nfs4_stateid */ -#define STATEID_HASH_BITS 10 -#define STATEID_HASH_SIZE (1 << STATEID_HASH_BITS) -#define STATEID_HASH_MASK (STATEID_HASH_SIZE - 1) - -#define file_hashval(x) \ - hash_ptr(x, FILE_HASH_BITS) -#define stateid_hashval(owner_id, file_id) \ - (((owner_id) + (file_id)) & STATEID_HASH_MASK) +static unsigned int file_hashval(struct inode *ino) +{ + /* XXX: why are we hashing on inode pointer, anyway? */ + return hash_ptr(ino, FILE_HASH_BITS); +} static struct list_head file_hashtbl[FILE_HASH_SIZE]; -static struct list_head stateid_hashtbl[STATEID_HASH_SIZE]; static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag) { @@ -203,8 +202,73 @@ static void nfs4_file_put_access(struct nfs4_file *fp, int oflag) __nfs4_file_put_access(fp, oflag); } +static inline int get_new_stid(struct nfs4_stid *stid) +{ + static int min_stateid = 0; + struct idr *stateids = &stid->sc_client->cl_stateids; + int new_stid; + int error; + + error = idr_get_new_above(stateids, stid, min_stateid, &new_stid); + /* + * Note: the necessary preallocation was done in + * nfs4_alloc_stateid(). The idr code caps the number of + * preallocations that can exist at a time, but the state lock + * prevents anyone from using ours before we get here: + */ + BUG_ON(error); + /* + * It shouldn't be a problem to reuse an opaque stateid value. + * I don't think it is for 4.1. But with 4.0 I worry that, for + * example, a stray write retransmission could be accepted by + * the server when it should have been rejected. Therefore, + * adopt a trick from the sctp code to attempt to maximize the + * amount of time until an id is reused, by ensuring they always + * "increase" (mod INT_MAX): + */ + + min_stateid = new_stid+1; + if (min_stateid == INT_MAX) + min_stateid = 0; + return new_stid; +} + +static void init_stid(struct nfs4_stid *stid, struct nfs4_client *cl, unsigned char type) +{ + stateid_t *s = &stid->sc_stateid; + int new_id; + + stid->sc_type = type; + stid->sc_client = cl; + s->si_opaque.so_clid = cl->cl_clientid; + new_id = get_new_stid(stid); + s->si_opaque.so_id = (u32)new_id; + /* Will be incremented before return to client: */ + s->si_generation = 0; +} + +static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab) +{ + struct idr *stateids = &cl->cl_stateids; + + if (!idr_pre_get(stateids, GFP_KERNEL)) + return NULL; + /* + * Note: if we fail here (or any time between now and the time + * we actually get the new idr), we won't need to undo the idr + * preallocation, since the idr code caps the number of + * preallocated entries. + */ + return kmem_cache_alloc(slab, GFP_KERNEL); +} + +static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) +{ + return openlockstateid(nfs4_alloc_stid(clp, stateid_slab)); +} + static struct nfs4_delegation * -alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type) +alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh, u32 type) { struct nfs4_delegation *dp; struct nfs4_file *fp = stp->st_file; @@ -221,21 +285,23 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f return NULL; if (num_delegations > max_delegations) return NULL; - dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL); + dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); if (dp == NULL) return dp; + init_stid(&dp->dl_stid, clp, NFS4_DELEG_STID); + /* + * delegation seqid's are never incremented. The 4.1 special + * meaning of seqid 0 isn't meaningful, really, but let's avoid + * 0 anyway just for consistency and use 1: + */ + dp->dl_stid.sc_stateid.si_generation = 1; num_delegations++; INIT_LIST_HEAD(&dp->dl_perfile); INIT_LIST_HEAD(&dp->dl_perclnt); INIT_LIST_HEAD(&dp->dl_recall_lru); - dp->dl_client = clp; get_nfs4_file(fp); dp->dl_file = fp; dp->dl_type = type; - dp->dl_stateid.si_boot = boot_time; - dp->dl_stateid.si_stateownerid = current_delegid++; - dp->dl_stateid.si_fileid = 0; - dp->dl_stateid.si_generation = 0; fh_copy_shallow(&dp->dl_fh, ¤t_fh->fh_handle); dp->dl_time = 0; atomic_set(&dp->dl_count, 1); @@ -264,12 +330,29 @@ static void nfs4_put_deleg_lease(struct nfs4_file *fp) } } +static void unhash_stid(struct nfs4_stid *s) +{ + struct idr *stateids = &s->sc_client->cl_stateids; + + idr_remove(stateids, s->sc_stateid.si_opaque.so_id); +} + +static void +hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) +{ + lockdep_assert_held(&recall_lock); + + list_add(&dp->dl_perfile, &fp->fi_delegations); + list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); +} + /* Called under the state lock. */ static void unhash_delegation(struct nfs4_delegation *dp) { - list_del_init(&dp->dl_perclnt); + unhash_stid(&dp->dl_stid); spin_lock(&recall_lock); + list_del_init(&dp->dl_perclnt); list_del_init(&dp->dl_perfile); list_del_init(&dp->dl_recall_lru); spin_unlock(&recall_lock); @@ -289,10 +372,16 @@ static DEFINE_SPINLOCK(client_lock); #define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS) #define CLIENT_HASH_MASK (CLIENT_HASH_SIZE - 1) -#define clientid_hashval(id) \ - ((id) & CLIENT_HASH_MASK) -#define clientstr_hashval(name) \ - (opaque_hashval((name), 8) & CLIENT_HASH_MASK) +static unsigned int clientid_hashval(u32 id) +{ + return id & CLIENT_HASH_MASK; +} + +static unsigned int clientstr_hashval(const char *name) +{ + return opaque_hashval(name, 8) & CLIENT_HASH_MASK; +} + /* * reclaim_str_hashtbl[] holds known client info from previous reset/reboot * used in reboot/reset lease grace period processing @@ -359,7 +448,7 @@ set_deny(unsigned int *deny, unsigned long bmap) { } static int -test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) { +test_share(struct nfs4_ol_stateid *stp, struct nfsd4_open *open) { unsigned int access, deny; set_access(&access, stp->st_access_bmap); @@ -382,14 +471,13 @@ static int nfs4_access_to_omode(u32 access) BUG(); } -static void unhash_generic_stateid(struct nfs4_stateid *stp) +static void unhash_generic_stateid(struct nfs4_ol_stateid *stp) { - list_del(&stp->st_hash); list_del(&stp->st_perfile); list_del(&stp->st_perstateowner); } -static void free_generic_stateid(struct nfs4_stateid *stp) +static void close_generic_stateid(struct nfs4_ol_stateid *stp) { int i; @@ -398,84 +486,106 @@ static void free_generic_stateid(struct nfs4_stateid *stp) if (test_bit(i, &stp->st_access_bmap)) nfs4_file_put_access(stp->st_file, nfs4_access_to_omode(i)); + __clear_bit(i, &stp->st_access_bmap); } } put_nfs4_file(stp->st_file); + stp->st_file = NULL; +} + +static void free_generic_stateid(struct nfs4_ol_stateid *stp) +{ kmem_cache_free(stateid_slab, stp); } -static void release_lock_stateid(struct nfs4_stateid *stp) +static void release_lock_stateid(struct nfs4_ol_stateid *stp) { struct file *file; unhash_generic_stateid(stp); + unhash_stid(&stp->st_stid); file = find_any_file(stp->st_file); if (file) - locks_remove_posix(file, (fl_owner_t)stp->st_stateowner); + locks_remove_posix(file, (fl_owner_t)lockowner(stp->st_stateowner)); + close_generic_stateid(stp); free_generic_stateid(stp); } -static void unhash_lockowner(struct nfs4_stateowner *sop) +static void unhash_lockowner(struct nfs4_lockowner *lo) { - struct nfs4_stateid *stp; + struct nfs4_ol_stateid *stp; - list_del(&sop->so_idhash); - list_del(&sop->so_strhash); - list_del(&sop->so_perstateid); - while (!list_empty(&sop->so_stateids)) { - stp = list_first_entry(&sop->so_stateids, - struct nfs4_stateid, st_perstateowner); + list_del(&lo->lo_owner.so_strhash); + list_del(&lo->lo_perstateid); + while (!list_empty(&lo->lo_owner.so_stateids)) { + stp = list_first_entry(&lo->lo_owner.so_stateids, + struct nfs4_ol_stateid, st_perstateowner); release_lock_stateid(stp); } } -static void release_lockowner(struct nfs4_stateowner *sop) +static void release_lockowner(struct nfs4_lockowner *lo) { - unhash_lockowner(sop); - nfs4_put_stateowner(sop); + unhash_lockowner(lo); + nfs4_free_lockowner(lo); } static void -release_stateid_lockowners(struct nfs4_stateid *open_stp) +release_stateid_lockowners(struct nfs4_ol_stateid *open_stp) { - struct nfs4_stateowner *lock_sop; + struct nfs4_lockowner *lo; while (!list_empty(&open_stp->st_lockowners)) { - lock_sop = list_entry(open_stp->st_lockowners.next, - struct nfs4_stateowner, so_perstateid); - /* list_del(&open_stp->st_lockowners); */ - BUG_ON(lock_sop->so_is_open_owner); - release_lockowner(lock_sop); + lo = list_entry(open_stp->st_lockowners.next, + struct nfs4_lockowner, lo_perstateid); + release_lockowner(lo); } } -static void release_open_stateid(struct nfs4_stateid *stp) +static void unhash_open_stateid(struct nfs4_ol_stateid *stp) { unhash_generic_stateid(stp); release_stateid_lockowners(stp); + close_generic_stateid(stp); +} + +static void release_open_stateid(struct nfs4_ol_stateid *stp) +{ + unhash_open_stateid(stp); + unhash_stid(&stp->st_stid); free_generic_stateid(stp); } -static void unhash_openowner(struct nfs4_stateowner *sop) +static void unhash_openowner(struct nfs4_openowner *oo) { - struct nfs4_stateid *stp; + struct nfs4_ol_stateid *stp; - list_del(&sop->so_idhash); - list_del(&sop->so_strhash); - list_del(&sop->so_perclient); - list_del(&sop->so_perstateid); /* XXX: necessary? */ - while (!list_empty(&sop->so_stateids)) { - stp = list_first_entry(&sop->so_stateids, - struct nfs4_stateid, st_perstateowner); + list_del(&oo->oo_owner.so_strhash); + list_del(&oo->oo_perclient); + while (!list_empty(&oo->oo_owner.so_stateids)) { + stp = list_first_entry(&oo->oo_owner.so_stateids, + struct nfs4_ol_stateid, st_perstateowner); release_open_stateid(stp); } } -static void release_openowner(struct nfs4_stateowner *sop) +static void release_last_closed_stateid(struct nfs4_openowner *oo) +{ + struct nfs4_ol_stateid *s = oo->oo_last_closed_stid; + + if (s) { + unhash_stid(&s->st_stid); + free_generic_stateid(s); + oo->oo_last_closed_stid = NULL; + } +} + +static void release_openowner(struct nfs4_openowner *oo) { - unhash_openowner(sop); - list_del(&sop->so_close_lru); - nfs4_put_stateowner(sop); + unhash_openowner(oo); + list_del(&oo->oo_close_lru); + release_last_closed_stateid(oo); + nfs4_free_openowner(oo); } #define SESSION_HASH_SIZE 512 @@ -840,9 +950,6 @@ renew_client_locked(struct nfs4_client *clp) return; } - /* - * Move client to the end to the LRU list. - */ dprintk("renewing client (clientid %08x/%08x)\n", clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); @@ -888,6 +995,18 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) } memcpy(clp->cl_name.data, name.data, name.len); clp->cl_name.len = name.len; + INIT_LIST_HEAD(&clp->cl_sessions); + idr_init(&clp->cl_stateids); + atomic_set(&clp->cl_refcount, 0); + clp->cl_cb_state = NFSD4_CB_UNKNOWN; + INIT_LIST_HEAD(&clp->cl_idhash); + INIT_LIST_HEAD(&clp->cl_strhash); + INIT_LIST_HEAD(&clp->cl_openowners); + INIT_LIST_HEAD(&clp->cl_delegations); + INIT_LIST_HEAD(&clp->cl_lru); + INIT_LIST_HEAD(&clp->cl_callbacks); + spin_lock_init(&clp->cl_lock); + rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); return clp; } @@ -901,10 +1020,13 @@ free_client(struct nfs4_client *clp) list_del(&ses->se_perclnt); nfsd4_put_session(ses); } + rpc_destroy_wait_queue(&clp->cl_cb_waitq); if (clp->cl_cred.cr_group_info) put_group_info(clp->cl_cred.cr_group_info); kfree(clp->cl_principal); kfree(clp->cl_name.data); + idr_remove_all(&clp->cl_stateids); + idr_destroy(&clp->cl_stateids); kfree(clp); } @@ -940,7 +1062,7 @@ unhash_client_locked(struct nfs4_client *clp) static void expire_client(struct nfs4_client *clp) { - struct nfs4_stateowner *sop; + struct nfs4_openowner *oo; struct nfs4_delegation *dp; struct list_head reaplist; @@ -958,8 +1080,8 @@ expire_client(struct nfs4_client *clp) unhash_delegation(dp); } while (!list_empty(&clp->cl_openowners)) { - sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient); - release_openowner(sop); + oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient); + release_openowner(oo); } nfsd4_shutdown_callback(clp); if (clp->cl_cb_conn.cb_xprt) @@ -1035,6 +1157,23 @@ static void gen_confirm(struct nfs4_client *clp) *p++ = i++; } +static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t) +{ + return idr_find(&cl->cl_stateids, t->si_opaque.so_id); +} + +static struct nfs4_stid *find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask) +{ + struct nfs4_stid *s; + + s = find_stateid(cl, t); + if (!s) + return NULL; + if (typemask & s->sc_type) + return s; + return NULL; +} + static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, struct svc_rqst *rqstp, nfs4_verifier *verf) { @@ -1046,7 +1185,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, if (clp == NULL) return NULL; - INIT_LIST_HEAD(&clp->cl_sessions); princ = svc_gss_principal(rqstp); if (princ) { @@ -1058,19 +1196,9 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, } memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); - atomic_set(&clp->cl_refcount, 0); - clp->cl_cb_state = NFSD4_CB_UNKNOWN; - INIT_LIST_HEAD(&clp->cl_idhash); - INIT_LIST_HEAD(&clp->cl_strhash); - INIT_LIST_HEAD(&clp->cl_openowners); - INIT_LIST_HEAD(&clp->cl_delegations); - INIT_LIST_HEAD(&clp->cl_lru); - INIT_LIST_HEAD(&clp->cl_callbacks); - spin_lock_init(&clp->cl_lock); INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_do_callback_rpc); clp->cl_time = get_seconds(); clear_bit(0, &clp->cl_cb_slot_busy); - rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); copy_verf(clp, verf); rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa); clp->cl_flavor = rqstp->rq_flavor; @@ -1080,17 +1208,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, return clp; } -static int check_name(struct xdr_netobj name) -{ - if (name.len == 0) - return 0; - if (name.len > NFS4_OPAQUE_LIMIT) { - dprintk("NFSD: check_name: name too long(%d)!\n", name.len); - return 0; - } - return 1; -} - static void add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval) { @@ -1122,8 +1239,10 @@ find_confirmed_client(clientid_t *clid) unsigned int idhashval = clientid_hashval(clid->cl_id); list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) { - if (same_clid(&clp->cl_clientid, clid)) + if (same_clid(&clp->cl_clientid, clid)) { + renew_client(clp); return clp; + } } return NULL; } @@ -1170,20 +1289,6 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval) return NULL; } -static void rpc_svcaddr2sockaddr(struct sockaddr *sa, unsigned short family, union svc_addr_u *svcaddr) -{ - switch (family) { - case AF_INET: - ((struct sockaddr_in *)sa)->sin_family = AF_INET; - ((struct sockaddr_in *)sa)->sin_addr = svcaddr->addr; - return; - case AF_INET6: - ((struct sockaddr_in6 *)sa)->sin6_family = AF_INET6; - ((struct sockaddr_in6 *)sa)->sin6_addr = svcaddr->addr6; - return; - } -} - static void gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_rqst *rqstp) { @@ -1215,7 +1320,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r conn->cb_prog = se->se_callback_prog; conn->cb_ident = se->se_callback_ident; - rpc_svcaddr2sockaddr((struct sockaddr *)&conn->cb_saddr, expected_family, &rqstp->rq_daddr); + memcpy(&conn->cb_saddr, &rqstp->rq_daddr, rqstp->rq_daddrlen); return; out_err: conn->cb_addr.ss_family = AF_UNSPEC; @@ -1347,7 +1452,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, __func__, rqstp, exid, exid->clname.len, exid->clname.data, addr_str, exid->flags, exid->spa_how); - if (!check_name(exid->clname) || (exid->flags & ~EXCHGID4_FLAG_MASK_A)) + if (exid->flags & ~EXCHGID4_FLAG_MASK_A) return nfserr_inval; /* Currently only support SP4_NONE */ @@ -1503,6 +1608,29 @@ nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses, return slot->sl_status; } +#define NFSD_MIN_REQ_HDR_SEQ_SZ ((\ + 2 * 2 + /* credential,verifier: AUTH_NULL, length 0 */ \ + 1 + /* MIN tag is length with zero, only length */ \ + 3 + /* version, opcount, opcode */ \ + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \ + /* seqid, slotID, slotID, cache */ \ + 4 ) * sizeof(__be32)) + +#define NFSD_MIN_RESP_HDR_SEQ_SZ ((\ + 2 + /* verifier: AUTH_NULL, length 0 */\ + 1 + /* status */ \ + 1 + /* MIN tag is length with zero, only length */ \ + 3 + /* opcount, opcode, opstatus*/ \ + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \ + /* seqid, slotID, slotID, slotID, status */ \ + 5 ) * sizeof(__be32)) + +static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs fchannel) +{ + return fchannel.maxreq_sz < NFSD_MIN_REQ_HDR_SEQ_SZ + || fchannel.maxresp_sz < NFSD_MIN_RESP_HDR_SEQ_SZ; +} + __be32 nfsd4_create_session(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, @@ -1571,6 +1699,10 @@ nfsd4_create_session(struct svc_rqst *rqstp, cr_ses->flags &= ~SESSION4_PERSIST; cr_ses->flags &= ~SESSION4_RDMA; + status = nfserr_toosmall; + if (check_forechannel_attrs(cr_ses->fore_channel)) + goto out; + status = nfserr_jukebox; new = alloc_init_session(rqstp, conf, cr_ses); if (!new) @@ -1732,6 +1864,14 @@ static bool nfsd4_session_too_many_ops(struct svc_rqst *rqstp, struct nfsd4_sess return args->opcnt > session->se_fchannel.maxops; } +static bool nfsd4_request_too_big(struct svc_rqst *rqstp, + struct nfsd4_session *session) +{ + struct xdr_buf *xb = &rqstp->rq_arg; + + return xb->len > session->se_fchannel.maxreq_sz; +} + __be32 nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, @@ -1764,6 +1904,10 @@ nfsd4_sequence(struct svc_rqst *rqstp, if (nfsd4_session_too_many_ops(rqstp, session)) goto out; + status = nfserr_req_too_big; + if (nfsd4_request_too_big(rqstp, session)) + goto out; + status = nfserr_badslot; if (seq->slotid >= session->se_fchannel.maxreqs) goto out; @@ -1807,8 +1951,16 @@ out: nfsd4_get_session(cstate->session); atomic_inc(&clp->cl_refcount); - if (clp->cl_cb_state == NFSD4_CB_DOWN) - seq->status_flags |= SEQ4_STATUS_CB_PATH_DOWN; + switch (clp->cl_cb_state) { + case NFSD4_CB_DOWN: + seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN; + break; + case NFSD4_CB_FAULT: + seq->status_flags = SEQ4_STATUS_BACKCHANNEL_FAULT; + break; + default: + seq->status_flags = 0; + } } kfree(conn); spin_unlock(&client_lock); @@ -1816,6 +1968,50 @@ out: return status; } +static inline bool has_resources(struct nfs4_client *clp) +{ + return !list_empty(&clp->cl_openowners) + || !list_empty(&clp->cl_delegations) + || !list_empty(&clp->cl_sessions); +} + +__be32 +nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_destroy_clientid *dc) +{ + struct nfs4_client *conf, *unconf, *clp; + int status = 0; + + nfs4_lock_state(); + unconf = find_unconfirmed_client(&dc->clientid); + conf = find_confirmed_client(&dc->clientid); + + if (conf) { + clp = conf; + + if (!is_client_expired(conf) && has_resources(conf)) { + status = nfserr_clientid_busy; + goto out; + } + + /* rfc5661 18.50.3 */ + if (cstate->session && conf == cstate->session->se_client) { + status = nfserr_clientid_busy; + goto out; + } + } else if (unconf) + clp = unconf; + else { + status = nfserr_stale_clientid; + goto out; + } + + expire_client(clp); +out: + nfs4_unlock_state(); + dprintk("%s return %d\n", __func__, ntohl(status)); + return status; +} + __be32 nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc) { @@ -1858,19 +2054,13 @@ __be32 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_setclientid *setclid) { - struct xdr_netobj clname = { - .len = setclid->se_namelen, - .data = setclid->se_name, - }; + struct xdr_netobj clname = setclid->se_name; nfs4_verifier clverifier = setclid->se_verf; unsigned int strhashval; struct nfs4_client *conf, *unconf, *new; __be32 status; char dname[HEXDIR_LEN]; - if (!check_name(clname)) - return nfserr_inval; - status = nfs4_make_rec_clidname(dname, &clname); if (status) return status; @@ -2074,31 +2264,28 @@ out: return status; } +static struct nfs4_file *nfsd4_alloc_file(void) +{ + return kmem_cache_alloc(file_slab, GFP_KERNEL); +} + /* OPEN Share state helper functions */ -static inline struct nfs4_file * -alloc_init_file(struct inode *ino) +static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino) { - struct nfs4_file *fp; unsigned int hashval = file_hashval(ino); - fp = kmem_cache_alloc(file_slab, GFP_KERNEL); - if (fp) { - atomic_set(&fp->fi_ref, 1); - INIT_LIST_HEAD(&fp->fi_hash); - INIT_LIST_HEAD(&fp->fi_stateids); - INIT_LIST_HEAD(&fp->fi_delegations); - fp->fi_inode = igrab(ino); - fp->fi_id = current_fileid++; - fp->fi_had_conflict = false; - fp->fi_lease = NULL; - memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); - memset(fp->fi_access, 0, sizeof(fp->fi_access)); - spin_lock(&recall_lock); - list_add(&fp->fi_hash, &file_hashtbl[hashval]); - spin_unlock(&recall_lock); - return fp; - } - return NULL; + atomic_set(&fp->fi_ref, 1); + INIT_LIST_HEAD(&fp->fi_hash); + INIT_LIST_HEAD(&fp->fi_stateids); + INIT_LIST_HEAD(&fp->fi_delegations); + fp->fi_inode = igrab(ino); + fp->fi_had_conflict = false; + fp->fi_lease = NULL; + memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); + memset(fp->fi_access, 0, sizeof(fp->fi_access)); + spin_lock(&recall_lock); + list_add(&fp->fi_hash, &file_hashtbl[hashval]); + spin_unlock(&recall_lock); } static void @@ -2113,7 +2300,8 @@ nfsd4_free_slab(struct kmem_cache **slab) void nfsd4_free_slabs(void) { - nfsd4_free_slab(&stateowner_slab); + nfsd4_free_slab(&openowner_slab); + nfsd4_free_slab(&lockowner_slab); nfsd4_free_slab(&file_slab); nfsd4_free_slab(&stateid_slab); nfsd4_free_slab(&deleg_slab); @@ -2122,16 +2310,20 @@ nfsd4_free_slabs(void) static int nfsd4_init_slabs(void) { - stateowner_slab = kmem_cache_create("nfsd4_stateowners", - sizeof(struct nfs4_stateowner), 0, 0, NULL); - if (stateowner_slab == NULL) + openowner_slab = kmem_cache_create("nfsd4_openowners", + sizeof(struct nfs4_openowner), 0, 0, NULL); + if (openowner_slab == NULL) + goto out_nomem; + lockowner_slab = kmem_cache_create("nfsd4_lockowners", + sizeof(struct nfs4_lockowner), 0, 0, NULL); + if (lockowner_slab == NULL) goto out_nomem; file_slab = kmem_cache_create("nfsd4_files", sizeof(struct nfs4_file), 0, 0, NULL); if (file_slab == NULL) goto out_nomem; stateid_slab = kmem_cache_create("nfsd4_stateids", - sizeof(struct nfs4_stateid), 0, 0, NULL); + sizeof(struct nfs4_ol_stateid), 0, 0, NULL); if (stateid_slab == NULL) goto out_nomem; deleg_slab = kmem_cache_create("nfsd4_delegations", @@ -2145,97 +2337,94 @@ out_nomem: return -ENOMEM; } -void -nfs4_free_stateowner(struct kref *kref) +void nfs4_free_openowner(struct nfs4_openowner *oo) { - struct nfs4_stateowner *sop = - container_of(kref, struct nfs4_stateowner, so_ref); - kfree(sop->so_owner.data); - kmem_cache_free(stateowner_slab, sop); + kfree(oo->oo_owner.so_owner.data); + kmem_cache_free(openowner_slab, oo); } -static inline struct nfs4_stateowner * -alloc_stateowner(struct xdr_netobj *owner) +void nfs4_free_lockowner(struct nfs4_lockowner *lo) { - struct nfs4_stateowner *sop; + kfree(lo->lo_owner.so_owner.data); + kmem_cache_free(lockowner_slab, lo); +} - if ((sop = kmem_cache_alloc(stateowner_slab, GFP_KERNEL))) { - if ((sop->so_owner.data = kmalloc(owner->len, GFP_KERNEL))) { - memcpy(sop->so_owner.data, owner->data, owner->len); - sop->so_owner.len = owner->len; - kref_init(&sop->so_ref); - return sop; - } - kmem_cache_free(stateowner_slab, sop); - } - return NULL; +static void init_nfs4_replay(struct nfs4_replay *rp) +{ + rp->rp_status = nfserr_serverfault; + rp->rp_buflen = 0; + rp->rp_buf = rp->rp_ibuf; } -static struct nfs4_stateowner * -alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfsd4_open *open) { +static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj *owner, struct nfs4_client *clp) +{ struct nfs4_stateowner *sop; - struct nfs4_replay *rp; - unsigned int idhashval; - if (!(sop = alloc_stateowner(&open->op_owner))) + sop = kmem_cache_alloc(slab, GFP_KERNEL); + if (!sop) + return NULL; + + sop->so_owner.data = kmemdup(owner->data, owner->len, GFP_KERNEL); + if (!sop->so_owner.data) { + kmem_cache_free(slab, sop); return NULL; - idhashval = ownerid_hashval(current_ownerid); - INIT_LIST_HEAD(&sop->so_idhash); - INIT_LIST_HEAD(&sop->so_strhash); - INIT_LIST_HEAD(&sop->so_perclient); + } + sop->so_owner.len = owner->len; + INIT_LIST_HEAD(&sop->so_stateids); - INIT_LIST_HEAD(&sop->so_perstateid); /* not used */ - INIT_LIST_HEAD(&sop->so_close_lru); - sop->so_time = 0; - list_add(&sop->so_idhash, &ownerid_hashtbl[idhashval]); - list_add(&sop->so_strhash, &ownerstr_hashtbl[strhashval]); - list_add(&sop->so_perclient, &clp->cl_openowners); - sop->so_is_open_owner = 1; - sop->so_id = current_ownerid++; sop->so_client = clp; - sop->so_seqid = open->op_seqid; - sop->so_confirmed = 0; - rp = &sop->so_replay; - rp->rp_status = nfserr_serverfault; - rp->rp_buflen = 0; - rp->rp_buf = rp->rp_ibuf; + init_nfs4_replay(&sop->so_replay); return sop; } -static inline void -init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { - struct nfs4_stateowner *sop = open->op_stateowner; - unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id); +static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval) +{ + list_add(&oo->oo_owner.so_strhash, &open_ownerstr_hashtbl[strhashval]); + list_add(&oo->oo_perclient, &clp->cl_openowners); +} + +static struct nfs4_openowner * +alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfsd4_open *open) { + struct nfs4_openowner *oo; + + oo = alloc_stateowner(openowner_slab, &open->op_owner, clp); + if (!oo) + return NULL; + oo->oo_owner.so_is_open_owner = 1; + oo->oo_owner.so_seqid = open->op_seqid; + oo->oo_flags = NFS4_OO_NEW; + oo->oo_time = 0; + oo->oo_last_closed_stid = NULL; + INIT_LIST_HEAD(&oo->oo_close_lru); + hash_openowner(oo, clp, strhashval); + return oo; +} - INIT_LIST_HEAD(&stp->st_hash); - INIT_LIST_HEAD(&stp->st_perstateowner); +static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { + struct nfs4_openowner *oo = open->op_openowner; + struct nfs4_client *clp = oo->oo_owner.so_client; + + init_stid(&stp->st_stid, clp, NFS4_OPEN_STID); INIT_LIST_HEAD(&stp->st_lockowners); - INIT_LIST_HEAD(&stp->st_perfile); - list_add(&stp->st_hash, &stateid_hashtbl[hashval]); - list_add(&stp->st_perstateowner, &sop->so_stateids); + list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); list_add(&stp->st_perfile, &fp->fi_stateids); - stp->st_stateowner = sop; + stp->st_stateowner = &oo->oo_owner; get_nfs4_file(fp); stp->st_file = fp; - stp->st_stateid.si_boot = boot_time; - stp->st_stateid.si_stateownerid = sop->so_id; - stp->st_stateid.si_fileid = fp->fi_id; - stp->st_stateid.si_generation = 0; stp->st_access_bmap = 0; stp->st_deny_bmap = 0; - __set_bit(open->op_share_access & ~NFS4_SHARE_WANT_MASK, - &stp->st_access_bmap); + __set_bit(open->op_share_access, &stp->st_access_bmap); __set_bit(open->op_share_deny, &stp->st_deny_bmap); stp->st_openstp = NULL; } static void -move_to_close_lru(struct nfs4_stateowner *sop) +move_to_close_lru(struct nfs4_openowner *oo) { - dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop); + dprintk("NFSD: move_to_close_lru nfs4_openowner %p\n", oo); - list_move_tail(&sop->so_close_lru, &close_lru); - sop->so_time = get_seconds(); + list_move_tail(&oo->oo_close_lru, &close_lru); + oo->oo_time = get_seconds(); } static int @@ -2247,14 +2436,18 @@ same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner, (sop->so_client->cl_clientid.cl_id == clid->cl_id); } -static struct nfs4_stateowner * +static struct nfs4_openowner * find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open) { - struct nfs4_stateowner *so = NULL; + struct nfs4_stateowner *so; + struct nfs4_openowner *oo; - list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) { - if (same_owner_str(so, &open->op_owner, &open->op_clientid)) - return so; + list_for_each_entry(so, &open_ownerstr_hashtbl[hashval], so_strhash) { + if (same_owner_str(so, &open->op_owner, &open->op_clientid)) { + oo = openowner(so); + renew_client(oo->oo_owner.so_client); + return oo; + } } return NULL; } @@ -2278,31 +2471,6 @@ find_file(struct inode *ino) return NULL; } -static inline int access_valid(u32 x, u32 minorversion) -{ - if ((x & NFS4_SHARE_ACCESS_MASK) < NFS4_SHARE_ACCESS_READ) - return 0; - if ((x & NFS4_SHARE_ACCESS_MASK) > NFS4_SHARE_ACCESS_BOTH) - return 0; - x &= ~NFS4_SHARE_ACCESS_MASK; - if (minorversion && x) { - if ((x & NFS4_SHARE_WANT_MASK) > NFS4_SHARE_WANT_CANCEL) - return 0; - if ((x & NFS4_SHARE_WHEN_MASK) > NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED) - return 0; - x &= ~(NFS4_SHARE_WANT_MASK | NFS4_SHARE_WHEN_MASK); - } - if (x) - return 0; - return 1; -} - -static inline int deny_valid(u32 x) -{ - /* Note: unlike access bits, deny bits may be zero. */ - return x <= NFS4_SHARE_DENY_BOTH; -} - /* * Called to check deny when READ with all zero stateid or * WRITE with all zero or all one stateid @@ -2312,7 +2480,7 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) { struct inode *ino = current_fh->fh_dentry->d_inode; struct nfs4_file *fp; - struct nfs4_stateid *stp; + struct nfs4_ol_stateid *stp; __be32 ret; dprintk("NFSD: nfs4_share_conflict\n"); @@ -2383,10 +2551,20 @@ int nfsd_change_deleg_cb(struct file_lock **onlist, int arg) } static const struct lock_manager_operations nfsd_lease_mng_ops = { - .fl_break = nfsd_break_deleg_cb, - .fl_change = nfsd_change_deleg_cb, + .lm_break = nfsd_break_deleg_cb, + .lm_change = nfsd_change_deleg_cb, }; +static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4_stateowner *so, u32 seqid) +{ + if (nfsd4_has_session(cstate)) + return nfs_ok; + if (seqid == so->so_seqid - 1) + return nfserr_replay_me; + if (seqid == so->so_seqid) + return nfs_ok; + return nfserr_bad_seqid; +} __be32 nfsd4_process_open1(struct nfsd4_compound_state *cstate, @@ -2395,57 +2573,49 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, clientid_t *clientid = &open->op_clientid; struct nfs4_client *clp = NULL; unsigned int strhashval; - struct nfs4_stateowner *sop = NULL; - - if (!check_name(open->op_owner)) - return nfserr_inval; + struct nfs4_openowner *oo = NULL; + __be32 status; if (STALE_CLIENTID(&open->op_clientid)) return nfserr_stale_clientid; + /* + * In case we need it later, after we've already created the + * file and don't want to risk a further failure: + */ + open->op_file = nfsd4_alloc_file(); + if (open->op_file == NULL) + return nfserr_jukebox; - strhashval = ownerstr_hashval(clientid->cl_id, open->op_owner); - sop = find_openstateowner_str(strhashval, open); - open->op_stateowner = sop; - if (!sop) { - /* Make sure the client's lease hasn't expired. */ + strhashval = open_ownerstr_hashval(clientid->cl_id, &open->op_owner); + oo = find_openstateowner_str(strhashval, open); + open->op_openowner = oo; + if (!oo) { clp = find_confirmed_client(clientid); if (clp == NULL) return nfserr_expired; - goto renew; + goto new_owner; } - /* When sessions are used, skip open sequenceid processing */ - if (nfsd4_has_session(cstate)) - goto renew; - if (!sop->so_confirmed) { + if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { /* Replace unconfirmed owners without checking for replay. */ - clp = sop->so_client; - release_openowner(sop); - open->op_stateowner = NULL; - goto renew; - } - if (open->op_seqid == sop->so_seqid - 1) { - if (sop->so_replay.rp_buflen) - return nfserr_replay_me; - /* The original OPEN failed so spectacularly - * that we don't even have replay data saved! - * Therefore, we have no choice but to continue - * processing this OPEN; presumably, we'll - * fail again for the same reason. - */ - dprintk("nfsd4_process_open1: replay with no replay cache\n"); - goto renew; - } - if (open->op_seqid != sop->so_seqid) - return nfserr_bad_seqid; -renew: - if (open->op_stateowner == NULL) { - sop = alloc_init_open_stateowner(strhashval, clp, open); - if (sop == NULL) - return nfserr_jukebox; - open->op_stateowner = sop; + clp = oo->oo_owner.so_client; + release_openowner(oo); + open->op_openowner = NULL; + goto new_owner; } - list_del_init(&sop->so_close_lru); - renew_client(sop->so_client); + status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid); + if (status) + return status; + clp = oo->oo_owner.so_client; + goto alloc_stateid; +new_owner: + oo = alloc_init_open_stateowner(strhashval, clp, open); + if (oo == NULL) + return nfserr_jukebox; + open->op_openowner = oo; +alloc_stateid: + open->op_stp = nfs4_alloc_stateid(clp); + if (!open->op_stp) + return nfserr_jukebox; return nfs_ok; } @@ -2458,36 +2628,37 @@ nfs4_check_delegmode(struct nfs4_delegation *dp, int flags) return nfs_ok; } -static struct nfs4_delegation * -find_delegation_file(struct nfs4_file *fp, stateid_t *stid) +static int share_access_to_flags(u32 share_access) { - struct nfs4_delegation *dp; + share_access &= ~NFS4_SHARE_WANT_MASK; - spin_lock(&recall_lock); - list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) - if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid) { - spin_unlock(&recall_lock); - return dp; - } - spin_unlock(&recall_lock); - return NULL; + return share_access == NFS4_SHARE_ACCESS_READ ? RD_STATE : WR_STATE; } -static int share_access_to_flags(u32 share_access) +static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, stateid_t *s) { - share_access &= ~NFS4_SHARE_WANT_MASK; + struct nfs4_stid *ret; - return share_access == NFS4_SHARE_ACCESS_READ ? RD_STATE : WR_STATE; + ret = find_stateid_by_type(cl, s, NFS4_DELEG_STID); + if (!ret) + return NULL; + return delegstateid(ret); +} + +static bool nfsd4_is_deleg_cur(struct nfsd4_open *open) +{ + return open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR || + open->op_claim_type == NFS4_OPEN_CLAIM_DELEG_CUR_FH; } static __be32 -nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open, +nfs4_check_deleg(struct nfs4_client *cl, struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_delegation **dp) { int flags; __be32 status = nfserr_bad_stateid; - *dp = find_delegation_file(fp, &open->op_delegate_stateid); + *dp = find_deleg_stateid(cl, &open->op_delegate_stateid); if (*dp == NULL) goto out; flags = share_access_to_flags(open->op_share_access); @@ -2495,41 +2666,37 @@ nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open, if (status) *dp = NULL; out: - if (open->op_claim_type != NFS4_OPEN_CLAIM_DELEGATE_CUR) + if (!nfsd4_is_deleg_cur(open)) return nfs_ok; if (status) return status; - open->op_stateowner->so_confirmed = 1; + open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; return nfs_ok; } static __be32 -nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_stateid **stpp) +nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_ol_stateid **stpp) { - struct nfs4_stateid *local; - __be32 status = nfserr_share_denied; - struct nfs4_stateowner *sop = open->op_stateowner; + struct nfs4_ol_stateid *local; + struct nfs4_openowner *oo = open->op_openowner; list_for_each_entry(local, &fp->fi_stateids, st_perfile) { /* ignore lock owners */ if (local->st_stateowner->so_is_open_owner == 0) continue; /* remember if we have seen this open owner */ - if (local->st_stateowner == sop) + if (local->st_stateowner == &oo->oo_owner) *stpp = local; /* check for conflicting share reservations */ if (!test_share(local, open)) - goto out; + return nfserr_share_denied; } - status = 0; -out: - return status; + return nfs_ok; } -static inline struct nfs4_stateid * -nfs4_alloc_stateid(void) +static void nfs4_free_stateid(struct nfs4_ol_stateid *s) { - return kmem_cache_alloc(stateid_slab, GFP_KERNEL); + kmem_cache_free(stateid_slab, s); } static inline int nfs4_access_to_access(u32 nfs4_access) @@ -2550,12 +2717,6 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, int oflag = nfs4_access_to_omode(open->op_share_access); int access = nfs4_access_to_access(open->op_share_access); - /* CLAIM_DELEGATE_CUR is used in response to a broken lease; - * allowing it to break the lease and return EAGAIN leaves the - * client unable to make progress in returning the delegation */ - if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR) - access |= NFSD_MAY_NOT_BREAK_LEASE; - if (!fp->fi_fds[oflag]) { status = nfsd_open(rqstp, cur_fh, S_IFREG, access, &fp->fi_fds[oflag]); @@ -2567,27 +2728,6 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, return nfs_ok; } -static __be32 -nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp, - struct nfs4_file *fp, struct svc_fh *cur_fh, - struct nfsd4_open *open) -{ - struct nfs4_stateid *stp; - __be32 status; - - stp = nfs4_alloc_stateid(); - if (stp == NULL) - return nfserr_jukebox; - - status = nfs4_get_vfs_file(rqstp, fp, cur_fh, open); - if (status) { - kmem_cache_free(stateid_slab, stp); - return status; - } - *stpp = stp; - return 0; -} - static inline __be32 nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, struct nfsd4_open *open) @@ -2604,9 +2744,9 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, } static __be32 -nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_stateid *stp, struct nfsd4_open *open) +nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open) { - u32 op_share_access = open->op_share_access & ~NFS4_SHARE_WANT_MASK; + u32 op_share_access = open->op_share_access; bool new_access; __be32 status; @@ -2635,8 +2775,8 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *c static void nfs4_set_claim_prev(struct nfsd4_open *open) { - open->op_stateowner->so_confirmed = 1; - open->op_stateowner->so_client->cl_firststate = 1; + open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; + open->op_openowner->oo_owner.so_client->cl_firststate = 1; } /* Should we give out recallable state?: */ @@ -2679,10 +2819,8 @@ static int nfs4_setlease(struct nfs4_delegation *dp, int flag) if (!fl) return -ENOMEM; fl->fl_file = find_readable_file(fp); - list_add(&dp->dl_perclnt, &dp->dl_client->cl_delegations); status = vfs_setlease(fl->fl_file, fl->fl_type, &fl); if (status) { - list_del_init(&dp->dl_perclnt); locks_free_lock(fl); return -ENOMEM; } @@ -2690,7 +2828,9 @@ static int nfs4_setlease(struct nfs4_delegation *dp, int flag) fp->fi_deleg_file = fl->fl_file; get_file(fp->fi_deleg_file); atomic_set(&fp->fi_delegees, 1); - list_add(&dp->dl_perfile, &fp->fi_delegations); + spin_lock(&recall_lock); + hash_delegation_locked(dp, fp); + spin_unlock(&recall_lock); return 0; } @@ -2706,9 +2846,8 @@ static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag) return -EAGAIN; } atomic_inc(&fp->fi_delegees); - list_add(&dp->dl_perfile, &fp->fi_delegations); + hash_delegation_locked(dp, fp); spin_unlock(&recall_lock); - list_add(&dp->dl_perclnt, &dp->dl_client->cl_delegations); return 0; } @@ -2716,14 +2855,14 @@ static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag) * Attempt to hand out a delegation. */ static void -nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_stateid *stp) +nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_ol_stateid *stp) { struct nfs4_delegation *dp; - struct nfs4_stateowner *sop = stp->st_stateowner; + struct nfs4_openowner *oo = container_of(stp->st_stateowner, struct nfs4_openowner, oo_owner); int cb_up; int status, flag = 0; - cb_up = nfsd4_cb_channel_good(sop->so_client); + cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client); flag = NFS4_OPEN_DELEGATE_NONE; open->op_recall = 0; switch (open->op_claim_type) { @@ -2739,7 +2878,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta * had the chance to reclaim theirs.... */ if (locks_in_grace()) goto out; - if (!cb_up || !sop->so_confirmed) + if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) goto out; if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) flag = NFS4_OPEN_DELEGATE_WRITE; @@ -2750,17 +2889,17 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta goto out; } - dp = alloc_init_deleg(sop->so_client, stp, fh, flag); + dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh, flag); if (dp == NULL) goto out_no_deleg; status = nfs4_set_delegation(dp, flag); if (status) goto out_free; - memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid)); + memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid)); dprintk("NFSD: delegation stateid=" STATEID_FMT "\n", - STATEID_VAL(&dp->dl_stateid)); + STATEID_VAL(&dp->dl_stid.sc_stateid)); out: if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && flag == NFS4_OPEN_DELEGATE_NONE @@ -2782,16 +2921,13 @@ __be32 nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) { struct nfsd4_compoundres *resp = rqstp->rq_resp; + struct nfs4_client *cl = open->op_openowner->oo_owner.so_client; struct nfs4_file *fp = NULL; struct inode *ino = current_fh->fh_dentry->d_inode; - struct nfs4_stateid *stp = NULL; + struct nfs4_ol_stateid *stp = NULL; struct nfs4_delegation *dp = NULL; __be32 status; - status = nfserr_inval; - if (!access_valid(open->op_share_access, resp->cstate.minorversion) - || !deny_valid(open->op_share_deny)) - goto out; /* * Lookup file; if found, lookup stateid and check open request, * and check for delegations in the process of being recalled. @@ -2801,17 +2937,17 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf if (fp) { if ((status = nfs4_check_open(fp, open, &stp))) goto out; - status = nfs4_check_deleg(fp, open, &dp); + status = nfs4_check_deleg(cl, fp, open, &dp); if (status) goto out; } else { status = nfserr_bad_stateid; - if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR) + if (nfsd4_is_deleg_cur(open)) goto out; status = nfserr_jukebox; - fp = alloc_init_file(ino); - if (fp == NULL) - goto out; + fp = open->op_file; + open->op_file = NULL; + nfsd4_init_file(fp, ino); } /* @@ -2823,24 +2959,24 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); if (status) goto out; - update_stateid(&stp->st_stateid); } else { - status = nfs4_new_open(rqstp, &stp, fp, current_fh, open); + status = nfs4_get_vfs_file(rqstp, fp, current_fh, open); if (status) goto out; - init_stateid(stp, fp, open); + stp = open->op_stp; + open->op_stp = NULL; + init_open_stateid(stp, fp, open); status = nfsd4_truncate(rqstp, current_fh, open); if (status) { release_open_stateid(stp); goto out; } - if (nfsd4_has_session(&resp->cstate)) - update_stateid(&stp->st_stateid); } - memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t)); + update_stateid(&stp->st_stid.sc_stateid); + memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); if (nfsd4_has_session(&resp->cstate)) - open->op_stateowner->so_confirmed = 1; + open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; /* * Attempt to hand out a delegation. No error return, because the @@ -2851,7 +2987,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf status = nfs_ok; dprintk("%s: stateid=" STATEID_FMT "\n", __func__, - STATEID_VAL(&stp->st_stateid)); + STATEID_VAL(&stp->st_stid.sc_stateid)); out: if (fp) put_nfs4_file(fp); @@ -2861,13 +2997,34 @@ out: * To finish the open response, we just need to set the rflags. */ open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX; - if (!open->op_stateowner->so_confirmed && + if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED) && !nfsd4_has_session(&resp->cstate)) open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM; return status; } +void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status) +{ + if (open->op_openowner) { + struct nfs4_openowner *oo = open->op_openowner; + + if (!list_empty(&oo->oo_owner.so_stateids)) + list_del_init(&oo->oo_close_lru); + if (oo->oo_flags & NFS4_OO_NEW) { + if (status) { + release_openowner(oo); + open->op_openowner = NULL; + } else + oo->oo_flags &= ~NFS4_OO_NEW; + } + } + if (open->op_file) + nfsd4_free_file(open->op_file); + if (open->op_stp) + nfs4_free_stateid(open->op_stp); +} + __be32 nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, clientid_t *clid) @@ -2888,7 +3045,6 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, dprintk("nfsd4_renew: clientid not found!\n"); goto out; } - renew_client(clp); status = nfserr_cb_path_down; if (!list_empty(&clp->cl_delegations) && clp->cl_cb_state != NFSD4_CB_UP) @@ -2920,7 +3076,7 @@ static time_t nfs4_laundromat(void) { struct nfs4_client *clp; - struct nfs4_stateowner *sop; + struct nfs4_openowner *oo; struct nfs4_delegation *dp; struct list_head *pos, *next, reaplist; time_t cutoff = get_seconds() - nfsd4_lease; @@ -2977,16 +3133,14 @@ nfs4_laundromat(void) } test_val = nfsd4_lease; list_for_each_safe(pos, next, &close_lru) { - sop = list_entry(pos, struct nfs4_stateowner, so_close_lru); - if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) { - u = sop->so_time - cutoff; + oo = container_of(pos, struct nfs4_openowner, oo_close_lru); + if (time_after((unsigned long)oo->oo_time, (unsigned long)cutoff)) { + u = oo->oo_time - cutoff; if (test_val > u) test_val = u; break; } - dprintk("NFSD: purging unused open stateowner (so_id %d)\n", - sop->so_id); - release_openowner(sop); + release_openowner(oo); } if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT) clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT; @@ -3008,30 +3162,17 @@ laundromat_main(struct work_struct *not_used) queue_delayed_work(laundry_wq, &laundromat_work, t*HZ); } -static struct nfs4_stateowner * -search_close_lru(u32 st_id, int flags) -{ - struct nfs4_stateowner *local = NULL; - - if (flags & CLOSE_STATE) { - list_for_each_entry(local, &close_lru, so_close_lru) { - if (local->so_id == st_id) - return local; - } - } - return NULL; -} - -static inline int -nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp) +static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp) { - return fhp->fh_dentry->d_inode != stp->st_file->fi_inode; + if (fhp->fh_dentry->d_inode != stp->st_file->fi_inode) + return nfserr_bad_stateid; + return nfs_ok; } static int STALE_STATEID(stateid_t *stateid) { - if (stateid->si_boot == boot_time) + if (stateid->si_opaque.so_clid.cl_boot == boot_time) return 0; dprintk("NFSD: stale stateid " STATEID_FMT "!\n", STATEID_VAL(stateid)); @@ -3054,7 +3195,7 @@ access_permit_write(unsigned long access_bmap) } static -__be32 nfs4_check_openmode(struct nfs4_stateid *stp, int flags) +__be32 nfs4_check_openmode(struct nfs4_ol_stateid *stp, int flags) { __be32 status = nfserr_openmode; @@ -3097,37 +3238,83 @@ grace_disallows_io(struct inode *inode) return locks_in_grace() && mandatory_lock(inode); } -static int check_stateid_generation(stateid_t *in, stateid_t *ref, int flags) +/* Returns true iff a is later than b: */ +static bool stateid_generation_after(stateid_t *a, stateid_t *b) +{ + return (s32)a->si_generation - (s32)b->si_generation > 0; +} + +static int check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session) { /* * When sessions are used the stateid generation number is ignored * when it is zero. */ - if ((flags & HAS_SESSION) && in->si_generation == 0) - goto out; + if (has_session && in->si_generation == 0) + return nfs_ok; + + if (in->si_generation == ref->si_generation) + return nfs_ok; /* If the client sends us a stateid from the future, it's buggy: */ - if (in->si_generation > ref->si_generation) + if (stateid_generation_after(in, ref)) return nfserr_bad_stateid; /* - * The following, however, can happen. For example, if the - * client sends an open and some IO at the same time, the open - * may bump si_generation while the IO is still in flight. - * Thanks to hard links and renames, the client never knows what - * file an open will affect. So it could avoid that situation - * only by serializing all opens and IO from the same open - * owner. To recover from the old_stateid error, the client - * will just have to retry the IO: + * However, we could see a stateid from the past, even from a + * non-buggy client. For example, if the client sends a lock + * while some IO is outstanding, the lock may bump si_generation + * while the IO is still in flight. The client could avoid that + * situation by waiting for responses on all the IO requests, + * but better performance may result in retrying IO that + * receives an old_stateid error if requests are rarely + * reordered in flight: */ - if (in->si_generation < ref->si_generation) - return nfserr_old_stateid; -out: + return nfserr_old_stateid; +} + +static __be32 nfsd4_check_openowner_confirmed(struct nfs4_ol_stateid *ols) +{ + if (ols->st_stateowner->so_is_open_owner && + !(openowner(ols->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED)) + return nfserr_bad_stateid; return nfs_ok; } -static int is_delegation_stateid(stateid_t *stateid) +__be32 nfs4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) +{ + struct nfs4_stid *s; + __be32 status; + + if (STALE_STATEID(stateid)) + return nfserr_stale_stateid; + + s = find_stateid(cl, stateid); + if (!s) + return nfserr_stale_stateid; + status = check_stateid_generation(stateid, &s->sc_stateid, 1); + if (status) + return status; + if (!(s->sc_type & (NFS4_OPEN_STID | NFS4_LOCK_STID))) + return nfs_ok; + return nfsd4_check_openowner_confirmed(openlockstateid(s)); +} + +static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, struct nfs4_stid **s) { - return stateid->si_fileid == 0; + struct nfs4_client *cl; + + if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) + return nfserr_bad_stateid; + if (STALE_STATEID(stateid)) + return nfserr_stale_stateid; + cl = find_confirmed_client(&stateid->si_opaque.so_clid); + if (!cl) + return nfserr_expired; + *s = find_stateid_by_type(cl, stateid, typemask); + if (!*s) + return nfserr_bad_stateid; + return nfs_ok; + } /* @@ -3137,7 +3324,8 @@ __be32 nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, stateid_t *stateid, int flags, struct file **filpp) { - struct nfs4_stateid *stp = NULL; + struct nfs4_stid *s; + struct nfs4_ol_stateid *stp = NULL; struct nfs4_delegation *dp = NULL; struct svc_fh *current_fh = &cstate->current_fh; struct inode *ino = current_fh->fh_dentry->d_inode; @@ -3149,66 +3337,115 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, if (grace_disallows_io(ino)) return nfserr_grace; - if (nfsd4_has_session(cstate)) - flags |= HAS_SESSION; - if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) return check_special_stateids(current_fh, stateid, flags); - status = nfserr_stale_stateid; - if (STALE_STATEID(stateid)) + status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, &s); + if (status) + return status; + status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate)); + if (status) goto out; - - /* - * We assume that any stateid that has the current boot time, - * but that we can't find, is expired: - */ - status = nfserr_expired; - if (is_delegation_stateid(stateid)) { - dp = find_delegation_stateid(ino, stateid); - if (!dp) - goto out; - status = check_stateid_generation(stateid, &dp->dl_stateid, - flags); - if (status) - goto out; + switch (s->sc_type) { + case NFS4_DELEG_STID: + dp = delegstateid(s); status = nfs4_check_delegmode(dp, flags); if (status) goto out; - renew_client(dp->dl_client); if (filpp) { *filpp = dp->dl_file->fi_deleg_file; BUG_ON(!*filpp); } - } else { /* open or lock stateid */ - stp = find_stateid(stateid, flags); - if (!stp) - goto out; - status = nfserr_bad_stateid; - if (nfs4_check_fh(current_fh, stp)) - goto out; - if (!stp->st_stateowner->so_confirmed) + break; + case NFS4_OPEN_STID: + case NFS4_LOCK_STID: + stp = openlockstateid(s); + status = nfs4_check_fh(current_fh, stp); + if (status) goto out; - status = check_stateid_generation(stateid, &stp->st_stateid, - flags); + status = nfsd4_check_openowner_confirmed(stp); if (status) goto out; status = nfs4_check_openmode(stp, flags); if (status) goto out; - renew_client(stp->st_stateowner->so_client); if (filpp) { if (flags & RD_STATE) *filpp = find_readable_file(stp->st_file); else *filpp = find_writeable_file(stp->st_file); } + break; + default: + return nfserr_bad_stateid; } status = nfs_ok; out: return status; } +static __be32 +nfsd4_free_lock_stateid(struct nfs4_ol_stateid *stp) +{ + struct nfs4_lockowner *lo = lockowner(stp->st_stateowner); + + if (check_for_locks(stp->st_file, lo)) + return nfserr_locks_held; + /* + * Currently there's a 1-1 lock stateid<->lockowner + * correspondance, and we have to delete the lockowner when we + * delete the lock stateid: + */ + release_lockowner(lo); + return nfs_ok; +} + +/* + * Test if the stateid is valid + */ +__be32 +nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_test_stateid *test_stateid) +{ + /* real work is done during encoding */ + return nfs_ok; +} + +__be32 +nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_free_stateid *free_stateid) +{ + stateid_t *stateid = &free_stateid->fr_stateid; + struct nfs4_stid *s; + struct nfs4_client *cl = cstate->session->se_client; + __be32 ret = nfserr_bad_stateid; + + nfs4_lock_state(); + s = find_stateid(cl, stateid); + if (!s) + goto out; + switch (s->sc_type) { + case NFS4_DELEG_STID: + ret = nfserr_locks_held; + goto out; + case NFS4_OPEN_STID: + case NFS4_LOCK_STID: + ret = check_stateid_generation(stateid, &s->sc_stateid, 1); + if (ret) + goto out; + if (s->sc_type == NFS4_LOCK_STID) + ret = nfsd4_free_lock_stateid(openlockstateid(s)); + else + ret = nfserr_locks_held; + break; + default: + ret = nfserr_bad_stateid; + } +out: + nfs4_unlock_state(); + return ret; +} + static inline int setlkflg (int type) { @@ -3216,124 +3453,64 @@ setlkflg (int type) RD_STATE : WR_STATE; } +static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_t *stateid, u32 seqid, struct nfs4_ol_stateid *stp) +{ + struct svc_fh *current_fh = &cstate->current_fh; + struct nfs4_stateowner *sop = stp->st_stateowner; + __be32 status; + + status = nfsd4_check_seqid(cstate, sop, seqid); + if (status) + return status; + if (stp->st_stid.sc_type == NFS4_CLOSED_STID) + /* + * "Closed" stateid's exist *only* to return + * nfserr_replay_me from the previous step. + */ + return nfserr_bad_stateid; + status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); + if (status) + return status; + return nfs4_check_fh(current_fh, stp); +} + /* * Checks for sequence id mutating operations. */ static __be32 nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, - stateid_t *stateid, int flags, - struct nfs4_stateowner **sopp, - struct nfs4_stateid **stpp, struct nfsd4_lock *lock) + stateid_t *stateid, char typemask, + struct nfs4_ol_stateid **stpp) { - struct nfs4_stateid *stp; - struct nfs4_stateowner *sop; - struct svc_fh *current_fh = &cstate->current_fh; __be32 status; + struct nfs4_stid *s; dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__, seqid, STATEID_VAL(stateid)); *stpp = NULL; - *sopp = NULL; - - if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) { - dprintk("NFSD: preprocess_seqid_op: magic stateid!\n"); - return nfserr_bad_stateid; - } - - if (STALE_STATEID(stateid)) - return nfserr_stale_stateid; - - if (nfsd4_has_session(cstate)) - flags |= HAS_SESSION; - - /* - * We return BAD_STATEID if filehandle doesn't match stateid, - * the confirmed flag is incorrecly set, or the generation - * number is incorrect. - */ - stp = find_stateid(stateid, flags); - if (stp == NULL) { - /* - * Also, we should make sure this isn't just the result of - * a replayed close: - */ - sop = search_close_lru(stateid->si_stateownerid, flags); - /* It's not stale; let's assume it's expired: */ - if (sop == NULL) - return nfserr_expired; - *sopp = sop; - goto check_replay; - } - - *stpp = stp; - *sopp = sop = stp->st_stateowner; - - if (lock) { - clientid_t *lockclid = &lock->v.new.clientid; - struct nfs4_client *clp = sop->so_client; - int lkflg = 0; - __be32 status; - - lkflg = setlkflg(lock->lk_type); - - if (lock->lk_is_new) { - if (!sop->so_is_open_owner) - return nfserr_bad_stateid; - if (!(flags & HAS_SESSION) && - !same_clid(&clp->cl_clientid, lockclid)) - return nfserr_bad_stateid; - /* stp is the open stateid */ - status = nfs4_check_openmode(stp, lkflg); - if (status) - return status; - } else { - /* stp is the lock stateid */ - status = nfs4_check_openmode(stp->st_openstp, lkflg); - if (status) - return status; - } - } + status = nfsd4_lookup_stateid(stateid, typemask, &s); + if (status) + return status; + *stpp = openlockstateid(s); + cstate->replay_owner = (*stpp)->st_stateowner; - if (nfs4_check_fh(current_fh, stp)) { - dprintk("NFSD: preprocess_seqid_op: fh-stateid mismatch!\n"); - return nfserr_bad_stateid; - } + return nfs4_seqid_op_checks(cstate, stateid, seqid, *stpp); +} - /* - * We now validate the seqid and stateid generation numbers. - * For the moment, we ignore the possibility of - * generation number wraparound. - */ - if (!(flags & HAS_SESSION) && seqid != sop->so_seqid) - goto check_replay; +static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, stateid_t *stateid, struct nfs4_ol_stateid **stpp) +{ + __be32 status; + struct nfs4_openowner *oo; - if (sop->so_confirmed && flags & CONFIRM) { - dprintk("NFSD: preprocess_seqid_op: expected" - " unconfirmed stateowner!\n"); - return nfserr_bad_stateid; - } - if (!sop->so_confirmed && !(flags & CONFIRM)) { - dprintk("NFSD: preprocess_seqid_op: stateowner not" - " confirmed yet!\n"); - return nfserr_bad_stateid; - } - status = check_stateid_generation(stateid, &stp->st_stateid, flags); + status = nfs4_preprocess_seqid_op(cstate, seqid, stateid, + NFS4_OPEN_STID, stpp); if (status) return status; - renew_client(sop->so_client); + oo = openowner((*stpp)->st_stateowner); + if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) + return nfserr_bad_stateid; return nfs_ok; - -check_replay: - if (seqid == sop->so_seqid - 1) { - dprintk("NFSD: preprocess_seqid_op: retransmission?\n"); - /* indicate replay to calling function */ - return nfserr_replay_me; - } - dprintk("NFSD: preprocess_seqid_op: bad seqid (expected %d, got %d)\n", - sop->so_seqid, seqid); - *sopp = NULL; - return nfserr_bad_seqid; } __be32 @@ -3341,8 +3518,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open_confirm *oc) { __be32 status; - struct nfs4_stateowner *sop; - struct nfs4_stateid *stp; + struct nfs4_openowner *oo; + struct nfs4_ol_stateid *stp; dprintk("NFSD: nfsd4_open_confirm on file %.*s\n", (int)cstate->current_fh.fh_dentry->d_name.len, @@ -3354,39 +3531,52 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); - if ((status = nfs4_preprocess_seqid_op(cstate, + status = nfs4_preprocess_seqid_op(cstate, oc->oc_seqid, &oc->oc_req_stateid, - CONFIRM | OPEN_STATE, - &oc->oc_stateowner, &stp, NULL))) - goto out; - - sop = oc->oc_stateowner; - sop->so_confirmed = 1; - update_stateid(&stp->st_stateid); - memcpy(&oc->oc_resp_stateid, &stp->st_stateid, sizeof(stateid_t)); + NFS4_OPEN_STID, &stp); + if (status) + goto out; + oo = openowner(stp->st_stateowner); + status = nfserr_bad_stateid; + if (oo->oo_flags & NFS4_OO_CONFIRMED) + goto out; + oo->oo_flags |= NFS4_OO_CONFIRMED; + update_stateid(&stp->st_stid.sc_stateid); + memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n", - __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stateid)); + __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid)); - nfsd4_create_clid_dir(sop->so_client); + nfsd4_create_clid_dir(oo->oo_owner.so_client); + status = nfs_ok; out: - if (oc->oc_stateowner) { - nfs4_get_stateowner(oc->oc_stateowner); - cstate->replay_owner = oc->oc_stateowner; - } - nfs4_unlock_state(); + if (!cstate->replay_owner) + nfs4_unlock_state(); return status; } -static inline void nfs4_file_downgrade(struct nfs4_stateid *stp, unsigned int to_access) +static inline void nfs4_stateid_downgrade_bit(struct nfs4_ol_stateid *stp, u32 access) { - int i; + if (!test_bit(access, &stp->st_access_bmap)) + return; + nfs4_file_put_access(stp->st_file, nfs4_access_to_omode(access)); + __clear_bit(access, &stp->st_access_bmap); +} - for (i = 1; i < 4; i++) { - if (test_bit(i, &stp->st_access_bmap) - && ((i & to_access) != i)) { - nfs4_file_put_access(stp->st_file, nfs4_access_to_omode(i)); - __clear_bit(i, &stp->st_access_bmap); - } +static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_access) +{ + switch (to_access) { + case NFS4_SHARE_ACCESS_READ: + nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_WRITE); + nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_BOTH); + break; + case NFS4_SHARE_ACCESS_WRITE: + nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_READ); + nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_BOTH); + break; + case NFS4_SHARE_ACCESS_BOTH: + break; + default: + BUG(); } } @@ -3406,26 +3596,20 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, struct nfsd4_open_downgrade *od) { __be32 status; - struct nfs4_stateid *stp; + struct nfs4_ol_stateid *stp; dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n", (int)cstate->current_fh.fh_dentry->d_name.len, cstate->current_fh.fh_dentry->d_name.name); - if (!access_valid(od->od_share_access, cstate->minorversion) - || !deny_valid(od->od_share_deny)) - return nfserr_inval; /* We don't yet support WANT bits: */ od->od_share_access &= NFS4_SHARE_ACCESS_MASK; nfs4_lock_state(); - if ((status = nfs4_preprocess_seqid_op(cstate, - od->od_seqid, - &od->od_stateid, - OPEN_STATE, - &od->od_stateowner, &stp, NULL))) + status = nfs4_preprocess_confirmed_seqid_op(cstate, od->od_seqid, + &od->od_stateid, &stp); + if (status) goto out; - status = nfserr_inval; if (!test_bit(od->od_share_access, &stp->st_access_bmap)) { dprintk("NFSD:access not a subset current bitmap: 0x%lx, input access=%08x\n", @@ -3437,22 +3621,45 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, stp->st_deny_bmap, od->od_share_deny); goto out; } - nfs4_file_downgrade(stp, od->od_share_access); + nfs4_stateid_downgrade(stp, od->od_share_access); reset_union_bmap_deny(od->od_share_deny, &stp->st_deny_bmap); - update_stateid(&stp->st_stateid); - memcpy(&od->od_stateid, &stp->st_stateid, sizeof(stateid_t)); + update_stateid(&stp->st_stid.sc_stateid); + memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); status = nfs_ok; out: - if (od->od_stateowner) { - nfs4_get_stateowner(od->od_stateowner); - cstate->replay_owner = od->od_stateowner; - } - nfs4_unlock_state(); + if (!cstate->replay_owner) + nfs4_unlock_state(); return status; } +void nfsd4_purge_closed_stateid(struct nfs4_stateowner *so) +{ + struct nfs4_openowner *oo; + struct nfs4_ol_stateid *s; + + if (!so->so_is_open_owner) + return; + oo = openowner(so); + s = oo->oo_last_closed_stid; + if (!s) + return; + if (!(oo->oo_flags & NFS4_OO_PURGE_CLOSE)) { + /* Release the last_closed_stid on the next seqid bump: */ + oo->oo_flags |= NFS4_OO_PURGE_CLOSE; + return; + } + oo->oo_flags &= ~NFS4_OO_PURGE_CLOSE; + release_last_closed_stateid(oo); +} + +static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) +{ + unhash_open_stateid(s); + s->st_stid.sc_type = NFS4_CLOSED_STID; +} + /* * nfs4_unlock_state() called after encode */ @@ -3461,39 +3668,38 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_close *close) { __be32 status; - struct nfs4_stateid *stp; + struct nfs4_openowner *oo; + struct nfs4_ol_stateid *stp; dprintk("NFSD: nfsd4_close on file %.*s\n", (int)cstate->current_fh.fh_dentry->d_name.len, cstate->current_fh.fh_dentry->d_name.name); nfs4_lock_state(); - /* check close_lru for replay */ - if ((status = nfs4_preprocess_seqid_op(cstate, - close->cl_seqid, - &close->cl_stateid, - OPEN_STATE | CLOSE_STATE, - &close->cl_stateowner, &stp, NULL))) + status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid, + &close->cl_stateid, + NFS4_OPEN_STID|NFS4_CLOSED_STID, + &stp); + if (status) goto out; + oo = openowner(stp->st_stateowner); status = nfs_ok; - update_stateid(&stp->st_stateid); - memcpy(&close->cl_stateid, &stp->st_stateid, sizeof(stateid_t)); + update_stateid(&stp->st_stid.sc_stateid); + memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); - /* release_stateid() calls nfsd_close() if needed */ - release_open_stateid(stp); + nfsd4_close_open_stateid(stp); + release_last_closed_stateid(oo); + oo->oo_last_closed_stid = stp; /* place unused nfs4_stateowners on so_close_lru list to be * released by the laundromat service after the lease period * to enable us to handle CLOSE replay */ - if (list_empty(&close->cl_stateowner->so_stateids)) - move_to_close_lru(close->cl_stateowner); + if (list_empty(&oo->oo_owner.so_stateids)) + move_to_close_lru(oo); out: - if (close->cl_stateowner) { - nfs4_get_stateowner(close->cl_stateowner); - cstate->replay_owner = close->cl_stateowner; - } - nfs4_unlock_state(); + if (!cstate->replay_owner) + nfs4_unlock_state(); return status; } @@ -3503,34 +3709,22 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { struct nfs4_delegation *dp; stateid_t *stateid = &dr->dr_stateid; + struct nfs4_stid *s; struct inode *inode; __be32 status; - int flags = 0; if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) return status; inode = cstate->current_fh.fh_dentry->d_inode; - if (nfsd4_has_session(cstate)) - flags |= HAS_SESSION; nfs4_lock_state(); - status = nfserr_bad_stateid; - if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) - goto out; - status = nfserr_stale_stateid; - if (STALE_STATEID(stateid)) - goto out; - status = nfserr_bad_stateid; - if (!is_delegation_stateid(stateid)) - goto out; - status = nfserr_expired; - dp = find_delegation_stateid(inode, stateid); - if (!dp) + status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID, &s); + if (status) goto out; - status = check_stateid_generation(stateid, &dp->dl_stateid, flags); + dp = delegstateid(s); + status = check_stateid_generation(stateid, &dp->dl_stid.sc_stateid, nfsd4_has_session(cstate)); if (status) goto out; - renew_client(dp->dl_client); unhash_delegation(dp); out: @@ -3568,9 +3762,6 @@ last_byte_offset(u64 start, u64 len) return end > start ? end - 1: NFS4_MAX_UINT64; } -#define lockownerid_hashval(id) \ - ((id) & LOCK_HASH_MASK) - static inline unsigned int lock_ownerstr_hashval(struct inode *inode, u32 cl_id, struct xdr_netobj *ownername) @@ -3580,55 +3771,7 @@ lock_ownerstr_hashval(struct inode *inode, u32 cl_id, & LOCK_HASH_MASK; } -static struct list_head lock_ownerid_hashtbl[LOCK_HASH_SIZE]; static struct list_head lock_ownerstr_hashtbl[LOCK_HASH_SIZE]; -static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE]; - -static struct nfs4_stateid * -find_stateid(stateid_t *stid, int flags) -{ - struct nfs4_stateid *local; - u32 st_id = stid->si_stateownerid; - u32 f_id = stid->si_fileid; - unsigned int hashval; - - dprintk("NFSD: find_stateid flags 0x%x\n",flags); - if (flags & (LOCK_STATE | RD_STATE | WR_STATE)) { - hashval = stateid_hashval(st_id, f_id); - list_for_each_entry(local, &lockstateid_hashtbl[hashval], st_hash) { - if ((local->st_stateid.si_stateownerid == st_id) && - (local->st_stateid.si_fileid == f_id)) - return local; - } - } - - if (flags & (OPEN_STATE | RD_STATE | WR_STATE)) { - hashval = stateid_hashval(st_id, f_id); - list_for_each_entry(local, &stateid_hashtbl[hashval], st_hash) { - if ((local->st_stateid.si_stateownerid == st_id) && - (local->st_stateid.si_fileid == f_id)) - return local; - } - } - return NULL; -} - -static struct nfs4_delegation * -find_delegation_stateid(struct inode *ino, stateid_t *stid) -{ - struct nfs4_file *fp; - struct nfs4_delegation *dl; - - dprintk("NFSD: %s: stateid=" STATEID_FMT "\n", __func__, - STATEID_VAL(stid)); - - fp = find_file(ino); - if (!fp) - return NULL; - dl = find_delegation_file(fp, stid); - put_nfs4_file(fp); - return dl; -} /* * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that @@ -3655,15 +3798,21 @@ static const struct lock_manager_operations nfsd_posix_mng_ops = { static inline void nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny) { - struct nfs4_stateowner *sop; + struct nfs4_lockowner *lo; if (fl->fl_lmops == &nfsd_posix_mng_ops) { - sop = (struct nfs4_stateowner *) fl->fl_owner; - kref_get(&sop->so_ref); - deny->ld_sop = sop; - deny->ld_clientid = sop->so_client->cl_clientid; + lo = (struct nfs4_lockowner *) fl->fl_owner; + deny->ld_owner.data = kmemdup(lo->lo_owner.so_owner.data, + lo->lo_owner.so_owner.len, GFP_KERNEL); + if (!deny->ld_owner.data) + /* We just don't care that much */ + goto nevermind; + deny->ld_owner.len = lo->lo_owner.so_owner.len; + deny->ld_clientid = lo->lo_owner.so_client->cl_clientid; } else { - deny->ld_sop = NULL; +nevermind: + deny->ld_owner.len = 0; + deny->ld_owner.data = NULL; deny->ld_clientid.cl_boot = 0; deny->ld_clientid.cl_id = 0; } @@ -3676,20 +3825,43 @@ nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny) deny->ld_type = NFS4_WRITE_LT; } -static struct nfs4_stateowner * -find_lockstateowner_str(struct inode *inode, clientid_t *clid, +static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) +{ + struct nfs4_ol_stateid *lst; + + if (!same_owner_str(&lo->lo_owner, owner, clid)) + return false; + if (list_empty(&lo->lo_owner.so_stateids)) { + WARN_ON_ONCE(1); + return false; + } + lst = list_first_entry(&lo->lo_owner.so_stateids, + struct nfs4_ol_stateid, st_perstateowner); + return lst->st_file->fi_inode == inode; +} + +static struct nfs4_lockowner * +find_lockowner_str(struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) { unsigned int hashval = lock_ownerstr_hashval(inode, clid->cl_id, owner); + struct nfs4_lockowner *lo; struct nfs4_stateowner *op; list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) { - if (same_owner_str(op, owner, clid)) - return op; + lo = lockowner(op); + if (same_lockowner_ino(lo, inode, clid, owner)) + return lo; } return NULL; } +static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp) +{ + list_add(&lo->lo_owner.so_strhash, &lock_ownerstr_hashtbl[strhashval]); + list_add(&lo->lo_perstateid, &open_stp->st_lockowners); +} + /* * Alloc a lock owner structure. * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has @@ -3698,67 +3870,40 @@ find_lockstateowner_str(struct inode *inode, clientid_t *clid, * strhashval = lock_ownerstr_hashval */ -static struct nfs4_stateowner * -alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_stateid *open_stp, struct nfsd4_lock *lock) { - struct nfs4_stateowner *sop; - struct nfs4_replay *rp; - unsigned int idhashval; +static struct nfs4_lockowner * +alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp, struct nfsd4_lock *lock) { + struct nfs4_lockowner *lo; - if (!(sop = alloc_stateowner(&lock->lk_new_owner))) + lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp); + if (!lo) return NULL; - idhashval = lockownerid_hashval(current_ownerid); - INIT_LIST_HEAD(&sop->so_idhash); - INIT_LIST_HEAD(&sop->so_strhash); - INIT_LIST_HEAD(&sop->so_perclient); - INIT_LIST_HEAD(&sop->so_stateids); - INIT_LIST_HEAD(&sop->so_perstateid); - INIT_LIST_HEAD(&sop->so_close_lru); /* not used */ - sop->so_time = 0; - list_add(&sop->so_idhash, &lock_ownerid_hashtbl[idhashval]); - list_add(&sop->so_strhash, &lock_ownerstr_hashtbl[strhashval]); - list_add(&sop->so_perstateid, &open_stp->st_lockowners); - sop->so_is_open_owner = 0; - sop->so_id = current_ownerid++; - sop->so_client = clp; + INIT_LIST_HEAD(&lo->lo_owner.so_stateids); + lo->lo_owner.so_is_open_owner = 0; /* It is the openowner seqid that will be incremented in encode in the * case of new lockowners; so increment the lock seqid manually: */ - sop->so_seqid = lock->lk_new_lock_seqid + 1; - sop->so_confirmed = 1; - rp = &sop->so_replay; - rp->rp_status = nfserr_serverfault; - rp->rp_buflen = 0; - rp->rp_buf = rp->rp_ibuf; - return sop; + lo->lo_owner.so_seqid = lock->lk_new_lock_seqid + 1; + hash_lockowner(lo, strhashval, clp, open_stp); + return lo; } -static struct nfs4_stateid * -alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struct nfs4_stateid *open_stp) +static struct nfs4_ol_stateid * +alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, struct nfs4_ol_stateid *open_stp) { - struct nfs4_stateid *stp; - unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id); + struct nfs4_ol_stateid *stp; + struct nfs4_client *clp = lo->lo_owner.so_client; - stp = nfs4_alloc_stateid(); + stp = nfs4_alloc_stateid(clp); if (stp == NULL) - goto out; - INIT_LIST_HEAD(&stp->st_hash); - INIT_LIST_HEAD(&stp->st_perfile); - INIT_LIST_HEAD(&stp->st_perstateowner); - INIT_LIST_HEAD(&stp->st_lockowners); /* not used */ - list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]); + return NULL; + init_stid(&stp->st_stid, clp, NFS4_LOCK_STID); list_add(&stp->st_perfile, &fp->fi_stateids); - list_add(&stp->st_perstateowner, &sop->so_stateids); - stp->st_stateowner = sop; + list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); + stp->st_stateowner = &lo->lo_owner; get_nfs4_file(fp); stp->st_file = fp; - stp->st_stateid.si_boot = boot_time; - stp->st_stateid.si_stateownerid = sop->so_id; - stp->st_stateid.si_fileid = fp->fi_id; - stp->st_stateid.si_generation = 0; stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; - -out: return stp; } @@ -3769,7 +3914,7 @@ check_lock_length(u64 offset, u64 length) LOFF_OVERFLOW(offset, length))); } -static void get_lock_access(struct nfs4_stateid *lock_stp, u32 access) +static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access) { struct nfs4_file *fp = lock_stp->st_file; int oflag = nfs4_access_to_omode(access); @@ -3787,15 +3932,16 @@ __be32 nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_lock *lock) { - struct nfs4_stateowner *open_sop = NULL; - struct nfs4_stateowner *lock_sop = NULL; - struct nfs4_stateid *lock_stp; + struct nfs4_openowner *open_sop = NULL; + struct nfs4_lockowner *lock_sop = NULL; + struct nfs4_ol_stateid *lock_stp; struct nfs4_file *fp; struct file *filp = NULL; struct file_lock file_lock; struct file_lock conflock; __be32 status = 0; unsigned int strhashval; + int lkflg; int err; dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n", @@ -3819,7 +3965,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * Use open owner and open stateid to create lock owner and * lock stateid. */ - struct nfs4_stateid *open_stp = NULL; + struct nfs4_ol_stateid *open_stp = NULL; status = nfserr_stale_clientid; if (!nfsd4_has_session(cstate) && @@ -3827,26 +3973,29 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; /* validate and update open stateid and open seqid */ - status = nfs4_preprocess_seqid_op(cstate, + status = nfs4_preprocess_confirmed_seqid_op(cstate, lock->lk_new_open_seqid, &lock->lk_new_open_stateid, - OPEN_STATE, - &lock->lk_replay_owner, &open_stp, - lock); + &open_stp); if (status) goto out; - open_sop = lock->lk_replay_owner; + open_sop = openowner(open_stp->st_stateowner); + status = nfserr_bad_stateid; + if (!nfsd4_has_session(cstate) && + !same_clid(&open_sop->oo_owner.so_client->cl_clientid, + &lock->v.new.clientid)) + goto out; /* create lockowner and lock stateid */ fp = open_stp->st_file; - strhashval = lock_ownerstr_hashval(fp->fi_inode, - open_sop->so_client->cl_clientid.cl_id, + strhashval = lock_ownerstr_hashval(fp->fi_inode, + open_sop->oo_owner.so_client->cl_clientid.cl_id, &lock->v.new.owner); /* XXX: Do we need to check for duplicate stateowners on * the same file, or should they just be allowed (and * create new stateids)? */ status = nfserr_jukebox; lock_sop = alloc_init_lock_stateowner(strhashval, - open_sop->so_client, open_stp, lock); + open_sop->oo_owner.so_client, open_stp, lock); if (lock_sop == NULL) goto out; lock_stp = alloc_init_lock_stateid(lock_sop, fp, open_stp); @@ -3855,16 +4004,20 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } else { /* lock (lock owner + lock stateid) already exists */ status = nfs4_preprocess_seqid_op(cstate, - lock->lk_old_lock_seqid, - &lock->lk_old_lock_stateid, - LOCK_STATE, - &lock->lk_replay_owner, &lock_stp, lock); + lock->lk_old_lock_seqid, + &lock->lk_old_lock_stateid, + NFS4_LOCK_STID, &lock_stp); if (status) goto out; - lock_sop = lock->lk_replay_owner; + lock_sop = lockowner(lock_stp->st_stateowner); fp = lock_stp->st_file; } - /* lock->lk_replay_owner and lock_stp have been created or found */ + /* lock_sop and lock_stp have been created or found */ + + lkflg = setlkflg(lock->lk_type); + status = nfs4_check_openmode(lock_stp, lkflg); + if (status) + goto out; status = nfserr_grace; if (locks_in_grace() && !lock->lk_reclaim) @@ -3915,8 +4068,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, err = vfs_lock_file(filp, F_SETLK, &file_lock, &conflock); switch (-err) { case 0: /* success! */ - update_stateid(&lock_stp->st_stateid); - memcpy(&lock->lk_resp_stateid, &lock_stp->st_stateid, + update_stateid(&lock_stp->st_stid.sc_stateid); + memcpy(&lock->lk_resp_stateid, &lock_stp->st_stid.sc_stateid, sizeof(stateid_t)); status = 0; break; @@ -3936,11 +4089,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, out: if (status && lock->lk_is_new && lock_sop) release_lockowner(lock_sop); - if (lock->lk_replay_owner) { - nfs4_get_stateowner(lock->lk_replay_owner); - cstate->replay_owner = lock->lk_replay_owner; - } - nfs4_unlock_state(); + if (!cstate->replay_owner) + nfs4_unlock_state(); return status; } @@ -3970,6 +4120,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { struct inode *inode; struct file_lock file_lock; + struct nfs4_lockowner *lo; __be32 status; if (locks_in_grace()) @@ -3978,19 +4129,14 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (check_lock_length(lockt->lt_offset, lockt->lt_length)) return nfserr_inval; - lockt->lt_stateowner = NULL; nfs4_lock_state(); status = nfserr_stale_clientid; if (!nfsd4_has_session(cstate) && STALE_CLIENTID(&lockt->lt_clientid)) goto out; - if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) { - dprintk("NFSD: nfsd4_lockt: fh_verify() failed!\n"); - if (status == nfserr_symlink) - status = nfserr_inval; + if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) goto out; - } inode = cstate->current_fh.fh_dentry->d_inode; locks_init_lock(&file_lock); @@ -4009,10 +4155,9 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; } - lockt->lt_stateowner = find_lockstateowner_str(inode, - &lockt->lt_clientid, &lockt->lt_owner); - if (lockt->lt_stateowner) - file_lock.fl_owner = (fl_owner_t)lockt->lt_stateowner; + lo = find_lockowner_str(inode, &lockt->lt_clientid, &lockt->lt_owner); + if (lo) + file_lock.fl_owner = (fl_owner_t)lo; file_lock.fl_pid = current->tgid; file_lock.fl_flags = FL_POSIX; @@ -4038,7 +4183,7 @@ __be32 nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_locku *locku) { - struct nfs4_stateid *stp; + struct nfs4_ol_stateid *stp; struct file *filp = NULL; struct file_lock file_lock; __be32 status; @@ -4053,13 +4198,10 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); - if ((status = nfs4_preprocess_seqid_op(cstate, - locku->lu_seqid, - &locku->lu_stateid, - LOCK_STATE, - &locku->lu_stateowner, &stp, NULL))) + status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid, + &locku->lu_stateid, NFS4_LOCK_STID, &stp); + if (status) goto out; - filp = find_any_file(stp->st_file); if (!filp) { status = nfserr_lock_range; @@ -4068,7 +4210,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, BUG_ON(!filp); locks_init_lock(&file_lock); file_lock.fl_type = F_UNLCK; - file_lock.fl_owner = (fl_owner_t) locku->lu_stateowner; + file_lock.fl_owner = (fl_owner_t)lockowner(stp->st_stateowner); file_lock.fl_pid = current->tgid; file_lock.fl_file = filp; file_lock.fl_flags = FL_POSIX; @@ -4089,15 +4231,12 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* * OK, unlock succeeded; the only thing left to do is update the stateid. */ - update_stateid(&stp->st_stateid); - memcpy(&locku->lu_stateid, &stp->st_stateid, sizeof(stateid_t)); + update_stateid(&stp->st_stid.sc_stateid); + memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); out: - if (locku->lu_stateowner) { - nfs4_get_stateowner(locku->lu_stateowner); - cstate->replay_owner = locku->lu_stateowner; - } - nfs4_unlock_state(); + if (!cstate->replay_owner) + nfs4_unlock_state(); return status; out_nfserr: @@ -4111,7 +4250,7 @@ out_nfserr: * 0: no locks held by lockowner */ static int -check_for_locks(struct nfs4_file *filp, struct nfs4_stateowner *lowner) +check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner) { struct file_lock **flpp; struct inode *inode = filp->fi_inode; @@ -4136,7 +4275,8 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, { clientid_t *clid = &rlockowner->rl_clientid; struct nfs4_stateowner *sop; - struct nfs4_stateid *stp; + struct nfs4_lockowner *lo; + struct nfs4_ol_stateid *stp; struct xdr_netobj *owner = &rlockowner->rl_owner; struct list_head matches; int i; @@ -4160,16 +4300,15 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, * data structures. */ INIT_LIST_HEAD(&matches); for (i = 0; i < LOCK_HASH_SIZE; i++) { - list_for_each_entry(sop, &lock_ownerid_hashtbl[i], so_idhash) { + list_for_each_entry(sop, &lock_ownerstr_hashtbl[i], so_strhash) { if (!same_owner_str(sop, owner, clid)) continue; list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) { - if (check_for_locks(stp->st_file, sop)) + lo = lockowner(sop); + if (check_for_locks(stp->st_file, lo)) goto out; - /* Note: so_perclient unused for lockowners, - * so it's OK to fool with here. */ - list_add(&sop->so_perclient, &matches); + list_add(&lo->lo_list, &matches); } } } @@ -4178,12 +4317,12 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, * have been checked. */ status = nfs_ok; while (!list_empty(&matches)) { - sop = list_entry(matches.next, struct nfs4_stateowner, - so_perclient); + lo = list_entry(matches.next, struct nfs4_lockowner, + lo_list); /* unhash_stateowner deletes so_perclient only * for openowners. */ - list_del(&sop->so_perclient); - release_lockowner(sop); + list_del(&lo->lo_list); + release_lockowner(lo); } out: nfs4_unlock_state(); @@ -4305,16 +4444,10 @@ nfs4_state_init(void) for (i = 0; i < FILE_HASH_SIZE; i++) { INIT_LIST_HEAD(&file_hashtbl[i]); } - for (i = 0; i < OWNER_HASH_SIZE; i++) { - INIT_LIST_HEAD(&ownerstr_hashtbl[i]); - INIT_LIST_HEAD(&ownerid_hashtbl[i]); - } - for (i = 0; i < STATEID_HASH_SIZE; i++) { - INIT_LIST_HEAD(&stateid_hashtbl[i]); - INIT_LIST_HEAD(&lockstateid_hashtbl[i]); + for (i = 0; i < OPEN_OWNER_HASH_SIZE; i++) { + INIT_LIST_HEAD(&open_ownerstr_hashtbl[i]); } for (i = 0; i < LOCK_HASH_SIZE; i++) { - INIT_LIST_HEAD(&lock_ownerid_hashtbl[i]); INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]); } memset(&onestateid, ~0, sizeof(stateid_t)); @@ -4331,7 +4464,7 @@ nfsd4_load_reboot_recovery_data(void) int status; nfs4_lock_state(); - nfsd4_init_recdir(user_recovery_dirname); + nfsd4_init_recdir(); status = nfsd4_recdir_load(); nfs4_unlock_state(); if (status) @@ -4440,40 +4573,3 @@ nfs4_state_shutdown(void) nfs4_unlock_state(); nfsd4_destroy_callback_queue(); } - -/* - * user_recovery_dirname is protected by the nfsd_mutex since it's only - * accessed when nfsd is starting. - */ -static void -nfs4_set_recdir(char *recdir) -{ - strcpy(user_recovery_dirname, recdir); -} - -/* - * Change the NFSv4 recovery directory to recdir. - */ -int -nfs4_reset_recoverydir(char *recdir) -{ - int status; - struct path path; - - status = kern_path(recdir, LOOKUP_FOLLOW, &path); - if (status) - return status; - status = -ENOTDIR; - if (S_ISDIR(path.dentry->d_inode->i_mode)) { - nfs4_set_recdir(recdir); - status = 0; - } - path_put(&path); - return status; -} - -char * -nfs4_recoverydir(void) -{ - return user_recovery_dirname; -} diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 45f53ae..9d2c52b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -44,13 +44,15 @@ #include #include #include +#include #include #include "idmap.h" #include "acl.h" #include "xdr4.h" #include "vfs.h" - +#include "state.h" +#include "cache.h" #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -131,6 +133,22 @@ xdr_error: \ } \ } while (0) +static void save_buf(struct nfsd4_compoundargs *argp, struct nfsd4_saved_compoundargs *savep) +{ + savep->p = argp->p; + savep->end = argp->end; + savep->pagelen = argp->pagelen; + savep->pagelist = argp->pagelist; +} + +static void restore_buf(struct nfsd4_compoundargs *argp, struct nfsd4_saved_compoundargs *savep) +{ + argp->p = savep->p; + argp->end = savep->end; + argp->pagelen = savep->pagelen; + argp->pagelist = savep->pagelist; +} + static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) { /* We want more bytes than seem to be available. @@ -159,8 +177,8 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) */ memcpy(p, argp->p, avail); /* step to next page */ - argp->pagelist++; argp->p = page_address(argp->pagelist[0]); + argp->pagelist++; if (argp->pagelen < PAGE_SIZE) { argp->end = argp->p + (argp->pagelen>>2); argp->pagelen = 0; @@ -432,7 +450,6 @@ nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) { DECODE_HEAD; - close->cl_stateowner = NULL; READ_BUF(4); READ32(close->cl_seqid); return nfsd4_decode_stateid(argp, &close->cl_stateid); @@ -465,7 +482,18 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create READ_BUF(4); READ32(create->cr_linklen); READ_BUF(create->cr_linklen); - SAVEMEM(create->cr_linkname, create->cr_linklen); + /* + * The VFS will want a null-terminated string, and + * null-terminating in place isn't safe since this might + * end on a page boundary: + */ + create->cr_linkname = + kmalloc(create->cr_linklen + 1, GFP_KERNEL); + if (!create->cr_linkname) + return nfserr_jukebox; + memcpy(create->cr_linkname, p, create->cr_linklen); + create->cr_linkname[create->cr_linklen] = '\0'; + defer_free(argp, kfree, create->cr_linkname); break; case NF4BLK: case NF4CHR: @@ -527,7 +555,6 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) { DECODE_HEAD; - lock->lk_replay_owner = NULL; /* * type, reclaim(boolean), offset, length, new_lock_owner(boolean) */ @@ -587,7 +614,6 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) { DECODE_HEAD; - locku->lu_stateowner = NULL; READ_BUF(8); READ32(locku->lu_type); if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT)) @@ -618,6 +644,83 @@ nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup DECODE_TAIL; } +static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *x) +{ + __be32 *p; + u32 w; + + READ_BUF(4); + READ32(w); + *x = w; + switch (w & NFS4_SHARE_ACCESS_MASK) { + case NFS4_SHARE_ACCESS_READ: + case NFS4_SHARE_ACCESS_WRITE: + case NFS4_SHARE_ACCESS_BOTH: + break; + default: + return nfserr_bad_xdr; + } + w &= ~NFS4_SHARE_ACCESS_MASK; + if (!w) + return nfs_ok; + if (!argp->minorversion) + return nfserr_bad_xdr; + switch (w & NFS4_SHARE_WANT_MASK) { + case NFS4_SHARE_WANT_NO_PREFERENCE: + case NFS4_SHARE_WANT_READ_DELEG: + case NFS4_SHARE_WANT_WRITE_DELEG: + case NFS4_SHARE_WANT_ANY_DELEG: + case NFS4_SHARE_WANT_NO_DELEG: + case NFS4_SHARE_WANT_CANCEL: + break; + default: + return nfserr_bad_xdr; + } + w &= ~NFS4_SHARE_WANT_MASK; + if (!w) + return nfs_ok; + switch (w) { + case NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL: + case NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED: + case (NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL | + NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED): + return nfs_ok; + } +xdr_error: + return nfserr_bad_xdr; +} + +static __be32 nfsd4_decode_share_deny(struct nfsd4_compoundargs *argp, u32 *x) +{ + __be32 *p; + + READ_BUF(4); + READ32(*x); + /* Note: unlinke access bits, deny bits may be zero. */ + if (*x & ~NFS4_SHARE_DENY_BOTH) + return nfserr_bad_xdr; + return nfs_ok; +xdr_error: + return nfserr_bad_xdr; +} + +static __be32 nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_netobj *o) +{ + __be32 *p; + + READ_BUF(4); + READ32(o->len); + + if (o->len == 0 || o->len > NFS4_OPAQUE_LIMIT) + return nfserr_bad_xdr; + + READ_BUF(o->len); + SAVEMEM(o->data, o->len); + return nfs_ok; +xdr_error: + return nfserr_bad_xdr; +} + static __be32 nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) { @@ -625,19 +728,23 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) memset(open->op_bmval, 0, sizeof(open->op_bmval)); open->op_iattr.ia_valid = 0; - open->op_stateowner = NULL; + open->op_openowner = NULL; /* seqid, share_access, share_deny, clientid, ownerlen */ - READ_BUF(16 + sizeof(clientid_t)); + READ_BUF(4); READ32(open->op_seqid); - READ32(open->op_share_access); - READ32(open->op_share_deny); + status = nfsd4_decode_share_access(argp, &open->op_share_access); + if (status) + goto xdr_error; + status = nfsd4_decode_share_deny(argp, &open->op_share_deny); + if (status) + goto xdr_error; + READ_BUF(sizeof(clientid_t)); COPYMEM(&open->op_clientid, sizeof(clientid_t)); - READ32(open->op_owner.len); - - /* owner, open_flag */ - READ_BUF(open->op_owner.len + 4); - SAVEMEM(open->op_owner.data, open->op_owner.len); + status = nfsd4_decode_opaque(argp, &open->op_owner); + if (status) + goto xdr_error; + READ_BUF(4); READ32(open->op_create); switch (open->op_create) { case NFS4_OPEN_NOCREATE: @@ -703,6 +810,19 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) if ((status = check_filename(open->op_fname.data, open->op_fname.len, nfserr_inval))) return status; break; + case NFS4_OPEN_CLAIM_FH: + case NFS4_OPEN_CLAIM_DELEG_PREV_FH: + if (argp->minorversion < 1) + goto xdr_error; + /* void */ + break; + case NFS4_OPEN_CLAIM_DELEG_CUR_FH: + if (argp->minorversion < 1) + goto xdr_error; + status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid); + if (status) + return status; + break; default: goto xdr_error; } @@ -715,7 +835,6 @@ nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_con { DECODE_HEAD; - open_conf->oc_stateowner = NULL; status = nfsd4_decode_stateid(argp, &open_conf->oc_req_stateid); if (status) return status; @@ -730,15 +849,17 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d { DECODE_HEAD; - open_down->od_stateowner = NULL; status = nfsd4_decode_stateid(argp, &open_down->od_stateid); if (status) return status; - READ_BUF(12); + READ_BUF(4); READ32(open_down->od_seqid); - READ32(open_down->od_share_access); - READ32(open_down->od_share_deny); - + status = nfsd4_decode_share_access(argp, &open_down->od_share_access); + if (status) + return status; + status = nfsd4_decode_share_deny(argp, &open_down->od_share_deny); + if (status) + return status; DECODE_TAIL; } @@ -879,12 +1000,13 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient { DECODE_HEAD; - READ_BUF(12); + READ_BUF(8); COPYMEM(setclientid->se_verf.data, 8); - READ32(setclientid->se_namelen); - READ_BUF(setclientid->se_namelen + 8); - SAVEMEM(setclientid->se_name, setclientid->se_namelen); + status = nfsd4_decode_opaque(argp, &setclientid->se_name); + if (status) + return nfserr_bad_xdr; + READ_BUF(8); READ32(setclientid->se_callback_prog); READ32(setclientid->se_callback_netid_len); @@ -1027,11 +1149,9 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, READ_BUF(NFS4_VERIFIER_SIZE); COPYMEM(exid->verifier.data, NFS4_VERIFIER_SIZE); - READ_BUF(4); - READ32(exid->clname.len); - - READ_BUF(exid->clname.len); - SAVEMEM(exid->clname.data, exid->clname.len); + status = nfsd4_decode_opaque(argp, &exid->clname); + if (status) + return nfserr_bad_xdr; READ_BUF(4); READ32(exid->flags); @@ -1240,6 +1360,19 @@ nfsd4_decode_destroy_session(struct nfsd4_compoundargs *argp, } static __be32 +nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp, + struct nfsd4_free_stateid *free_stateid) +{ + DECODE_HEAD; + + READ_BUF(sizeof(stateid_t)); + READ32(free_stateid->fr_stateid.si_generation); + COPYMEM(&free_stateid->fr_stateid.si_opaque, sizeof(stateid_opaque_t)); + + DECODE_TAIL; +} + +static __be32 nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, struct nfsd4_sequence *seq) { @@ -1255,6 +1388,50 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, DECODE_TAIL; } +static __be32 +nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_stateid *test_stateid) +{ + unsigned int nbytes; + stateid_t si; + int i; + __be32 *p; + __be32 status; + + READ_BUF(4); + test_stateid->ts_num_ids = ntohl(*p++); + + nbytes = test_stateid->ts_num_ids * sizeof(stateid_t); + if (nbytes > (u32)((char *)argp->end - (char *)argp->p)) + goto xdr_error; + + test_stateid->ts_saved_args = argp; + save_buf(argp, &test_stateid->ts_savedp); + + for (i = 0; i < test_stateid->ts_num_ids; i++) { + status = nfsd4_decode_stateid(argp, &si); + if (status) + return status; + } + + status = 0; +out: + return status; +xdr_error: + dprintk("NFSD: xdr error (%s:%d)\n", __FILE__, __LINE__); + status = nfserr_bad_xdr; + goto out; +} + +static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp, struct nfsd4_destroy_clientid *dc) +{ + DECODE_HEAD; + + READ_BUF(8); + COPYMEM(&dc->clientid, 8); + + DECODE_TAIL; +} + static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, struct nfsd4_reclaim_complete *rc) { DECODE_HEAD; @@ -1364,7 +1541,7 @@ static nfsd4_dec nfsd41_dec_ops[] = { [OP_EXCHANGE_ID] = (nfsd4_dec)nfsd4_decode_exchange_id, [OP_CREATE_SESSION] = (nfsd4_dec)nfsd4_decode_create_session, [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session, - [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_free_stateid, [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, @@ -1374,9 +1551,9 @@ static nfsd4_dec nfsd41_dec_ops[] = { [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name, [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence, [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_test_stateid, [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid, [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete, }; @@ -1396,6 +1573,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) DECODE_HEAD; struct nfsd4_op *op; struct nfsd4_minorversion_ops *ops; + bool cachethis = false; int i; /* @@ -1477,7 +1655,16 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) argp->opcnt = i+1; break; } + /* + * We'll try to cache the result in the DRC if any one + * op in the compound wants to be cached: + */ + cachethis |= nfsd4_cache_this_op(op); } + /* Sessions make the DRC unnecessary: */ + if (argp->minorversion) + cachethis = false; + argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE; DECODE_TAIL; } @@ -1542,18 +1729,6 @@ static void write_cinfo(__be32 **p, struct nfsd4_change_info *c) \ save = resp->p; -static bool seqid_mutating_err(__be32 err) -{ - /* rfc 3530 section 8.1.5: */ - return err != nfserr_stale_clientid && - err != nfserr_stale_stateid && - err != nfserr_bad_stateid && - err != nfserr_bad_seqid && - err != nfserr_bad_xdr && - err != nfserr_resource && - err != nfserr_nofilehandle; -} - /* * Routine for encoding the result of a "seqid-mutating" NFSv4 operation. This * is where sequence id's are incremented, and the replay cache is filled. @@ -1561,15 +1736,20 @@ static bool seqid_mutating_err(__be32 err) * we know whether the error to be returned is a sequence id mutating error. */ -#define ENCODE_SEQID_OP_TAIL(stateowner) do { \ - if (seqid_mutating_err(nfserr) && stateowner) { \ - stateowner->so_seqid++; \ - stateowner->so_replay.rp_status = nfserr; \ - stateowner->so_replay.rp_buflen = \ - (((char *)(resp)->p - (char *)save)); \ - memcpy(stateowner->so_replay.rp_buf, save, \ - stateowner->so_replay.rp_buflen); \ - } } while (0); +static void encode_seqid_op_tail(struct nfsd4_compoundres *resp, __be32 *save, __be32 nfserr) +{ + struct nfs4_stateowner *stateowner = resp->cstate.replay_owner; + + if (seqid_mutating_err(ntohl(nfserr)) && stateowner) { + stateowner->so_seqid++; + stateowner->so_replay.rp_status = nfserr; + stateowner->so_replay.rp_buflen = + (char *)resp->p - (char *)save; + memcpy(stateowner->so_replay.rp_buf, save, + stateowner->so_replay.rp_buflen); + nfsd4_purge_closed_stateid(stateowner); + } +} /* Encode as an array of strings the string given with components * separated @sep. @@ -1628,36 +1808,89 @@ static __be32 nfsd4_encode_fs_location4(struct nfsd4_fs_location *location, } /* - * Return the path to an export point in the pseudo filesystem namespace - * Returned string is safe to use as long as the caller holds a reference - * to @exp. + * Encode a path in RFC3530 'pathname4' format */ -static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 *stat) +static __be32 nfsd4_encode_path(const struct path *root, + const struct path *path, __be32 **pp, int *buflen) { - struct svc_fh tmp_fh; - char *path = NULL, *rootpath; - size_t rootlen; + struct path cur = { + .mnt = path->mnt, + .dentry = path->dentry, + }; + __be32 *p = *pp; + struct dentry **components = NULL; + unsigned int ncomponents = 0; + __be32 err = nfserr_jukebox; - fh_init(&tmp_fh, NFS4_FHSIZE); - *stat = exp_pseudoroot(rqstp, &tmp_fh); - if (*stat) - return NULL; - rootpath = tmp_fh.fh_export->ex_pathname; + dprintk("nfsd4_encode_components("); + + path_get(&cur); + /* First walk the path up to the nfsd root, and store the + * dentries/path components in an array. + */ + for (;;) { + if (cur.dentry == root->dentry && cur.mnt == root->mnt) + break; + if (cur.dentry == cur.mnt->mnt_root) { + if (follow_up(&cur)) + continue; + goto out_free; + } + if ((ncomponents & 15) == 0) { + struct dentry **new; + new = krealloc(components, + sizeof(*new) * (ncomponents + 16), + GFP_KERNEL); + if (!new) + goto out_free; + components = new; + } + components[ncomponents++] = cur.dentry; + cur.dentry = dget_parent(cur.dentry); + } - path = exp->ex_pathname; + *buflen -= 4; + if (*buflen < 0) + goto out_free; + WRITE32(ncomponents); - rootlen = strlen(rootpath); - if (strncmp(path, rootpath, rootlen)) { - dprintk("nfsd: fs_locations failed;" - "%s is not contained in %s\n", path, rootpath); - *stat = nfserr_notsupp; - path = NULL; - goto out; + while (ncomponents) { + struct dentry *dentry = components[ncomponents - 1]; + unsigned int len = dentry->d_name.len; + + *buflen -= 4 + (XDR_QUADLEN(len) << 2); + if (*buflen < 0) + goto out_free; + WRITE32(len); + WRITEMEM(dentry->d_name.name, len); + dprintk("/%s", dentry->d_name.name); + dput(dentry); + ncomponents--; } - path += rootlen; -out: - fh_put(&tmp_fh); - return path; + + *pp = p; + err = 0; +out_free: + dprintk(")\n"); + while (ncomponents) + dput(components[--ncomponents]); + kfree(components); + path_put(&cur); + return err; +} + +static __be32 nfsd4_encode_fsloc_fsroot(struct svc_rqst *rqstp, + const struct path *path, __be32 **pp, int *buflen) +{ + struct svc_export *exp_ps; + __be32 res; + + exp_ps = rqst_find_fsidzero_export(rqstp); + if (IS_ERR(exp_ps)) + return nfserrno(PTR_ERR(exp_ps)); + res = nfsd4_encode_path(&exp_ps->ex_path, path, pp, buflen); + exp_put(exp_ps); + return res; } /* @@ -1671,11 +1904,8 @@ static __be32 nfsd4_encode_fs_locations(struct svc_rqst *rqstp, int i; __be32 *p = *pp; struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs; - char *root = nfsd4_path(rqstp, exp, &status); - if (status) - return status; - status = nfsd4_encode_components('/', root, &p, buflen); + status = nfsd4_encode_fsloc_fsroot(rqstp, &exp->ex_path, &p, buflen); if (status) return status; if ((*buflen -= 4) < 0) @@ -1691,12 +1921,19 @@ static __be32 nfsd4_encode_fs_locations(struct svc_rqst *rqstp, return 0; } -static u32 nfs4_ftypes[16] = { - NF4BAD, NF4FIFO, NF4CHR, NF4BAD, - NF4DIR, NF4BAD, NF4BLK, NF4BAD, - NF4REG, NF4BAD, NF4LNK, NF4BAD, - NF4SOCK, NF4BAD, NF4LNK, NF4BAD, -}; +static u32 nfs4_file_type(umode_t mode) +{ + switch (mode & S_IFMT) { + case S_IFIFO: return NF4FIFO; + case S_IFCHR: return NF4CHR; + case S_IFDIR: return NF4DIR; + case S_IFBLK: return NF4BLK; + case S_IFLNK: return NF4LNK; + case S_IFREG: return NF4REG; + case S_IFSOCK: return NF4SOCK; + default: return NF4BAD; + }; +} static __be32 nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group, @@ -1809,8 +2046,8 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, err = vfs_getattr(exp->ex_path.mnt, dentry, &stat); if (err) goto out_nfserr; - if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | - FATTR4_WORD0_MAXNAME)) || + if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE | + FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) || (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL))) { err = vfs_statfs(&path, &statfs); @@ -1885,7 +2122,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, if (bmval0 & FATTR4_WORD0_TYPE) { if ((buflen -= 4) < 0) goto out_resource; - dummy = nfs4_ftypes[(stat.mode & S_IFMT) >> 12]; + dummy = nfs4_file_type(stat.mode); if (dummy == NF4BAD) goto out_serverfault; WRITE32(dummy); @@ -2187,6 +2424,8 @@ out_acl: WRITE64(stat.ino); } if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { + if ((buflen -= 16) < 0) + goto out_resource; WRITE32(3); WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0); WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD1); @@ -2416,7 +2655,7 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c if (!nfserr) nfsd4_encode_stateid(resp, &close->cl_stateid); - ENCODE_SEQID_OP_TAIL(close->cl_stateowner); + encode_seqid_op_tail(resp, save, nfserr); return nfserr; } @@ -2492,17 +2731,18 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh static void nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denied *ld) { + struct xdr_netobj *conf = &ld->ld_owner; __be32 *p; - RESERVE_SPACE(32 + XDR_LEN(ld->ld_sop ? ld->ld_sop->so_owner.len : 0)); + RESERVE_SPACE(32 + XDR_LEN(conf->len)); WRITE64(ld->ld_start); WRITE64(ld->ld_length); WRITE32(ld->ld_type); - if (ld->ld_sop) { + if (conf->len) { WRITEMEM(&ld->ld_clientid, 8); - WRITE32(ld->ld_sop->so_owner.len); - WRITEMEM(ld->ld_sop->so_owner.data, ld->ld_sop->so_owner.len); - kref_put(&ld->ld_sop->so_ref, nfs4_free_stateowner); + WRITE32(conf->len); + WRITEMEM(conf->data, conf->len); + kfree(conf->data); } else { /* non - nfsv4 lock in conflict, no clientid nor owner */ WRITE64((u64)0); /* clientid */ WRITE32(0); /* length of owner name */ @@ -2520,7 +2760,7 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo else if (nfserr == nfserr_denied) nfsd4_encode_lock_denied(resp, &lock->lk_denied); - ENCODE_SEQID_OP_TAIL(lock->lk_replay_owner); + encode_seqid_op_tail(resp, save, nfserr); return nfserr; } @@ -2540,7 +2780,7 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l if (!nfserr) nfsd4_encode_stateid(resp, &locku->lu_stateid); - ENCODE_SEQID_OP_TAIL(locku->lu_stateowner); + encode_seqid_op_tail(resp, save, nfserr); return nfserr; } @@ -2621,7 +2861,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op } /* XXX save filehandle here */ out: - ENCODE_SEQID_OP_TAIL(open->op_stateowner); + encode_seqid_op_tail(resp, save, nfserr); return nfserr; } @@ -2633,7 +2873,7 @@ nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct if (!nfserr) nfsd4_encode_stateid(resp, &oc->oc_resp_stateid); - ENCODE_SEQID_OP_TAIL(oc->oc_stateowner); + encode_seqid_op_tail(resp, save, nfserr); return nfserr; } @@ -2645,7 +2885,7 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struc if (!nfserr) nfsd4_encode_stateid(resp, &od->od_stateid); - ENCODE_SEQID_OP_TAIL(od->od_stateowner); + encode_seqid_op_tail(resp, save, nfserr); return nfserr; } @@ -2692,8 +2932,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen, &maxcount); - if (nfserr == nfserr_symlink) - nfserr = nfserr_inval; if (nfserr) return nfserr; eof = (read->rd_offset + maxcount >= @@ -2823,8 +3061,6 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 readdir->common.err == nfserr_toosmall && readdir->buffer == page) nfserr = nfserr_toosmall; - if (nfserr == nfserr_symlink) - nfserr = nfserr_notdir; if (nfserr) goto err_no_verf; @@ -3128,6 +3364,21 @@ nfsd4_encode_destroy_session(struct nfsd4_compoundres *resp, int nfserr, } static __be32 +nfsd4_encode_free_stateid(struct nfsd4_compoundres *resp, int nfserr, + struct nfsd4_free_stateid *free_stateid) +{ + __be32 *p; + + if (nfserr) + return nfserr; + + RESERVE_SPACE(4); + WRITE32(nfserr); + ADJUST_ARGS(); + return nfserr; +} + +static __be32 nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_sequence *seq) { @@ -3140,9 +3391,9 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, WRITEMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); WRITE32(seq->seqid); WRITE32(seq->slotid); - WRITE32(seq->maxslots); - /* For now: target_maxslots = maxslots */ - WRITE32(seq->maxslots); + /* Note slotid's are numbered from zero: */ + WRITE32(seq->maxslots - 1); /* sr_highest_slotid */ + WRITE32(seq->maxslots - 1); /* sr_target_highest_slotid */ WRITE32(seq->status_flags); ADJUST_ARGS(); @@ -3150,6 +3401,37 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, return 0; } +__be32 +nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, int nfserr, + struct nfsd4_test_stateid *test_stateid) +{ + struct nfsd4_compoundargs *argp; + struct nfs4_client *cl = resp->cstate.session->se_client; + stateid_t si; + __be32 *p; + int i; + int valid; + + restore_buf(test_stateid->ts_saved_args, &test_stateid->ts_savedp); + argp = test_stateid->ts_saved_args; + + RESERVE_SPACE(4); + *p++ = htonl(test_stateid->ts_num_ids); + resp->p = p; + + nfs4_lock_state(); + for (i = 0; i < test_stateid->ts_num_ids; i++) { + nfsd4_decode_stateid(argp, &si); + valid = nfs4_validate_stateid(cl, &si); + RESERVE_SPACE(4); + *p++ = valid; + resp->p = p; + } + nfs4_unlock_state(); + + return nfserr; +} + static __be32 nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) { @@ -3208,7 +3490,7 @@ static nfsd4_enc nfsd4_enc_ops[] = { [OP_EXCHANGE_ID] = (nfsd4_enc)nfsd4_encode_exchange_id, [OP_CREATE_SESSION] = (nfsd4_enc)nfsd4_encode_create_session, [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_destroy_session, - [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop, + [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_free_stateid, [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop, [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, @@ -3218,7 +3500,7 @@ static nfsd4_enc nfsd4_enc_ops[] = { [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name, [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence, [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop, - [OP_TEST_STATEID] = (nfsd4_enc)nfsd4_encode_noop, + [OP_TEST_STATEID] = (nfsd4_enc)nfsd4_encode_test_stateid, [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop, [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop, @@ -3226,34 +3508,29 @@ static nfsd4_enc nfsd4_enc_ops[] = { /* * Calculate the total amount of memory that the compound response has taken - * after encoding the current operation. + * after encoding the current operation with pad. * - * pad: add on 8 bytes for the next operation's op_code and status so that - * there is room to cache a failure on the next operation. + * pad: if operation is non-idempotent, pad was calculate by op_rsize_bop() + * which was specified at nfsd4_operation, else pad is zero. * - * Compare this length to the session se_fmaxresp_cached. + * Compare this length to the session se_fmaxresp_sz and se_fmaxresp_cached. * * Our se_fmaxresp_cached will always be a multiple of PAGE_SIZE, and so * will be at least a page and will therefore hold the xdr_buf head. */ -static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp) +int nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad) { - int status = 0; struct xdr_buf *xb = &resp->rqstp->rq_res; - struct nfsd4_compoundargs *args = resp->rqstp->rq_argp; struct nfsd4_session *session = NULL; struct nfsd4_slot *slot = resp->cstate.slot; - u32 length, tlen = 0, pad = 8; + u32 length, tlen = 0; if (!nfsd4_has_session(&resp->cstate)) - return status; + return 0; session = resp->cstate.session; - if (session == NULL || slot->sl_cachethis == 0) - return status; - - if (resp->opcnt >= args->opcnt) - pad = 0; /* this is the last operation */ + if (session == NULL) + return 0; if (xb->page_len == 0) { length = (char *)resp->p - (char *)xb->head[0].iov_base + pad; @@ -3266,10 +3543,14 @@ static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp) dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__, length, xb->page_len, tlen, pad); - if (length <= session->se_fchannel.maxresp_cached) - return status; - else + if (length > session->se_fchannel.maxresp_sz) + return nfserr_rep_too_big; + + if (slot->sl_cachethis == 1 && + length > session->se_fchannel.maxresp_cached) return nfserr_rep_too_big_to_cache; + + return 0; } void @@ -3289,8 +3570,8 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) !nfsd4_enc_ops[op->opnum]); op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u); /* nfsd4_check_drc_limit guarantees enough room for error status */ - if (!op->status && nfsd4_check_drc_limit(resp)) - op->status = nfserr_rep_too_big_to_cache; + if (!op->status) + op->status = nfsd4_check_resp_size(resp, 0); status: /* * Note: We write the status directly, instead of using WRITE32(), @@ -3331,8 +3612,11 @@ nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy) return xdr_ressize_check(rqstp, p); } -void nfsd4_release_compoundargs(struct nfsd4_compoundargs *args) +int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp) { + struct svc_rqst *rqstp = rq; + struct nfsd4_compoundargs *args = rqstp->rq_argp; + if (args->ops != args->iops) { kfree(args->ops); args->ops = args->iops; @@ -3345,13 +3629,12 @@ void nfsd4_release_compoundargs(struct nfsd4_compoundargs *args) tb->release(tb->buf); kfree(tb); } + return 1; } int nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundargs *args) { - __be32 status; - args->p = p; args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len; args->pagelist = rqstp->rq_arg.pages; @@ -3361,11 +3644,7 @@ nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_comp args->ops = args->iops; args->rqstp = rqstp; - status = nfsd4_decode_compound(args); - if (status) { - nfsd4_release_compoundargs(args); - } - return !status; + return !nfsd4_decode_compound(args); } int diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 4666a20..2cbac34 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -118,7 +118,7 @@ hash_refile(struct svc_cacherep *rp) * Note that no operation within the loop may sleep. */ int -nfsd_cache_lookup(struct svc_rqst *rqstp, int type) +nfsd_cache_lookup(struct svc_rqst *rqstp) { struct hlist_node *hn; struct hlist_head *rh; @@ -128,6 +128,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp, int type) vers = rqstp->rq_vers, proc = rqstp->rq_proc; unsigned long age; + int type = rqstp->rq_cachetype; int rtn; rqstp->rq_cacherep = NULL; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 2b1449d..c45a2ea 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -9,11 +9,11 @@ #include #include -#include #include #include #include #include +#include #include "idmap.h" #include "nfsd.h" @@ -24,15 +24,6 @@ */ enum { NFSD_Root = 1, -#ifdef CONFIG_NFSD_DEPRECATED - NFSD_Svc, - NFSD_Add, - NFSD_Del, - NFSD_Export, - NFSD_Unexport, - NFSD_Getfd, - NFSD_Getfs, -#endif NFSD_List, NFSD_Export_features, NFSD_Fh, @@ -59,15 +50,6 @@ enum { /* * write() for these nodes. */ -#ifdef CONFIG_NFSD_DEPRECATED -static ssize_t write_svc(struct file *file, char *buf, size_t size); -static ssize_t write_add(struct file *file, char *buf, size_t size); -static ssize_t write_del(struct file *file, char *buf, size_t size); -static ssize_t write_export(struct file *file, char *buf, size_t size); -static ssize_t write_unexport(struct file *file, char *buf, size_t size); -static ssize_t write_getfd(struct file *file, char *buf, size_t size); -static ssize_t write_getfs(struct file *file, char *buf, size_t size); -#endif static ssize_t write_filehandle(struct file *file, char *buf, size_t size); static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size); static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size); @@ -83,15 +65,6 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); #endif static ssize_t (*write_op[])(struct file *, char *, size_t) = { -#ifdef CONFIG_NFSD_DEPRECATED - [NFSD_Svc] = write_svc, - [NFSD_Add] = write_add, - [NFSD_Del] = write_del, - [NFSD_Export] = write_export, - [NFSD_Unexport] = write_unexport, - [NFSD_Getfd] = write_getfd, - [NFSD_Getfs] = write_getfs, -#endif [NFSD_Fh] = write_filehandle, [NFSD_FO_UnlockIP] = write_unlock_ip, [NFSD_FO_UnlockFS] = write_unlock_fs, @@ -130,16 +103,6 @@ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *bu static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) { -#ifdef CONFIG_NFSD_DEPRECATED - static int warned; - if (file->f_dentry->d_name.name[0] == '.' && !warned) { - printk(KERN_INFO - "Warning: \"%s\" uses deprecated NFSD interface: %s." - " This will be removed in 2.6.40\n", - current->comm, file->f_dentry->d_name.name); - warned = 1; - } -#endif if (! file->private_data) { /* An attempt to read a transaction file without writing * causes a 0-byte write so that the file can return @@ -226,303 +189,6 @@ static const struct file_operations pool_stats_operations = { * payload - write methods */ -#ifdef CONFIG_NFSD_DEPRECATED -/** - * write_svc - Start kernel's NFSD server - * - * Deprecated. /proc/fs/nfsd/threads is preferred. - * Function remains to support old versions of nfs-utils. - * - * Input: - * buf: struct nfsctl_svc - * svc_port: port number of this - * server's listener - * svc_nthreads: number of threads to start - * size: size in bytes of passed in nfsctl_svc - * Output: - * On success: returns zero - * On error: return code is negative errno value - */ -static ssize_t write_svc(struct file *file, char *buf, size_t size) -{ - struct nfsctl_svc *data; - int err; - if (size < sizeof(*data)) - return -EINVAL; - data = (struct nfsctl_svc*) buf; - err = nfsd_svc(data->svc_port, data->svc_nthreads); - if (err < 0) - return err; - return 0; -} - -/** - * write_add - Add or modify client entry in auth unix cache - * - * Deprecated. /proc/net/rpc/auth.unix.ip is preferred. - * Function remains to support old versions of nfs-utils. - * - * Input: - * buf: struct nfsctl_client - * cl_ident: '\0'-terminated C string - * containing domain name - * of client - * cl_naddr: no. of items in cl_addrlist - * cl_addrlist: array of client addresses - * cl_fhkeytype: ignored - * cl_fhkeylen: ignored - * cl_fhkey: ignored - * size: size in bytes of passed in nfsctl_client - * Output: - * On success: returns zero - * On error: return code is negative errno value - * - * Note: Only AF_INET client addresses are passed in, since - * nfsctl_client.cl_addrlist contains only in_addr fields for addresses. - */ -static ssize_t write_add(struct file *file, char *buf, size_t size) -{ - struct nfsctl_client *data; - if (size < sizeof(*data)) - return -EINVAL; - data = (struct nfsctl_client *)buf; - return exp_addclient(data); -} - -/** - * write_del - Remove client from auth unix cache - * - * Deprecated. /proc/net/rpc/auth.unix.ip is preferred. - * Function remains to support old versions of nfs-utils. - * - * Input: - * buf: struct nfsctl_client - * cl_ident: '\0'-terminated C string - * containing domain name - * of client - * cl_naddr: ignored - * cl_addrlist: ignored - * cl_fhkeytype: ignored - * cl_fhkeylen: ignored - * cl_fhkey: ignored - * size: size in bytes of passed in nfsctl_client - * Output: - * On success: returns zero - * On error: return code is negative errno value - * - * Note: Only AF_INET client addresses are passed in, since - * nfsctl_client.cl_addrlist contains only in_addr fields for addresses. - */ -static ssize_t write_del(struct file *file, char *buf, size_t size) -{ - struct nfsctl_client *data; - if (size < sizeof(*data)) - return -EINVAL; - data = (struct nfsctl_client *)buf; - return exp_delclient(data); -} - -/** - * write_export - Export part or all of a local file system - * - * Deprecated. /proc/net/rpc/{nfsd.export,nfsd.fh} are preferred. - * Function remains to support old versions of nfs-utils. - * - * Input: - * buf: struct nfsctl_export - * ex_client: '\0'-terminated C string - * containing domain name - * of client allowed to access - * this export - * ex_path: '\0'-terminated C string - * containing pathname of - * directory in local file system - * ex_dev: fsid to use for this export - * ex_ino: ignored - * ex_flags: export flags for this export - * ex_anon_uid: UID to use for anonymous - * requests - * ex_anon_gid: GID to use for anonymous - * requests - * size: size in bytes of passed in nfsctl_export - * Output: - * On success: returns zero - * On error: return code is negative errno value - */ -static ssize_t write_export(struct file *file, char *buf, size_t size) -{ - struct nfsctl_export *data; - if (size < sizeof(*data)) - return -EINVAL; - data = (struct nfsctl_export*)buf; - return exp_export(data); -} - -/** - * write_unexport - Unexport a previously exported file system - * - * Deprecated. /proc/net/rpc/{nfsd.export,nfsd.fh} are preferred. - * Function remains to support old versions of nfs-utils. - * - * Input: - * buf: struct nfsctl_export - * ex_client: '\0'-terminated C string - * containing domain name - * of client no longer allowed - * to access this export - * ex_path: '\0'-terminated C string - * containing pathname of - * directory in local file system - * ex_dev: ignored - * ex_ino: ignored - * ex_flags: ignored - * ex_anon_uid: ignored - * ex_anon_gid: ignored - * size: size in bytes of passed in nfsctl_export - * Output: - * On success: returns zero - * On error: return code is negative errno value - */ -static ssize_t write_unexport(struct file *file, char *buf, size_t size) -{ - struct nfsctl_export *data; - - if (size < sizeof(*data)) - return -EINVAL; - data = (struct nfsctl_export*)buf; - return exp_unexport(data); -} - -/** - * write_getfs - Get a variable-length NFS file handle by path - * - * Deprecated. /proc/fs/nfsd/filehandle is preferred. - * Function remains to support old versions of nfs-utils. - * - * Input: - * buf: struct nfsctl_fsparm - * gd_addr: socket address of client - * gd_path: '\0'-terminated C string - * containing pathname of - * directory in local file system - * gd_maxlen: maximum size of returned file - * handle - * size: size in bytes of passed in nfsctl_fsparm - * Output: - * On success: passed-in buffer filled with a knfsd_fh structure - * (a variable-length raw NFS file handle); - * return code is the size in bytes of the file handle - * On error: return code is negative errno value - * - * Note: Only AF_INET client addresses are passed in, since gd_addr - * is the same size as a struct sockaddr_in. - */ -static ssize_t write_getfs(struct file *file, char *buf, size_t size) -{ - struct nfsctl_fsparm *data; - struct sockaddr_in *sin; - struct auth_domain *clp; - int err = 0; - struct knfsd_fh *res; - struct in6_addr in6; - - if (size < sizeof(*data)) - return -EINVAL; - data = (struct nfsctl_fsparm*)buf; - err = -EPROTONOSUPPORT; - if (data->gd_addr.sa_family != AF_INET) - goto out; - sin = (struct sockaddr_in *)&data->gd_addr; - if (data->gd_maxlen > NFS3_FHSIZE) - data->gd_maxlen = NFS3_FHSIZE; - - res = (struct knfsd_fh*)buf; - - exp_readlock(); - - ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6); - - clp = auth_unix_lookup(&init_net, &in6); - if (!clp) - err = -EPERM; - else { - err = exp_rootfh(clp, data->gd_path, res, data->gd_maxlen); - auth_domain_put(clp); - } - exp_readunlock(); - if (err == 0) - err = res->fh_size + offsetof(struct knfsd_fh, fh_base); - out: - return err; -} - -/** - * write_getfd - Get a fixed-length NFS file handle by path (used by mountd) - * - * Deprecated. /proc/fs/nfsd/filehandle is preferred. - * Function remains to support old versions of nfs-utils. - * - * Input: - * buf: struct nfsctl_fdparm - * gd_addr: socket address of client - * gd_path: '\0'-terminated C string - * containing pathname of - * directory in local file system - * gd_version: fdparm structure version - * size: size in bytes of passed in nfsctl_fdparm - * Output: - * On success: passed-in buffer filled with nfsctl_res - * (a fixed-length raw NFS file handle); - * return code is the size in bytes of the file handle - * On error: return code is negative errno value - * - * Note: Only AF_INET client addresses are passed in, since gd_addr - * is the same size as a struct sockaddr_in. - */ -static ssize_t write_getfd(struct file *file, char *buf, size_t size) -{ - struct nfsctl_fdparm *data; - struct sockaddr_in *sin; - struct auth_domain *clp; - int err = 0; - struct knfsd_fh fh; - char *res; - struct in6_addr in6; - - if (size < sizeof(*data)) - return -EINVAL; - data = (struct nfsctl_fdparm*)buf; - err = -EPROTONOSUPPORT; - if (data->gd_addr.sa_family != AF_INET) - goto out; - err = -EINVAL; - if (data->gd_version < 2 || data->gd_version > NFSSVC_MAXVERS) - goto out; - - res = buf; - sin = (struct sockaddr_in *)&data->gd_addr; - exp_readlock(); - - ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6); - - clp = auth_unix_lookup(&init_net, &in6); - if (!clp) - err = -EPERM; - else { - err = exp_rootfh(clp, data->gd_path, &fh, NFS_FHSIZE); - auth_domain_put(clp); - } - exp_readunlock(); - - if (err == 0) { - memset(res,0, NFS_FHSIZE); - memcpy(res, &fh.fh_base, fh.fh_size); - err = NFS_FHSIZE; - } - out: - return err; -} -#endif /* CONFIG_NFSD_DEPRECATED */ /** * write_unlock_ip - Release all locks used by a client @@ -1397,15 +1063,6 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) static int nfsd_fill_super(struct super_block * sb, void * data, int silent) { static struct tree_descr nfsd_files[] = { -#ifdef CONFIG_NFSD_DEPRECATED - [NFSD_Svc] = {".svc", &transaction_ops, S_IWUSR}, - [NFSD_Add] = {".add", &transaction_ops, S_IWUSR}, - [NFSD_Del] = {".del", &transaction_ops, S_IWUSR}, - [NFSD_Export] = {".export", &transaction_ops, S_IWUSR}, - [NFSD_Unexport] = {".unexport", &transaction_ops, S_IWUSR}, - [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR}, - [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR}, -#endif [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, [NFSD_Export_features] = {"export_features", &export_features_operations, S_IRUGO}, diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 7ecfa24..58134a2 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -11,13 +11,39 @@ #include #include +#include +#include +#include +#include +#include + #include #include #include + /* * nfsd version */ #define NFSD_SUPPORTED_MINOR_VERSION 1 +/* + * Maximum blocksizes supported by daemon under various circumstances. + */ +#define NFSSVC_MAXBLKSIZE RPCSVC_MAXPAYLOAD +/* NFSv2 is limited by the protocol specification, see RFC 1094 */ +#define NFSSVC_MAXBLKSIZE_V2 (8*1024) + + +/* + * Largest number of bytes we need to allocate for an NFS + * call or reply. Used to control buffer sizes. We use + * the length of v3 WRITE, READDIR and READDIR replies + * which are an RPC header, up to 26 XDR units of reply + * data, and some page data. + * + * Note that accuracy here doesn't matter too much as the + * size is rounded up to a page size when allocating space. + */ +#define NFSD_BUFSIZE ((RPC_MAX_HEADER_WITH_AUTH+26)*XDR_UNIT + NFSSVC_MAXBLKSIZE) struct readdir_cd { __be32 err; /* 0, nfserr, or nfserr_eof */ @@ -335,6 +361,13 @@ static inline u32 nfsd_suppattrs2(u32 minorversion) #define NFSD_SUPPATTR_EXCLCREAT_WORD2 \ NFSD_WRITEABLE_ATTRS_WORD2 +extern int nfsd4_is_junction(struct dentry *dentry); +#else +static inline int nfsd4_is_junction(struct dentry *dentry) +{ + return 0; +} + #endif /* CONFIG_NFSD_V4 */ #endif /* LINUX_NFSD_NFSD_H */ diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 90c6aa6..c763de5 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -59,28 +59,25 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry) * the write call). */ static inline __be32 -nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, int type) +nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, int requested) { - /* Type can be negative when creating hardlinks - not to a dir */ - if (type > 0 && (mode & S_IFMT) != type) { - if (rqstp->rq_vers == 4 && (mode & S_IFMT) == S_IFLNK) - return nfserr_symlink; - else if (type == S_IFDIR) - return nfserr_notdir; - else if ((mode & S_IFMT) == S_IFDIR) - return nfserr_isdir; - else - return nfserr_inval; - } - if (type < 0 && (mode & S_IFMT) == -type) { - if (rqstp->rq_vers == 4 && (mode & S_IFMT) == S_IFLNK) - return nfserr_symlink; - else if (type == -S_IFDIR) - return nfserr_isdir; - else - return nfserr_notdir; - } - return 0; + mode &= S_IFMT; + + if (requested == 0) /* the caller doesn't care */ + return nfs_ok; + if (mode == requested) + return nfs_ok; + /* + * v4 has an error more specific than err_notdir which we should + * return in preference to err_notdir: + */ + if (rqstp->rq_vers == 4 && mode == S_IFLNK) + return nfserr_symlink; + if (requested == S_IFDIR) + return nfserr_notdir; + if (mode == S_IFDIR) + return nfserr_isdir; + return nfserr_inval; } static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp, diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 18743c4..7595582 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -256,6 +257,8 @@ static void nfsd_last_thread(struct svc_serv *serv) nfsd_serv = NULL; nfsd_shutdown(); + svc_rpcb_cleanup(serv); + printk(KERN_WARNING "nfsd: last server has exited, flushing export " "cache\n"); nfsd_export_flush(); @@ -528,16 +531,9 @@ nfsd(void *vrqstp) continue; } - - /* Lock the export hash tables for reading. */ - exp_readlock(); - validate_process_creds(); svc_process(rqstp); validate_process_creds(); - - /* Unlock export hash tables */ - exp_readunlock(); } /* Clear signals before calling svc_exit_thread() */ @@ -577,8 +573,22 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) rqstp->rq_vers, rqstp->rq_proc); proc = rqstp->rq_procinfo; + /* + * Give the xdr decoder a chance to change this if it wants + * (necessary in the NFSv4.0 compound case) + */ + rqstp->rq_cachetype = proc->pc_cachetype; + /* Decode arguments */ + xdr = proc->pc_decode; + if (xdr && !xdr(rqstp, (__be32*)rqstp->rq_arg.head[0].iov_base, + rqstp->rq_argp)) { + dprintk("nfsd: failed to decode arguments!\n"); + *statp = rpc_garbage_args; + return 1; + } + /* Check whether we have this call in the cache. */ - switch (nfsd_cache_lookup(rqstp, proc->pc_cachetype)) { + switch (nfsd_cache_lookup(rqstp)) { case RC_INTR: case RC_DROPIT: return 0; @@ -588,16 +598,6 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) /* do it */ } - /* Decode arguments */ - xdr = proc->pc_decode; - if (xdr && !xdr(rqstp, (__be32*)rqstp->rq_arg.head[0].iov_base, - rqstp->rq_argp)) { - dprintk("nfsd: failed to decode arguments!\n"); - nfsd_cache_update(rqstp, RC_NOCACHE, NULL); - *statp = rpc_garbage_args; - return 1; - } - /* need to grab the location to store the status, as * nfsv4 does some encoding while processing */ @@ -633,7 +633,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) } /* Store reply in cache. */ - nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1); + nfsd_cache_update(rqstp, rqstp->rq_cachetype, statp + 1); return 1; } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 858c7ba..a3cf384 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -35,6 +35,7 @@ #ifndef _NFSD4_STATE_H #define _NFSD4_STATE_H +#include #include #include #include "nfsfh.h" @@ -45,24 +46,20 @@ typedef struct { } clientid_t; typedef struct { - u32 so_boot; - u32 so_stateownerid; - u32 so_fileid; + clientid_t so_clid; + u32 so_id; } stateid_opaque_t; typedef struct { u32 si_generation; stateid_opaque_t si_opaque; } stateid_t; -#define si_boot si_opaque.so_boot -#define si_stateownerid si_opaque.so_stateownerid -#define si_fileid si_opaque.so_fileid #define STATEID_FMT "(%08x/%08x/%08x/%08x)" #define STATEID_VAL(s) \ - (s)->si_boot, \ - (s)->si_stateownerid, \ - (s)->si_fileid, \ + (s)->si_opaque.so_clid.cl_boot, \ + (s)->si_opaque.so_clid.cl_id, \ + (s)->si_opaque.so_id, \ (s)->si_generation struct nfsd4_callback { @@ -76,17 +73,27 @@ struct nfsd4_callback { bool cb_done; }; +struct nfs4_stid { +#define NFS4_OPEN_STID 1 +#define NFS4_LOCK_STID 2 +#define NFS4_DELEG_STID 4 +/* For an open stateid kept around *only* to process close replays: */ +#define NFS4_CLOSED_STID 8 + unsigned char sc_type; + stateid_t sc_stateid; + struct nfs4_client *sc_client; +}; + struct nfs4_delegation { + struct nfs4_stid dl_stid; /* must be first field */ struct list_head dl_perfile; struct list_head dl_perclnt; struct list_head dl_recall_lru; /* delegation recalled */ atomic_t dl_count; /* ref count */ - struct nfs4_client *dl_client; struct nfs4_file *dl_file; u32 dl_type; time_t dl_time; /* For recall: */ - stateid_t dl_stateid; struct knfsd_fh dl_fh; int dl_retries; struct nfsd4_callback dl_recall; @@ -104,6 +111,11 @@ struct nfs4_cb_conn { struct svc_xprt *cb_xprt; /* minorversion 1 only */ }; +static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s) +{ + return container_of(s, struct nfs4_delegation, dl_stid); +} + /* Maximum number of slots per session. 160 is useful for long haul TCP */ #define NFSD_MAX_SLOTS_PER_SESSION 160 /* Maximum number of operations per session compound */ @@ -220,6 +232,7 @@ struct nfs4_client { struct list_head cl_idhash; /* hash by cl_clientid.id */ struct list_head cl_strhash; /* hash by cl_name */ struct list_head cl_openowners; + struct idr cl_stateids; /* stateid lookup */ struct list_head cl_delegations; struct list_head cl_lru; /* tail queue */ struct xdr_netobj cl_name; /* id generated by client */ @@ -245,6 +258,7 @@ struct nfs4_client { #define NFSD4_CB_UP 0 #define NFSD4_CB_UNKNOWN 1 #define NFSD4_CB_DOWN 2 +#define NFSD4_CB_FAULT 3 int cl_cb_state; struct nfsd4_callback cl_cb_null; struct nfsd4_session *cl_cb_session; @@ -293,6 +307,9 @@ static inline void update_stateid(stateid_t *stateid) { stateid->si_generation++; + /* Wraparound recommendation from 3530bis-13 9.1.3.2: */ + if (stateid->si_generation == 0) + stateid->si_generation = 1; } /* A reasonable value for REPLAY_ISIZE was estimated as follows: @@ -312,49 +329,57 @@ struct nfs4_replay { __be32 rp_status; unsigned int rp_buflen; char *rp_buf; - unsigned intrp_allocated; struct knfsd_fh rp_openfh; char rp_ibuf[NFSD4_REPLAY_ISIZE]; }; -/* -* nfs4_stateowner can either be an open_owner, or a lock_owner -* -* so_idhash: stateid_hashtbl[] for open owner, lockstateid_hashtbl[] -* for lock_owner -* so_strhash: ownerstr_hashtbl[] for open_owner, lock_ownerstr_hashtbl[] -* for lock_owner -* so_perclient: nfs4_client->cl_perclient entry - used when nfs4_client -* struct is reaped. -* so_perfilestate: heads the list of nfs4_stateid (either open or lock) -* and is used to ensure no dangling nfs4_stateid references when we -* release a stateowner. -* so_perlockowner: (open) nfs4_stateid->st_perlockowner entry - used when -* close is called to reap associated byte-range locks -* so_close_lru: (open) stateowner is placed on this list instead of being -* reaped (when so_perfilestate is empty) to hold the last close replay. -* reaped by laundramat thread after lease period. -*/ struct nfs4_stateowner { - struct kref so_ref; - struct list_head so_idhash; /* hash by so_id */ struct list_head so_strhash; /* hash by op_name */ - struct list_head so_perclient; struct list_head so_stateids; - struct list_head so_perstateid; /* for lockowners only */ - struct list_head so_close_lru; /* tail queue */ - time_t so_time; /* time of placement on so_close_lru */ - int so_is_open_owner; /* 1=openowner,0=lockowner */ - u32 so_id; struct nfs4_client * so_client; /* after increment in ENCODE_SEQID_OP_TAIL, represents the next * sequence id expected from the client: */ u32 so_seqid; struct xdr_netobj so_owner; /* open owner name */ - int so_confirmed; /* successful OPEN_CONFIRM? */ struct nfs4_replay so_replay; + bool so_is_open_owner; }; +struct nfs4_openowner { + struct nfs4_stateowner oo_owner; /* must be first field */ + struct list_head oo_perclient; + /* + * We keep around openowners a little while after last close, + * which saves clients from having to confirm, and allows us to + * handle close replays if they come soon enough. The close_lru + * is a list of such openowners, to be reaped by the laundromat + * thread eventually if they remain unused: + */ + struct list_head oo_close_lru; + struct nfs4_ol_stateid *oo_last_closed_stid; + time_t oo_time; /* time of placement on so_close_lru */ +#define NFS4_OO_CONFIRMED 1 +#define NFS4_OO_PURGE_CLOSE 2 +#define NFS4_OO_NEW 4 + unsigned char oo_flags; +}; + +struct nfs4_lockowner { + struct nfs4_stateowner lo_owner; /* must be first element */ + struct list_head lo_perstateid; /* for lockowners only */ + struct list_head lo_list; /* for temporary uses */ +}; + +static inline struct nfs4_openowner * openowner(struct nfs4_stateowner *so) +{ + return container_of(so, struct nfs4_openowner, oo_owner); +} + +static inline struct nfs4_lockowner * lockowner(struct nfs4_stateowner *so) +{ + return container_of(so, struct nfs4_lockowner, lo_owner); +} + /* * nfs4_file: a file opened by some number of (open) nfs4_stateowners. * o fi_perfile list is used to search for conflicting @@ -368,17 +393,17 @@ struct nfs4_file { /* One each for O_RDONLY, O_WRONLY, O_RDWR: */ struct file * fi_fds[3]; /* - * Each open or lock stateid contributes 1 to either - * fi_access[O_RDONLY], fi_access[O_WRONLY], or both, depending - * on open or lock mode: + * Each open or lock stateid contributes 0-4 to the counts + * below depending on which bits are set in st_access_bitmap: + * 1 to fi_access[O_RDONLY] if NFS4_SHARE_ACCES_READ is set + * + 1 to fi_access[O_WRONLY] if NFS4_SHARE_ACCESS_WRITE is set + * + 1 to both of the above if NFS4_SHARE_ACCESS_BOTH is set. */ atomic_t fi_access[2]; struct file *fi_deleg_file; struct file_lock *fi_lease; atomic_t fi_delegees; struct inode *fi_inode; - u32 fi_id; /* used with stateowner->so_id - * for stateid_hashtbl hash */ bool fi_had_conflict; }; @@ -408,44 +433,27 @@ static inline struct file *find_any_file(struct nfs4_file *f) return f->fi_fds[O_RDONLY]; } -/* -* nfs4_stateid can either be an open stateid or (eventually) a lock stateid -* -* (open)nfs4_stateid: one per (open)nfs4_stateowner, nfs4_file -* -* st_hash: stateid_hashtbl[] entry or lockstateid_hashtbl entry -* st_perfile: file_hashtbl[] entry. -* st_perfile_state: nfs4_stateowner->so_perfilestate -* st_perlockowner: (open stateid) list of lock nfs4_stateowners -* st_access_bmap: used only for open stateid -* st_deny_bmap: used only for open stateid -* st_openstp: open stateid lock stateid was derived from -* -* XXX: open stateids and lock stateids have diverged sufficiently that -* we should consider defining separate structs for the two cases. -*/ - -struct nfs4_stateid { - struct list_head st_hash; +/* "ol" stands for "Open or Lock". Better suggestions welcome. */ +struct nfs4_ol_stateid { + struct nfs4_stid st_stid; /* must be first field */ struct list_head st_perfile; struct list_head st_perstateowner; struct list_head st_lockowners; struct nfs4_stateowner * st_stateowner; struct nfs4_file * st_file; - stateid_t st_stateid; unsigned long st_access_bmap; unsigned long st_deny_bmap; - struct nfs4_stateid * st_openstp; + struct nfs4_ol_stateid * st_openstp; }; +static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s) +{ + return container_of(s, struct nfs4_ol_stateid, st_stid); +} + /* flags for preprocess_seqid_op() */ -#define HAS_SESSION 0x00000001 -#define CONFIRM 0x00000002 -#define OPEN_STATE 0x00000004 -#define LOCK_STATE 0x00000008 #define RD_STATE 0x00000010 #define WR_STATE 0x00000020 -#define CLOSE_STATE 0x00000040 struct nfsd4_compound_state; @@ -455,7 +463,8 @@ extern void nfs4_lock_state(void); extern void nfs4_unlock_state(void); extern int nfs4_in_grace(void); extern __be32 nfs4_check_open_reclaim(clientid_t *clid); -extern void nfs4_free_stateowner(struct kref *kref); +extern void nfs4_free_openowner(struct nfs4_openowner *); +extern void nfs4_free_lockowner(struct nfs4_lockowner *); extern int set_callback_cred(void); extern void nfsd4_probe_callback(struct nfs4_client *clp); extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); @@ -467,7 +476,7 @@ extern void nfsd4_destroy_callback_queue(void); extern void nfsd4_shutdown_callback(struct nfs4_client *); extern void nfs4_put_delegation(struct nfs4_delegation *dp); extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname); -extern void nfsd4_init_recdir(char *recdir_name); +extern void nfsd4_init_recdir(void); extern int nfsd4_recdir_load(void); extern void nfsd4_shutdown_recdir(void); extern int nfs4_client_to_reclaim(const char *name); @@ -476,17 +485,7 @@ extern void nfsd4_recdir_purge_old(void); extern int nfsd4_create_clid_dir(struct nfs4_client *clp); extern void nfsd4_remove_clid_dir(struct nfs4_client *clp); extern void release_session_client(struct nfsd4_session *); - -static inline void -nfs4_put_stateowner(struct nfs4_stateowner *so) -{ - kref_put(&so->so_ref, nfs4_free_stateowner); -} - -static inline void -nfs4_get_stateowner(struct nfs4_stateowner *so) -{ - kref_get(&so->so_ref); -} +extern __be32 nfs4_validate_stateid(struct nfs4_client *, stateid_t *); +extern void nfsd4_purge_closed_stateid(struct nfs4_stateowner *); #endif /* NFSD4_STATE_H */ diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index acf88ae..e2e7914 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -168,6 +168,8 @@ int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp) { if (d_mountpoint(dentry)) return 1; + if (nfsd4_is_junction(dentry)) + return 1; if (!(exp->ex_flags & NFSEXP_V4ROOT)) return 0; return dentry->d_inode != NULL; @@ -295,41 +297,12 @@ commit_metadata(struct svc_fh *fhp) } /* - * Set various file attributes. - * N.B. After this call fhp needs an fh_put + * Go over the attributes and take care of the small differences between + * NFS semantics and what Linux expects. */ -__be32 -nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, - int check_guard, time_t guardtime) +static void +nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap) { - struct dentry *dentry; - struct inode *inode; - int accmode = NFSD_MAY_SATTR; - int ftype = 0; - __be32 err; - int host_err; - int size_change = 0; - - if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) - accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; - if (iap->ia_valid & ATTR_SIZE) - ftype = S_IFREG; - - /* Get inode */ - err = fh_verify(rqstp, fhp, ftype, accmode); - if (err) - goto out; - - dentry = fhp->fh_dentry; - inode = dentry->d_inode; - - /* Ignore any mode updates on symlinks */ - if (S_ISLNK(inode->i_mode)) - iap->ia_valid &= ~ATTR_MODE; - - if (!iap->ia_valid) - goto out; - /* * NFSv2 does not differentiate between "set-[ac]time-to-now" * which only requires access, and "set-[ac]time-to-X" which @@ -339,8 +312,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, * convert to "set to now" instead of "set to explicit time" * * We only call inode_change_ok as the last test as technically - * it is not an interface that we should be using. It is only - * valid if the filesystem does not define it's own i_op->setattr. + * it is not an interface that we should be using. */ #define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET) #define MAX_TOUCH_TIME_ERROR (30*60) @@ -366,30 +338,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, iap->ia_valid &= ~BOTH_TIME_SET; } } - - /* - * The size case is special. - * It changes the file as well as the attributes. - */ - if (iap->ia_valid & ATTR_SIZE) { - if (iap->ia_size < inode->i_size) { - err = nfsd_permission(rqstp, fhp->fh_export, dentry, - NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE); - if (err) - goto out; - } - - host_err = get_write_access(inode); - if (host_err) - goto out_nfserr; - - size_change = 1; - host_err = locks_verify_truncate(inode, NULL, iap->ia_size); - if (host_err) { - put_write_access(inode); - goto out_nfserr; - } - } /* sanitize the mode change */ if (iap->ia_valid & ATTR_MODE) { @@ -412,32 +360,120 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID); } } +} - /* Change the attributes. */ +static __be32 +nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct iattr *iap) +{ + struct inode *inode = fhp->fh_dentry->d_inode; + int host_err; - iap->ia_valid |= ATTR_CTIME; + if (iap->ia_size < inode->i_size) { + __be32 err; + + err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, + NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE); + if (err) + return err; + } + + host_err = get_write_access(inode); + if (host_err) + goto out_nfserrno; + + host_err = locks_verify_truncate(inode, NULL, iap->ia_size); + if (host_err) + goto out_put_write_access; + return 0; + +out_put_write_access: + put_write_access(inode); +out_nfserrno: + return nfserrno(host_err); +} + +/* + * Set various file attributes. After this call fhp needs an fh_put. + */ +__be32 +nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, + int check_guard, time_t guardtime) +{ + struct dentry *dentry; + struct inode *inode; + int accmode = NFSD_MAY_SATTR; + int ftype = 0; + __be32 err; + int host_err; + bool get_write_count; + int size_change = 0; - err = nfserr_notsync; - if (!check_guard || guardtime == inode->i_ctime.tv_sec) { - host_err = nfsd_break_lease(inode); + if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) + accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; + if (iap->ia_valid & ATTR_SIZE) + ftype = S_IFREG; + + /* Callers that do fh_verify should do the fh_want_write: */ + get_write_count = !fhp->fh_dentry; + + /* Get inode */ + err = fh_verify(rqstp, fhp, ftype, accmode); + if (err) + goto out; + if (get_write_count) { + host_err = fh_want_write(fhp); if (host_err) - goto out_nfserr; - fh_lock(fhp); + return nfserrno(host_err); + } - host_err = notify_change(dentry, iap); - err = nfserrno(host_err); - fh_unlock(fhp); + dentry = fhp->fh_dentry; + inode = dentry->d_inode; + + /* Ignore any mode updates on symlinks */ + if (S_ISLNK(inode->i_mode)) + iap->ia_valid &= ~ATTR_MODE; + + if (!iap->ia_valid) + goto out; + + nfsd_sanitize_attrs(inode, iap); + + /* + * The size case is special, it changes the file in addition to the + * attributes. + */ + if (iap->ia_valid & ATTR_SIZE) { + err = nfsd_get_write_access(rqstp, fhp, iap); + if (err) + goto out; + size_change = 1; } + + iap->ia_valid |= ATTR_CTIME; + + if (check_guard && guardtime != inode->i_ctime.tv_sec) { + err = nfserr_notsync; + goto out_put_write_access; + } + + host_err = nfsd_break_lease(inode); + if (host_err) + goto out_put_write_access_nfserror; + + fh_lock(fhp); + host_err = notify_change(dentry, iap); + fh_unlock(fhp); + +out_put_write_access_nfserror: + err = nfserrno(host_err); +out_put_write_access: if (size_change) put_write_access(inode); if (!err) commit_metadata(fhp); out: return err; - -out_nfserr: - err = nfserrno(host_err); - goto out; } #if defined(CONFIG_NFSD_V2_ACL) || \ @@ -472,6 +508,9 @@ set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key) char *buf = NULL; int error = 0; + if (!pacl) + return vfs_setxattr(dentry, key, NULL, 0, 0); + buflen = posix_acl_xattr_size(pacl->a_count); buf = kmalloc(buflen, GFP_KERNEL); error = -ENOMEM; @@ -502,7 +541,7 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int flags = 0; /* Get inode */ - error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR); + error = fh_verify(rqstp, fhp, 0, NFSD_MAY_SATTR); if (error) return error; @@ -592,6 +631,22 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac return error; } +#define NFSD_XATTR_JUNCTION_PREFIX XATTR_TRUSTED_PREFIX "junction." +#define NFSD_XATTR_JUNCTION_TYPE NFSD_XATTR_JUNCTION_PREFIX "type" +int nfsd4_is_junction(struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + + if (inode == NULL) + return 0; + if (inode->i_mode & S_IXUGO) + return 0; + if (!(inode->i_mode & S_ISVTX)) + return 0; + if (vfs_getxattr(dentry, NFSD_XATTR_JUNCTION_TYPE, NULL, 0) <= 0) + return 0; + return 1; +} #endif /* defined(CONFIG_NFSD_V4) */ #ifdef CONFIG_NFSD_V3 @@ -708,12 +763,13 @@ static int nfsd_open_break_lease(struct inode *inode, int access) /* * Open an existing file or directory. - * The access argument indicates the type of open (read/write/lock) + * The may_flags argument indicates the type of open (read/write/lock) + * and additional flags. * N.B. After this call fhp needs an fh_put */ __be32 nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, - int access, struct file **filp) + int may_flags, struct file **filp) { struct dentry *dentry; struct inode *inode; @@ -728,7 +784,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, * and (hopefully) checked permission - so allow OWNER_OVERRIDE * in case a chmod has now revoked permission. */ - err = fh_verify(rqstp, fhp, type, access | NFSD_MAY_OWNER_OVERRIDE); + err = fh_verify(rqstp, fhp, type, may_flags | NFSD_MAY_OWNER_OVERRIDE); if (err) goto out; @@ -739,7 +795,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, * or any access when mandatory locking enabled */ err = nfserr_perm; - if (IS_APPEND(inode) && (access & NFSD_MAY_WRITE)) + if (IS_APPEND(inode) && (may_flags & NFSD_MAY_WRITE)) goto out; /* * We must ignore files (but only files) which might have mandatory @@ -752,22 +808,30 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (!inode->i_fop) goto out; - host_err = nfsd_open_break_lease(inode, access); + host_err = nfsd_open_break_lease(inode, may_flags); if (host_err) /* NOMEM or WOULDBLOCK */ goto out_nfserr; - if (access & NFSD_MAY_WRITE) { - if (access & NFSD_MAY_READ) + if (may_flags & NFSD_MAY_WRITE) { + if (may_flags & NFSD_MAY_READ) flags = O_RDWR|O_LARGEFILE; else flags = O_WRONLY|O_LARGEFILE; } *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt), flags, current_cred()); - if (IS_ERR(*filp)) + if (IS_ERR(*filp)) { host_err = PTR_ERR(*filp); - else - host_err = ima_file_check(*filp, access); + *filp = NULL; + } else { + host_err = ima_file_check(*filp, may_flags); + + if (may_flags & NFSD_MAY_64BIT_COOKIE) + (*filp)->f_mode |= FMODE_64BITHASH; + else + (*filp)->f_mode |= FMODE_32BITHASH; + } + out_nfserr: err = nfserrno(host_err); out: @@ -1352,7 +1416,7 @@ __be32 do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, char *fname, int flen, struct iattr *iap, struct svc_fh *resfhp, int createmode, u32 *verifier, - int *truncp, int *created) + bool *truncp, bool *created) { struct dentry *dentry, *dchild = NULL; struct inode *dirp; @@ -1421,7 +1485,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, switch (createmode) { case NFS3_CREATE_UNCHECKED: if (! S_ISREG(dchild->d_inode->i_mode)) - err = nfserr_exist; + goto out; else if (truncp) { /* in nfsv4, we need to treat this case a little * differently. we don't want to truncate the @@ -1440,13 +1504,19 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, case NFS3_CREATE_EXCLUSIVE: if ( dchild->d_inode->i_mtime.tv_sec == v_mtime && dchild->d_inode->i_atime.tv_sec == v_atime - && dchild->d_inode->i_size == 0 ) + && dchild->d_inode->i_size == 0 ) { + if (created) + *created = 1; break; + } case NFS4_CREATE_EXCLUSIVE4_1: if ( dchild->d_inode->i_mtime.tv_sec == v_mtime && dchild->d_inode->i_atime.tv_sec == v_atime - && dchild->d_inode->i_size == 0 ) + && dchild->d_inode->i_size == 0 ) { + if (created) + *created = 1; goto set_attr; + } /* fallthru */ case NFS3_CREATE_GUARDED: err = nfserr_exist; @@ -1632,10 +1702,12 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE); if (err) goto out; - err = fh_verify(rqstp, tfhp, -S_IFDIR, NFSD_MAY_NOP); + err = fh_verify(rqstp, tfhp, 0, NFSD_MAY_NOP); if (err) goto out; - + err = nfserr_isdir; + if (S_ISDIR(tfhp->fh_dentry->d_inode->i_mode)) + goto out; err = nfserr_perm; if (!len) goto out; @@ -1989,8 +2061,13 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, __be32 err; struct file *file; loff_t offset = *offsetp; + int may_flags = NFSD_MAY_READ; + + /* NFSv2 only supports 32 bit cookies */ + if (rqstp->rq_vers > 2) + may_flags |= NFSD_MAY_64BIT_COOKIE; - err = nfsd_open(rqstp, fhp, S_IFDIR, NFSD_MAY_READ, &file); + err = nfsd_open(rqstp, fhp, S_IFDIR, may_flags, &file); if (err) goto out; diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index a22e40e..c7fe962 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -10,22 +10,24 @@ /* * Flags for nfsd_permission */ -#define NFSD_MAY_NOP 0 -#define NFSD_MAY_EXEC 1 /* == MAY_EXEC */ -#define NFSD_MAY_WRITE 2 /* == MAY_WRITE */ -#define NFSD_MAY_READ 4 /* == MAY_READ */ -#define NFSD_MAY_SATTR 8 -#define NFSD_MAY_TRUNC 16 -#define NFSD_MAY_LOCK 32 -#define NFSD_MAY_MASK 63 +#define NFSD_MAY_NOP 0 +#define NFSD_MAY_EXEC 0x001 /* == MAY_EXEC */ +#define NFSD_MAY_WRITE 0x002 /* == MAY_WRITE */ +#define NFSD_MAY_READ 0x004 /* == MAY_READ */ +#define NFSD_MAY_SATTR 0x008 +#define NFSD_MAY_TRUNC 0x010 +#define NFSD_MAY_LOCK 0x020 +#define NFSD_MAY_MASK 0x03f /* extra hints to permission and open routines: */ -#define NFSD_MAY_OWNER_OVERRIDE 64 -#define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/ -#define NFSD_MAY_BYPASS_GSS_ON_ROOT 256 -#define NFSD_MAY_NOT_BREAK_LEASE 512 -#define NFSD_MAY_BYPASS_GSS 1024 -#define NFSD_MAY_READ_IF_EXEC 2048 +#define NFSD_MAY_OWNER_OVERRIDE 0x040 +#define NFSD_MAY_LOCAL_ACCESS 0x080 /* for device special files */ +#define NFSD_MAY_BYPASS_GSS_ON_ROOT 0x100 +#define NFSD_MAY_NOT_BREAK_LEASE 0x200 +#define NFSD_MAY_BYPASS_GSS 0x400 +#define NFSD_MAY_READ_IF_EXEC 0x800 + +#define NFSD_MAY_64BIT_COOKIE 0x1000 /* 64 bit readdir cookies for >= NFSv3 */ #define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) #define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) @@ -62,7 +64,7 @@ __be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *); __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *, char *name, int len, struct iattr *attrs, struct svc_fh *res, int createmode, - u32 *verifier, int *truncp, int *created); + u32 *verifier, bool *truncp, bool *created); __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, loff_t, unsigned long); #endif /* CONFIG_NFSD_V3 */ @@ -106,4 +108,14 @@ struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int); int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *); #endif +static inline int fh_want_write(struct svc_fh *fh) +{ + return mnt_want_write(fh->fh_export->ex_path.mnt); +} + +static inline void fh_drop_write(struct svc_fh *fh) +{ + mnt_drop_write(fh->fh_export->ex_path.mnt); +} + #endif /* LINUX_NFSD_VFS_H */ diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 366401e..2364747 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -81,7 +81,6 @@ struct nfsd4_access { struct nfsd4_close { u32 cl_seqid; /* request */ stateid_t cl_stateid; /* request+response */ - struct nfs4_stateowner * cl_stateowner; /* response */ }; struct nfsd4_commit { @@ -131,7 +130,7 @@ struct nfsd4_link { struct nfsd4_lock_denied { clientid_t ld_clientid; - struct nfs4_stateowner *ld_sop; + struct xdr_netobj ld_owner; u64 ld_start; u64 ld_length; u32 ld_type; @@ -165,9 +164,6 @@ struct nfsd4_lock { } ok; struct nfsd4_lock_denied denied; } u; - /* The lk_replay_owner is the open owner in the open_to_lock_owner - * case and the lock owner otherwise: */ - struct nfs4_stateowner *lk_replay_owner; }; #define lk_new_open_seqid v.new.open_seqid #define lk_new_open_stateid v.new.open_stateid @@ -188,7 +184,6 @@ struct nfsd4_lockt { struct xdr_netobj lt_owner; u64 lt_offset; u64 lt_length; - struct nfs4_stateowner * lt_stateowner; struct nfsd4_lock_denied lt_denied; }; @@ -199,7 +194,6 @@ struct nfsd4_locku { stateid_t lu_stateid; u64 lu_offset; u64 lu_length; - struct nfs4_stateowner *lu_stateowner; }; @@ -232,8 +226,11 @@ struct nfsd4_open { u32 op_recall; /* recall */ struct nfsd4_change_info op_cinfo; /* response */ u32 op_rflags; /* response */ - int op_truncate; /* used during processing */ - struct nfs4_stateowner *op_stateowner; /* used during processing */ + bool op_truncate; /* used during processing */ + bool op_created; /* used during processing */ + struct nfs4_openowner *op_openowner; /* used during processing */ + struct nfs4_file *op_file; /* used during processing */ + struct nfs4_ol_stateid *op_stp; /* used during processing */ struct nfs4_acl *op_acl; }; #define op_iattr iattr @@ -243,7 +240,6 @@ struct nfsd4_open_confirm { stateid_t oc_req_stateid /* request */; u32 oc_seqid /* request */; stateid_t oc_resp_stateid /* response */; - struct nfs4_stateowner * oc_stateowner; /* response */ }; struct nfsd4_open_downgrade { @@ -251,7 +247,6 @@ struct nfsd4_open_downgrade { u32 od_seqid; u32 od_share_access; u32 od_share_deny; - struct nfs4_stateowner *od_stateowner; }; @@ -325,8 +320,7 @@ struct nfsd4_setattr { struct nfsd4_setclientid { nfs4_verifier se_verf; /* request */ - u32 se_namelen; /* request */ - char * se_name; /* request */ + struct xdr_netobj se_name; u32 se_callback_prog; /* request */ u32 se_callback_netid_len; /* request */ char * se_callback_netid_val; /* request */ @@ -342,6 +336,24 @@ struct nfsd4_setclientid_confirm { nfs4_verifier sc_confirm; }; +struct nfsd4_saved_compoundargs { + __be32 *p; + __be32 *end; + int pagelen; + struct page **pagelist; +}; + +struct nfsd4_test_stateid { + __be32 ts_num_ids; + struct nfsd4_compoundargs *ts_saved_args; + struct nfsd4_saved_compoundargs ts_savedp; +}; + +struct nfsd4_free_stateid { + stateid_t fr_stateid; /* request */ + __be32 fr_status; /* response */ +}; + /* also used for NVERIFY */ struct nfsd4_verify { u32 ve_bmval[3]; /* request */ @@ -386,6 +398,10 @@ struct nfsd4_destroy_session { struct nfs4_sessionid sessionid; }; +struct nfsd4_destroy_clientid { + clientid_t clientid; +}; + struct nfsd4_reclaim_complete { u32 rca_one_fs; }; @@ -432,10 +448,14 @@ struct nfsd4_op { struct nfsd4_destroy_session destroy_session; struct nfsd4_sequence sequence; struct nfsd4_reclaim_complete reclaim_complete; + struct nfsd4_test_stateid test_stateid; + struct nfsd4_free_stateid free_stateid; } u; struct nfs4_replay * replay; }; +bool nfsd4_cache_this_op(struct nfsd4_op *); + struct nfsd4_compoundargs { /* scratch variables for XDR decode */ __be32 * p; @@ -458,6 +478,7 @@ struct nfsd4_compoundargs { u32 opcnt; struct nfsd4_op *ops; struct nfsd4_op iops[8]; + int cachetype; }; struct nfsd4_compoundres { @@ -508,6 +529,7 @@ int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *, struct nfsd4_compoundargs *); int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *, struct nfsd4_compoundres *); +int nfsd4_check_resp_size(struct nfsd4_compoundres *, u32); void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); __be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, @@ -534,11 +556,13 @@ extern __be32 nfsd4_sequence(struct svc_rqst *, extern __be32 nfsd4_destroy_session(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_destroy_session *); +extern __be32 nfsd4_destroy_clientid(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_destroy_clientid *); __be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_reclaim_complete *); extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *, struct nfsd4_open *open); extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open); +extern void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status); extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc); extern __be32 nfsd4_close(struct svc_rqst *rqstp, @@ -559,11 +583,15 @@ extern __be32 nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_release_lockowner *rlockowner); -extern void nfsd4_release_compoundargs(struct nfsd4_compoundargs *); +extern int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp); extern __be32 nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_delegreturn *dr); extern __be32 nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *, clientid_t *clid); +extern __be32 nfsd4_test_stateid(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, struct nfsd4_test_stateid *test_stateid); +extern __be32 nfsd4_free_stateid(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, struct nfsd4_free_stateid *free_stateid); #endif /* diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index b2e3ff3..090d8ce 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -31,6 +31,8 @@ #include "alloc.h" #include "dat.h" +static void __nilfs_btree_init(struct nilfs_bmap *bmap); + static struct nilfs_btree_path *nilfs_btree_alloc_path(void) { struct nilfs_btree_path *path; @@ -368,6 +370,34 @@ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, return ret; } +/** + * nilfs_btree_root_broken - verify consistency of btree root node + * @node: btree root node to be examined + * @ino: inode number + * + * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. + */ +static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, + unsigned long ino) +{ + int level, flags, nchildren; + int ret = 0; + + level = nilfs_btree_node_get_level(node); + flags = nilfs_btree_node_get_flags(node); + nchildren = nilfs_btree_node_get_nchildren(node); + + if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || + level >= NILFS_BTREE_LEVEL_MAX || + nchildren < 0 || + nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) { + pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n", + ino, level, flags, nchildren); + ret = 1; + } + return ret; +} + int nilfs_btree_broken_node_block(struct buffer_head *bh) { int ret; @@ -1713,7 +1743,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree, /* convert and insert */ dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; - nilfs_btree_init(btree); + __nilfs_btree_init(btree); if (nreq != NULL) { nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); nilfs_bmap_commit_alloc_ptr(btree, nreq, dat); @@ -2294,12 +2324,23 @@ static const struct nilfs_bmap_operations nilfs_btree_ops_gc = { .bop_gather_data = NULL, }; -int nilfs_btree_init(struct nilfs_bmap *bmap) +static void __nilfs_btree_init(struct nilfs_bmap *bmap) { bmap->b_ops = &nilfs_btree_ops; bmap->b_nchildren_per_block = NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); - return 0; +} + +int nilfs_btree_init(struct nilfs_bmap *bmap) +{ + int ret = 0; + + __nilfs_btree_init(bmap); + + if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), + bmap->b_inode->i_ino)) + ret = -EIO; + return ret; } void nilfs_btree_init_gc(struct nilfs_bmap *bmap) diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index d7eeca6..2660152 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -27,7 +27,7 @@ #include "nilfs.h" #include "segment.h" -int nilfs_sync_file(struct file *file, int datasync) +int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { /* * Called from fsync() system call @@ -40,8 +40,15 @@ int nilfs_sync_file(struct file *file, int datasync) struct inode *inode = file->f_mapping->host; int err; - if (!nilfs_inode_dirty(inode)) + err = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (err) + return err; + mutex_lock(&inode->i_mutex); + + if (!nilfs_inode_dirty(inode)) { + mutex_unlock(&inode->i_mutex); return 0; + } if (datasync) err = nilfs_construct_dsync_segment(inode->i_sb, inode, 0, @@ -49,6 +56,7 @@ int nilfs_sync_file(struct file *file, int datasync) else err = nilfs_construct_segment(inode->i_sb); + mutex_unlock(&inode->i_mutex); return err; } diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 373cd7b..b2d8a96 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include "nilfs.h" @@ -195,10 +196,10 @@ static int nilfs_writepage(struct page *page, struct writeback_control *wbc) static int nilfs_set_page_dirty(struct page *page) { + struct inode *inode = page->mapping->host; int ret = __set_page_dirty_nobuffers(page); if (page_has_buffers(page)) { - struct inode *inode = page->mapping->host; unsigned nr_dirty = 0; struct buffer_head *bh, *head; @@ -221,6 +222,10 @@ static int nilfs_set_page_dirty(struct page *page) if (nr_dirty) nilfs_set_file_dirty(inode, nr_dirty); + } else if (ret) { + unsigned nr_dirty = 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits); + + nilfs_set_file_dirty(inode, nr_dirty); } return ret; } @@ -278,8 +283,8 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, return 0; /* Needs synchronization with the cleaner */ - size = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, - offset, nr_segs, nilfs_get_block, NULL); + size = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, + nilfs_get_block); /* * In case of error extending write may have instantiated a few @@ -373,7 +378,7 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode) failed_acl: failed_bmap: - inode->i_nlink = 0; + clear_nlink(inode); iput(inode); /* raw_inode will be deleted through generic_delete_inode() */ goto failed; @@ -415,7 +420,7 @@ int nilfs_read_inode_common(struct inode *inode, inode->i_mode = le16_to_cpu(raw_inode->i_mode); inode->i_uid = (uid_t)le32_to_cpu(raw_inode->i_uid); inode->i_gid = (gid_t)le32_to_cpu(raw_inode->i_gid); - inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); + set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); inode->i_size = le64_to_cpu(raw_inode->i_size); inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); @@ -797,6 +802,8 @@ int nilfs_setattr(struct dentry *dentry, struct iattr *iattr) if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size != i_size_read(inode)) { + inode_dio_wait(inode); + err = vmtruncate(inode, iattr->ia_size); if (unlikely(err)) goto out_err; @@ -818,14 +825,14 @@ out_err: return err; } -int nilfs_permission(struct inode *inode, int mask, unsigned int flags) +int nilfs_permission(struct inode *inode, int mask) { struct nilfs_root *root = NILFS_I(inode)->i_root; if ((mask & MAY_WRITE) && root && root->cno != NILFS_CPTREE_CURRENT_CNO) return -EROFS; /* snapshot is not writable */ - return generic_permission(inode, mask, flags, NULL); + return generic_permission(inode, mask); } int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh) diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index cee648e..2b5e695 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -625,6 +625,9 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, if (argv[n].v_nmembs > nsegs * nilfs->ns_blocks_per_segment) goto out_free; + if (argv[n].v_nmembs >= UINT_MAX / argv[n].v_size) + goto out_free; + len = argv[n].v_size * argv[n].v_nmembs; base = (void __user *)(unsigned long)argv[n].v_base; if (len == 0) { diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 546849b..768982d 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -72,12 +72,7 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) return ERR_PTR(-ENAMETOOLONG); ino = nilfs_inode_by_name(dir, &dentry->d_name); - inode = NULL; - if (ino) { - inode = nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); - } + inode = ino ? nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino) : NULL; return d_splice_alias(inode, dentry); } @@ -294,7 +289,7 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry) nilfs_warning(inode->i_sb, __func__, "deleting nonexistent file (%lu), %d\n", inode->i_ino, inode->i_nlink); - inode->i_nlink = 1; + set_nlink(inode, 1); } err = nilfs_delete_entry(de, page); if (err) diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index f02b9ad..7a2e7b8 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -128,7 +128,6 @@ enum { * @ti_save: Backup of journal_info field of task_struct * @ti_flags: Flags * @ti_count: Nest level - * @ti_garbage: List of inode to be put when releasing semaphore */ struct nilfs_transaction_info { u32 ti_magic; @@ -137,7 +136,6 @@ struct nilfs_transaction_info { one of other filesystems has a bug. */ unsigned short ti_flags; unsigned short ti_count; - struct list_head ti_garbage; }; /* ti_magic */ @@ -235,7 +233,7 @@ extern void nilfs_set_link(struct inode *, struct nilfs_dir_entry *, struct page *, struct inode *); /* file.c */ -extern int nilfs_sync_file(struct file *, int); +extern int nilfs_sync_file(struct file *, loff_t, loff_t, int); /* ioctl.c */ long nilfs_ioctl(struct file *, unsigned int, unsigned long); @@ -264,7 +262,7 @@ extern void nilfs_update_inode(struct inode *, struct buffer_head *); extern void nilfs_truncate(struct inode *); extern void nilfs_evict_inode(struct inode *); extern int nilfs_setattr(struct dentry *, struct iattr *); -int nilfs_permission(struct inode *inode, int mask, unsigned int flags); +int nilfs_permission(struct inode *inode, int mask); int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh); extern int nilfs_inode_dirty(struct inode *); int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty); @@ -276,10 +274,10 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, /* super.c */ extern struct inode *nilfs_alloc_inode(struct super_block *); extern void nilfs_destroy_inode(struct inode *); -extern void nilfs_error(struct super_block *, const char *, const char *, ...) - __attribute__ ((format (printf, 3, 4))); -extern void nilfs_warning(struct super_block *, const char *, const char *, ...) - __attribute__ ((format (printf, 3, 4))); +extern __printf(3, 4) +void nilfs_error(struct super_block *, const char *, const char *, ...); +extern __printf(3, 4) +void nilfs_warning(struct super_block *, const char *, const char *, ...); extern struct nilfs_super_block * nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **); extern int nilfs_store_magic_and_option(struct super_block *, diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 65221a0..16eacec 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -94,6 +94,7 @@ void nilfs_forget_buffer(struct buffer_head *bh) clear_buffer_nilfs_volatile(bh); clear_buffer_nilfs_checked(bh); clear_buffer_nilfs_redirected(bh); + clear_buffer_async_write(bh); clear_buffer_dirty(bh); if (nilfs_page_buffers_clean(page)) __nilfs_clear_page_dirty(page); @@ -390,6 +391,7 @@ void nilfs_clear_dirty_pages(struct address_space *mapping) bh = head = page_buffers(page); do { lock_buffer(bh); + clear_buffer_async_write(bh); clear_buffer_dirty(bh); clear_buffer_nilfs_volatile(bh); clear_buffer_nilfs_checked(bh); diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 6f24e67..6bba106 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -302,7 +302,6 @@ static void nilfs_transaction_lock(struct super_block *sb, ti->ti_count = 0; ti->ti_save = cur_ti; ti->ti_magic = NILFS_TI_MAGIC; - INIT_LIST_HEAD(&ti->ti_garbage); current->journal_info = ti; for (;;) { @@ -329,8 +328,6 @@ static void nilfs_transaction_unlock(struct super_block *sb) up_write(&nilfs->ns_segctor_sem); current->journal_info = ti->ti_save; - if (!list_empty(&ti->ti_garbage)) - nilfs_dispose_list(nilfs, &ti->ti_garbage, 0); } static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci, @@ -662,7 +659,7 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode, bh = head = page_buffers(page); do { - if (!buffer_dirty(bh)) + if (!buffer_dirty(bh) || buffer_async_write(bh)) continue; get_bh(bh); list_add_tail(&bh->b_assoc_buffers, listp); @@ -696,7 +693,8 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode, for (i = 0; i < pagevec_count(&pvec); i++) { bh = head = page_buffers(pvec.pages[i]); do { - if (buffer_dirty(bh)) { + if (buffer_dirty(bh) && + !buffer_async_write(bh)) { get_bh(bh); list_add_tail(&bh->b_assoc_buffers, listp); @@ -742,6 +740,15 @@ static void nilfs_dispose_list(struct the_nilfs *nilfs, } } +static void nilfs_iput_work_func(struct work_struct *work) +{ + struct nilfs_sc_info *sci = container_of(work, struct nilfs_sc_info, + sc_iput_work); + struct the_nilfs *nilfs = sci->sc_super->s_fs_info; + + nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 0); +} + static int nilfs_test_metadata_dirty(struct the_nilfs *nilfs, struct nilfs_root *root) { @@ -1436,17 +1443,19 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci, nilfs_clear_logs(&sci->sc_segbufs); - err = nilfs_segctor_extend_segments(sci, nilfs, nadd); - if (unlikely(err)) - return err; - if (sci->sc_stage.flags & NILFS_CF_SUFREED) { err = nilfs_sufile_cancel_freev(nilfs->ns_sufile, sci->sc_freesegs, sci->sc_nfreesegs, NULL); WARN_ON(err); /* do not happen */ + sci->sc_stage.flags &= ~NILFS_CF_SUFREED; } + + err = nilfs_segctor_extend_segments(sci, nilfs, nadd); + if (unlikely(err)) + return err; + nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA); sci->sc_stage = prev_stage; } @@ -1576,6 +1585,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { + set_buffer_async_write(bh); if (bh->b_page != bd_page) { if (bd_page) { lock_page(bd_page); @@ -1589,6 +1599,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { + set_buffer_async_write(bh); if (bh == segbuf->sb_super_root) { if (bh->b_page != bd_page) { lock_page(bd_page); @@ -1674,6 +1685,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err) list_for_each_entry(segbuf, logs, sb_list) { list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { + clear_buffer_async_write(bh); if (bh->b_page != bd_page) { if (bd_page) end_page_writeback(bd_page); @@ -1683,6 +1695,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err) list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { + clear_buffer_async_write(bh); if (bh == segbuf->sb_super_root) { if (bh->b_page != bd_page) { end_page_writeback(bd_page); @@ -1752,6 +1765,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) b_assoc_buffers) { set_buffer_uptodate(bh); clear_buffer_dirty(bh); + clear_buffer_async_write(bh); if (bh->b_page != bd_page) { if (bd_page) end_page_writeback(bd_page); @@ -1773,6 +1787,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) b_assoc_buffers) { set_buffer_uptodate(bh); clear_buffer_dirty(bh); + clear_buffer_async_write(bh); clear_buffer_delay(bh); clear_buffer_nilfs_volatile(bh); clear_buffer_nilfs_redirected(bh); @@ -1887,8 +1902,9 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci, static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci, struct the_nilfs *nilfs) { - struct nilfs_transaction_info *ti = current->journal_info; struct nilfs_inode_info *ii, *n; + int during_mount = !(sci->sc_super->s_flags & MS_ACTIVE); + int defer_iput = false; spin_lock(&nilfs->ns_inode_lock); list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) { @@ -1899,9 +1915,24 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci, clear_bit(NILFS_I_BUSY, &ii->i_state); brelse(ii->i_bh); ii->i_bh = NULL; - list_move_tail(&ii->i_dirty, &ti->ti_garbage); + list_del_init(&ii->i_dirty); + if (!ii->vfs_inode.i_nlink || during_mount) { + /* + * Defer calling iput() to avoid deadlocks if + * i_nlink == 0 or mount is not yet finished. + */ + list_add_tail(&ii->i_dirty, &sci->sc_iput_queue); + defer_iput = true; + } else { + spin_unlock(&nilfs->ns_inode_lock); + iput(&ii->vfs_inode); + spin_lock(&nilfs->ns_inode_lock); + } } spin_unlock(&nilfs->ns_inode_lock); + + if (defer_iput) + schedule_work(&sci->sc_iput_work); } /* @@ -2568,6 +2599,8 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb, INIT_LIST_HEAD(&sci->sc_segbufs); INIT_LIST_HEAD(&sci->sc_write_logs); INIT_LIST_HEAD(&sci->sc_gc_inodes); + INIT_LIST_HEAD(&sci->sc_iput_queue); + INIT_WORK(&sci->sc_iput_work, nilfs_iput_work_func); init_timer(&sci->sc_timer); sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; @@ -2594,6 +2627,8 @@ static void nilfs_segctor_write_out(struct nilfs_sc_info *sci) ret = nilfs_segctor_construct(sci, SC_LSEG_SR); nilfs_transaction_unlock(sci->sc_super); + flush_work(&sci->sc_iput_work); + } while (ret && retrycount-- > 0); } @@ -2618,6 +2653,9 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) || sci->sc_seq_request != sci->sc_seq_done); spin_unlock(&sci->sc_state_lock); + if (flush_work(&sci->sc_iput_work)) + flag = true; + if (flag || !nilfs_segctor_confirm(sci)) nilfs_segctor_write_out(sci); @@ -2627,6 +2665,12 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) nilfs_dispose_list(nilfs, &sci->sc_dirty_files, 1); } + if (!list_empty(&sci->sc_iput_queue)) { + nilfs_warning(sci->sc_super, __func__, + "iput queue is not empty\n"); + nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 1); + } + WARN_ON(!list_empty(&sci->sc_segbufs)); WARN_ON(!list_empty(&sci->sc_write_logs)); diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 38a1d00..a48d6de 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "nilfs.h" @@ -92,6 +93,8 @@ struct nilfs_segsum_pointer { * @sc_nblk_inc: Block count of current generation * @sc_dirty_files: List of files to be written * @sc_gc_inodes: List of GC inodes having blocks to be written + * @sc_iput_queue: list of inodes for which iput should be done + * @sc_iput_work: work struct to defer iput call * @sc_freesegs: array of segment numbers to be freed * @sc_nfreesegs: number of segments on @sc_freesegs * @sc_dsync_inode: inode whose data pages are written for a sync operation @@ -135,6 +138,8 @@ struct nilfs_sc_info { struct list_head sc_dirty_files; struct list_head sc_gc_inodes; + struct list_head sc_iput_queue; + struct work_struct sc_iput_work; __u64 *sc_freesegs; size_t sc_nfreesegs; diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index e913ad1..a721907 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -247,7 +247,7 @@ static int ocfs2_set_acl(handle_t *handle, case ACL_TYPE_ACCESS: name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS; if (acl) { - mode_t mode = inode->i_mode; + umode_t mode = inode->i_mode; ret = posix_acl_equiv_mode(acl, &mode); if (ret < 0) return ret; @@ -290,47 +290,32 @@ static int ocfs2_set_acl(handle_t *handle, return ret; } -int ocfs2_check_acl(struct inode *inode, int mask, unsigned int flags) +struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type) { struct ocfs2_super *osb; struct buffer_head *di_bh = NULL; struct posix_acl *acl; int ret = -EAGAIN; - if (flags & IPERM_FLAG_RCU) - return -ECHILD; - osb = OCFS2_SB(inode->i_sb); if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) - return ret; + return NULL; ret = ocfs2_read_inode_block(inode, &di_bh); - if (ret < 0) { - mlog_errno(ret); - return ret; - } + if (ret < 0) + return ERR_PTR(ret); - acl = ocfs2_get_acl_nolock(inode, ACL_TYPE_ACCESS, di_bh); + acl = ocfs2_get_acl_nolock(inode, type, di_bh); brelse(di_bh); - if (IS_ERR(acl)) { - mlog_errno(PTR_ERR(acl)); - return PTR_ERR(acl); - } - if (acl) { - ret = posix_acl_permission(inode, acl, mask); - posix_acl_release(acl); - return ret; - } - - return -EAGAIN; + return acl; } int ocfs2_acl_chmod(struct inode *inode) { struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - struct posix_acl *acl, *clone; + struct posix_acl *acl; int ret; if (S_ISLNK(inode->i_mode)) @@ -342,15 +327,12 @@ int ocfs2_acl_chmod(struct inode *inode) acl = ocfs2_get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl) || !acl) return PTR_ERR(acl); - clone = posix_acl_clone(acl, GFP_KERNEL); + ret = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); + if (ret) + return ret; + ret = ocfs2_set_acl(NULL, inode, NULL, ACL_TYPE_ACCESS, + acl, NULL, NULL); posix_acl_release(acl); - if (!clone) - return -ENOMEM; - ret = posix_acl_chmod_masq(clone, inode->i_mode); - if (!ret) - ret = ocfs2_set_acl(NULL, inode, NULL, ACL_TYPE_ACCESS, - clone, NULL, NULL); - posix_acl_release(clone); return ret; } @@ -369,7 +351,7 @@ int ocfs2_init_acl(handle_t *handle, struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct posix_acl *acl = NULL; int ret = 0, ret2; - mode_t mode; + umode_t mode; if (!S_ISLNK(inode->i_mode)) { if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { @@ -388,8 +370,6 @@ int ocfs2_init_acl(handle_t *handle, } } if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) { - struct posix_acl *clone; - if (S_ISDIR(inode->i_mode)) { ret = ocfs2_set_acl(handle, inode, di_bh, ACL_TYPE_DEFAULT, acl, @@ -397,27 +377,22 @@ int ocfs2_init_acl(handle_t *handle, if (ret) goto cleanup; } - clone = posix_acl_clone(acl, GFP_NOFS); - ret = -ENOMEM; - if (!clone) - goto cleanup; - mode = inode->i_mode; - ret = posix_acl_create_masq(clone, &mode); - if (ret >= 0) { - ret2 = ocfs2_acl_set_mode(inode, di_bh, handle, mode); - if (ret2) { - mlog_errno(ret2); - ret = ret2; - goto cleanup; - } - if (ret > 0) { - ret = ocfs2_set_acl(handle, inode, - di_bh, ACL_TYPE_ACCESS, - clone, meta_ac, data_ac); - } + ret = posix_acl_create(&acl, GFP_NOFS, &mode); + if (ret < 0) + return ret; + + ret2 = ocfs2_acl_set_mode(inode, di_bh, handle, mode); + if (ret2) { + mlog_errno(ret2); + ret = ret2; + goto cleanup; + } + if (ret > 0) { + ret = ocfs2_set_acl(handle, inode, + di_bh, ACL_TYPE_ACCESS, + acl, meta_ac, data_ac); } - posix_acl_release(clone); } cleanup: posix_acl_release(acl); diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h index 4fe7c9c..071fbd38 100644 --- a/fs/ocfs2/acl.h +++ b/fs/ocfs2/acl.h @@ -26,7 +26,7 @@ struct ocfs2_acl_entry { __le32 e_id; }; -extern int ocfs2_check_acl(struct inode *, int, unsigned int); +struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type); extern int ocfs2_acl_chmod(struct inode *); extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *, struct buffer_head *, struct buffer_head *, diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 76c8165..31b9463 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -5699,7 +5699,7 @@ int ocfs2_remove_btree_range(struct inode *inode, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); - goto out; + goto out_commit; } dquot_free_space_nodirty(inode, diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index ac97bca..16653b2 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -290,7 +290,15 @@ static int ocfs2_readpage(struct file *file, struct page *page) } if (down_read_trylock(&oi->ip_alloc_sem) == 0) { + /* + * Unlock the page and cycle ip_alloc_sem so that we don't + * busyloop waiting for ip_alloc_sem to unlock + */ ret = AOP_TRUNCATED_PAGE; + unlock_page(page); + unlock = 0; + down_read(&oi->ip_alloc_sem); + up_read(&oi->ip_alloc_sem); goto out_inode_unlock; } @@ -551,9 +559,8 @@ bail: /* * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're - * particularly interested in the aio/dio case. Like the core uses - * i_alloc_sem, we use the rw_lock DLM lock to protect io on one node from - * truncation on another. + * particularly interested in the aio/dio case. We use the rw_lock DLM lock + * to protect io on one node from truncation on another. */ static void ocfs2_dio_end_io(struct kiocb *iocb, loff_t offset, @@ -564,13 +571,21 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, { struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; int level; + wait_queue_head_t *wq = ocfs2_ioend_wq(inode); /* this io's submitter should not have unlocked this before we could */ BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); - if (ocfs2_iocb_is_sem_locked(iocb)) { - up_read(&inode->i_alloc_sem); + if (ocfs2_iocb_is_sem_locked(iocb)) ocfs2_iocb_clear_sem_locked(iocb); + + if (ocfs2_iocb_is_unaligned_aio(iocb)) { + ocfs2_iocb_clear_unaligned_aio(iocb); + + if (atomic_dec_and_test(&OCFS2_I(inode)->ip_unaligned_aio) && + waitqueue_active(wq)) { + wake_up_all(wq); + } } ocfs2_iocb_clear_rw_locked(iocb); @@ -578,6 +593,7 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, level = ocfs2_iocb_rw_locked_level(iocb); ocfs2_rw_unlock(inode, level); + inode_dio_done(inode); if (is_async) aio_complete(iocb, ret, 0); } @@ -865,6 +881,12 @@ struct ocfs2_write_ctxt { struct page *w_target_page; /* + * w_target_locked is used for page_mkwrite path indicating no unlocking + * against w_target_page in ocfs2_write_end_nolock. + */ + unsigned int w_target_locked:1; + + /* * ocfs2_write_end() uses this to know what the real range to * write in the target should be. */ @@ -895,10 +917,32 @@ void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages) } } -static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) +static void ocfs2_unlock_pages(struct ocfs2_write_ctxt *wc) { + int i; + + /* + * w_target_locked is only set to true in the page_mkwrite() case. + * The intent is to allow us to lock the target page from write_begin() + * to write_end(). The caller must hold a ref on w_target_page. + */ + if (wc->w_target_locked) { + BUG_ON(!wc->w_target_page); + for (i = 0; i < wc->w_num_pages; i++) { + if (wc->w_target_page == wc->w_pages[i]) { + wc->w_pages[i] = NULL; + break; + } + } + mark_page_accessed(wc->w_target_page); + page_cache_release(wc->w_target_page); + } ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages); +} +static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) +{ + ocfs2_unlock_pages(wc); brelse(wc->w_di_bh); kfree(wc); } @@ -1134,20 +1178,17 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping, */ lock_page(mmap_page); + /* Exit and let the caller retry */ if (mmap_page->mapping != mapping) { + WARN_ON(mmap_page->mapping); unlock_page(mmap_page); - /* - * Sanity check - the locking in - * ocfs2_pagemkwrite() should ensure - * that this code doesn't trigger. - */ - ret = -EINVAL; - mlog_errno(ret); + ret = -EAGAIN; goto out; } page_cache_get(mmap_page); wc->w_pages[i] = mmap_page; + wc->w_target_locked = true; } else { wc->w_pages[i] = find_or_create_page(mapping, index, GFP_NOFS); @@ -1162,6 +1203,8 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping, wc->w_target_page = wc->w_pages[i]; } out: + if (ret) + wc->w_target_locked = false; return ret; } @@ -1819,11 +1862,23 @@ try_again: */ ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len, cluster_of_pages, mmap_page); - if (ret) { + if (ret && ret != -EAGAIN) { mlog_errno(ret); goto out_quota; } + /* + * ocfs2_grab_pages_for_write() returns -EAGAIN if it could not lock + * the target page. In this case, we exit with no error and no target + * page. This will trigger the caller, page_mkwrite(), to re-try + * the operation. + */ + if (ret == -EAGAIN) { + BUG_ON(wc->w_target_page); + ret = 0; + goto out_quota; + } + ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos, len); if (ret) { @@ -2008,11 +2063,19 @@ out_write_size: di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); ocfs2_journal_dirty(handle, wc->w_di_bh); + /* unlock pages before dealloc since it needs acquiring j_trans_barrier + * lock, or it will cause a deadlock since journal commit threads holds + * this lock and will ask for the page lock when flushing the data. + * put it here to preserve the unlock order. + */ + ocfs2_unlock_pages(wc); + ocfs2_commit_trans(osb, handle); ocfs2_run_deallocs(osb, &wc->w_dealloc); - ocfs2_free_write_ctxt(wc); + brelse(wc->w_di_bh); + kfree(wc); return copied; } diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index 75cf3ad..ffb2da3 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h @@ -78,6 +78,7 @@ enum ocfs2_iocb_lock_bits { OCFS2_IOCB_RW_LOCK = 0, OCFS2_IOCB_RW_LOCK_LEVEL, OCFS2_IOCB_SEM, + OCFS2_IOCB_UNALIGNED_IO, OCFS2_IOCB_NUM_LOCKS }; @@ -91,4 +92,17 @@ enum ocfs2_iocb_lock_bits { clear_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private) #define ocfs2_iocb_is_sem_locked(iocb) \ test_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private) + +#define ocfs2_iocb_set_unaligned_aio(iocb) \ + set_bit(OCFS2_IOCB_UNALIGNED_IO, (unsigned long *)&iocb->private) +#define ocfs2_iocb_clear_unaligned_aio(iocb) \ + clear_bit(OCFS2_IOCB_UNALIGNED_IO, (unsigned long *)&iocb->private) +#define ocfs2_iocb_is_unaligned_aio(iocb) \ + test_bit(OCFS2_IOCB_UNALIGNED_IO, (unsigned long *)&iocb->private) + +#define OCFS2_IOEND_WQ_HASH_SZ 37 +#define ocfs2_ioend_wq(v) (&ocfs2__ioend_wq[((unsigned long)(v)) %\ + OCFS2_IOEND_WQ_HASH_SZ]) +extern wait_queue_head_t ocfs2__ioend_wq[OCFS2_IOEND_WQ_HASH_SZ]; + #endif /* OCFS2_FILE_H */ diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 5d18ad1..4f66e00 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -90,7 +90,6 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, * information for this bh as it's not marked locally * uptodate. */ ret = -EIO; - put_bh(bh); mlog_errno(ret); } @@ -420,7 +419,6 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb, if (!buffer_uptodate(bh)) { ret = -EIO; - put_bh(bh); mlog_errno(ret); } diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 9a3e6bb..a4e855e 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -216,6 +216,7 @@ struct o2hb_region { struct list_head hr_all_item; unsigned hr_unclean_stop:1, + hr_aborted_start:1, hr_item_pinned:1, hr_item_dropped:1; @@ -254,6 +255,10 @@ struct o2hb_region { * a more complete api that doesn't lead to this sort of fragility. */ atomic_t hr_steady_iterations; + /* terminate o2hb thread if it does not reach steady state + * (hr_steady_iterations == 0) within hr_unsteady_iterations */ + atomic_t hr_unsteady_iterations; + char hr_dev_name[BDEVNAME_SIZE]; unsigned int hr_timeout_ms; @@ -324,6 +329,10 @@ static void o2hb_write_timeout(struct work_struct *work) static void o2hb_arm_write_timeout(struct o2hb_region *reg) { + /* Arm writeout only after thread reaches steady state */ + if (atomic_read(®->hr_steady_iterations) != 0) + return; + mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n", O2HB_MAX_WRITE_TIMEOUT_MS); @@ -537,9 +546,14 @@ static int o2hb_verify_crc(struct o2hb_region *reg, return read == computed; } -/* We want to make sure that nobody is heartbeating on top of us -- - * this will help detect an invalid configuration. */ -static void o2hb_check_last_timestamp(struct o2hb_region *reg) +/* + * Compare the slot data with what we wrote in the last iteration. + * If the match fails, print an appropriate error message. This is to + * detect errors like... another node hearting on the same slot, + * flaky device that is losing writes, etc. + * Returns 1 if check succeeds, 0 otherwise. + */ +static int o2hb_check_own_slot(struct o2hb_region *reg) { struct o2hb_disk_slot *slot; struct o2hb_disk_heartbeat_block *hb_block; @@ -548,13 +562,13 @@ static void o2hb_check_last_timestamp(struct o2hb_region *reg) slot = ®->hr_slots[o2nm_this_node()]; /* Don't check on our 1st timestamp */ if (!slot->ds_last_time) - return; + return 0; hb_block = slot->ds_raw_block; if (le64_to_cpu(hb_block->hb_seq) == slot->ds_last_time && le64_to_cpu(hb_block->hb_generation) == slot->ds_last_generation && hb_block->hb_node == slot->ds_node_num) - return; + return 1; #define ERRSTR1 "Another node is heartbeating on device" #define ERRSTR2 "Heartbeat generation mismatch on device" @@ -574,6 +588,8 @@ static void o2hb_check_last_timestamp(struct o2hb_region *reg) (unsigned long long)slot->ds_last_time, hb_block->hb_node, (unsigned long long)le64_to_cpu(hb_block->hb_generation), (unsigned long long)le64_to_cpu(hb_block->hb_seq)); + + return 0; } static inline void o2hb_prepare_block(struct o2hb_region *reg, @@ -719,17 +735,24 @@ static void o2hb_shutdown_slot(struct o2hb_disk_slot *slot) o2nm_node_put(node); } -static void o2hb_set_quorum_device(struct o2hb_region *reg, - struct o2hb_disk_slot *slot) +static void o2hb_set_quorum_device(struct o2hb_region *reg) { - assert_spin_locked(&o2hb_live_lock); - if (!o2hb_global_heartbeat_active()) return; - if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) + /* Prevent race with o2hb_heartbeat_group_drop_item() */ + if (kthread_should_stop()) + return; + + /* Tag region as quorum only after thread reaches steady state */ + if (atomic_read(®->hr_steady_iterations) != 0) return; + spin_lock(&o2hb_live_lock); + + if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) + goto unlock; + /* * A region can be added to the quorum only when it sees all * live nodes heartbeat on it. In other words, the region has been @@ -737,13 +760,10 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg, */ if (memcmp(reg->hr_live_node_bitmap, o2hb_live_node_bitmap, sizeof(o2hb_live_node_bitmap))) - return; - - if (slot->ds_changed_samples < O2HB_LIVE_THRESHOLD) - return; + goto unlock; - printk(KERN_NOTICE "o2hb: Region %s is now a quorum device\n", - config_item_name(®->hr_item)); + printk(KERN_NOTICE "o2hb: Region %s (%s) is now a quorum device\n", + config_item_name(®->hr_item), reg->hr_dev_name); set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap); @@ -754,6 +774,8 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg, if (o2hb_pop_count(&o2hb_quorum_region_bitmap, O2NM_MAX_REGIONS) > O2HB_PIN_CUT_OFF) o2hb_region_unpin(NULL); +unlock: + spin_unlock(&o2hb_live_lock); } static int o2hb_check_slot(struct o2hb_region *reg, @@ -925,8 +947,6 @@ fire_callbacks: slot->ds_equal_samples = 0; } out: - o2hb_set_quorum_device(reg, slot); - spin_unlock(&o2hb_live_lock); o2hb_run_event_list(&event); @@ -957,7 +977,8 @@ static int o2hb_highest_node(unsigned long *nodes, static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) { - int i, ret, highest_node, change = 0; + int i, ret, highest_node; + int membership_change = 0, own_slot_ok = 0; unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)]; unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; struct o2hb_bio_wait_ctxt write_wc; @@ -966,7 +987,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) sizeof(configured_nodes)); if (ret) { mlog_errno(ret); - return ret; + goto bail; } /* @@ -982,8 +1003,9 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES); if (highest_node >= O2NM_MAX_NODES) { - mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n"); - return -EINVAL; + mlog(ML_NOTICE, "o2hb: No configured nodes found!\n"); + ret = -EINVAL; + goto bail; } /* No sense in reading the slots of nodes that don't exist @@ -993,29 +1015,27 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) ret = o2hb_read_slots(reg, highest_node + 1); if (ret < 0) { mlog_errno(ret); - return ret; + goto bail; } /* With an up to date view of the slots, we can check that no * other node has been improperly configured to heartbeat in * our slot. */ - o2hb_check_last_timestamp(reg); + own_slot_ok = o2hb_check_own_slot(reg); /* fill in the proper info for our next heartbeat */ o2hb_prepare_block(reg, reg->hr_generation); - /* And fire off the write. Note that we don't wait on this I/O - * until later. */ ret = o2hb_issue_node_write(reg, &write_wc); if (ret < 0) { mlog_errno(ret); - return ret; + goto bail; } i = -1; while((i = find_next_bit(configured_nodes, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) { - change |= o2hb_check_slot(reg, ®->hr_slots[i]); + membership_change |= o2hb_check_slot(reg, ®->hr_slots[i]); } /* @@ -1030,18 +1050,39 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) * disk */ mlog(ML_ERROR, "Write error %d on device \"%s\"\n", write_wc.wc_error, reg->hr_dev_name); - return write_wc.wc_error; + ret = write_wc.wc_error; + goto bail; } - o2hb_arm_write_timeout(reg); + /* Skip disarming the timeout if own slot has stale/bad data */ + if (own_slot_ok) { + o2hb_set_quorum_device(reg); + o2hb_arm_write_timeout(reg); + } +bail: /* let the person who launched us know when things are steady */ - if (!change && (atomic_read(®->hr_steady_iterations) != 0)) { - if (atomic_dec_and_test(®->hr_steady_iterations)) + if (atomic_read(®->hr_steady_iterations) != 0) { + if (!ret && own_slot_ok && !membership_change) { + if (atomic_dec_and_test(®->hr_steady_iterations)) + wake_up(&o2hb_steady_queue); + } + } + + if (atomic_read(®->hr_steady_iterations) != 0) { + if (atomic_dec_and_test(®->hr_unsteady_iterations)) { + printk(KERN_NOTICE "o2hb: Unable to stabilize " + "heartbeart on region %s (%s)\n", + config_item_name(®->hr_item), + reg->hr_dev_name); + atomic_set(®->hr_steady_iterations, 0); + reg->hr_aborted_start = 1; wake_up(&o2hb_steady_queue); + ret = -EIO; + } } - return 0; + return ret; } /* Subtract b from a, storing the result in a. a *must* have a larger @@ -1095,7 +1136,8 @@ static int o2hb_thread(void *data) /* Pin node */ o2nm_depend_this_node(); - while (!kthread_should_stop() && !reg->hr_unclean_stop) { + while (!kthread_should_stop() && + !reg->hr_unclean_stop && !reg->hr_aborted_start) { /* We track the time spent inside * o2hb_do_disk_heartbeat so that we avoid more than * hr_timeout_ms between disk writes. On busy systems @@ -1103,10 +1145,7 @@ static int o2hb_thread(void *data) * likely to time itself out. */ do_gettimeofday(&before_hb); - i = 0; - do { - ret = o2hb_do_disk_heartbeat(reg); - } while (ret && ++i < 2); + ret = o2hb_do_disk_heartbeat(reg); do_gettimeofday(&after_hb); elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb); @@ -1117,7 +1156,8 @@ static int o2hb_thread(void *data) after_hb.tv_sec, (unsigned long) after_hb.tv_usec, elapsed_msec); - if (elapsed_msec < reg->hr_timeout_ms) { + if (!kthread_should_stop() && + elapsed_msec < reg->hr_timeout_ms) { /* the kthread api has blocked signals for us so no * need to record the return value. */ msleep_interruptible(reg->hr_timeout_ms - elapsed_msec); @@ -1134,20 +1174,20 @@ static int o2hb_thread(void *data) * to timeout on this region when we could just as easily * write a clear generation - thus indicating to them that * this node has left this region. - * - * XXX: Should we skip this on unclean_stop? */ - o2hb_prepare_block(reg, 0); - ret = o2hb_issue_node_write(reg, &write_wc); - if (ret == 0) { - o2hb_wait_on_io(reg, &write_wc); - } else { - mlog_errno(ret); + */ + if (!reg->hr_unclean_stop && !reg->hr_aborted_start) { + o2hb_prepare_block(reg, 0); + ret = o2hb_issue_node_write(reg, &write_wc); + if (ret == 0) + o2hb_wait_on_io(reg, &write_wc); + else + mlog_errno(ret); } /* Unpin node */ o2nm_undepend_this_node(); - mlog(ML_HEARTBEAT|ML_KTHREAD, "hb thread exiting\n"); + mlog(ML_HEARTBEAT|ML_KTHREAD, "o2hb thread exiting\n"); return 0; } @@ -1158,6 +1198,7 @@ static int o2hb_debug_open(struct inode *inode, struct file *file) struct o2hb_debug_buf *db = inode->i_private; struct o2hb_region *reg; unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)]; + unsigned long lts; char *buf = NULL; int i = -1; int out = 0; @@ -1194,9 +1235,11 @@ static int o2hb_debug_open(struct inode *inode, struct file *file) case O2HB_DB_TYPE_REGION_ELAPSED_TIME: reg = (struct o2hb_region *)db->db_data; - out += snprintf(buf + out, PAGE_SIZE - out, "%u\n", - jiffies_to_msecs(jiffies - - reg->hr_last_timeout_start)); + lts = reg->hr_last_timeout_start; + /* If 0, it has never been set before */ + if (lts) + lts = jiffies_to_msecs(jiffies - lts); + out += snprintf(buf + out, PAGE_SIZE - out, "%lu\n", lts); goto done; case O2HB_DB_TYPE_REGION_PINNED: @@ -1426,6 +1469,8 @@ static void o2hb_region_release(struct config_item *item) struct page *page; struct o2hb_region *reg = to_o2hb_region(item); + mlog(ML_HEARTBEAT, "hb region release (%s)\n", reg->hr_dev_name); + if (reg->hr_tmp_block) kfree(reg->hr_tmp_block); @@ -1792,7 +1837,10 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, live_threshold <<= 1; spin_unlock(&o2hb_live_lock); } - atomic_set(®->hr_steady_iterations, live_threshold + 1); + ++live_threshold; + atomic_set(®->hr_steady_iterations, live_threshold); + /* unsteady_iterations is double the steady_iterations */ + atomic_set(®->hr_unsteady_iterations, (live_threshold << 1)); hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s", reg->hr_item.ci_name); @@ -1809,14 +1857,12 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, ret = wait_event_interruptible(o2hb_steady_queue, atomic_read(®->hr_steady_iterations) == 0); if (ret) { - /* We got interrupted (hello ptrace!). Clean up */ - spin_lock(&o2hb_live_lock); - hb_task = reg->hr_task; - reg->hr_task = NULL; - spin_unlock(&o2hb_live_lock); + atomic_set(®->hr_steady_iterations, 0); + reg->hr_aborted_start = 1; + } - if (hb_task) - kthread_stop(hb_task); + if (reg->hr_aborted_start) { + ret = -EIO; goto out; } @@ -1833,8 +1879,8 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, ret = -EIO; if (hb_task && o2hb_global_heartbeat_active()) - printk(KERN_NOTICE "o2hb: Heartbeat started on region %s\n", - config_item_name(®->hr_item)); + printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%s)\n", + config_item_name(®->hr_item), reg->hr_dev_name); out: if (filp) @@ -2092,13 +2138,6 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group, /* stop the thread when the user removes the region dir */ spin_lock(&o2hb_live_lock); - if (o2hb_global_heartbeat_active()) { - clear_bit(reg->hr_region_num, o2hb_region_bitmap); - clear_bit(reg->hr_region_num, o2hb_live_region_bitmap); - if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) - quorum_region = 1; - clear_bit(reg->hr_region_num, o2hb_quorum_region_bitmap); - } hb_task = reg->hr_task; reg->hr_task = NULL; reg->hr_item_dropped = 1; @@ -2107,19 +2146,30 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group, if (hb_task) kthread_stop(hb_task); + if (o2hb_global_heartbeat_active()) { + spin_lock(&o2hb_live_lock); + clear_bit(reg->hr_region_num, o2hb_region_bitmap); + clear_bit(reg->hr_region_num, o2hb_live_region_bitmap); + if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) + quorum_region = 1; + clear_bit(reg->hr_region_num, o2hb_quorum_region_bitmap); + spin_unlock(&o2hb_live_lock); + printk(KERN_NOTICE "o2hb: Heartbeat %s on region %s (%s)\n", + ((atomic_read(®->hr_steady_iterations) == 0) ? + "stopped" : "start aborted"), config_item_name(item), + reg->hr_dev_name); + } + /* * If we're racing a dev_write(), we need to wake them. They will * check reg->hr_task */ if (atomic_read(®->hr_steady_iterations) != 0) { + reg->hr_aborted_start = 1; atomic_set(®->hr_steady_iterations, 0); wake_up(&o2hb_steady_queue); } - if (o2hb_global_heartbeat_active()) - printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n", - config_item_name(®->hr_item)); - config_item_put(item); if (!o2hb_global_heartbeat_active() || !quorum_region) diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c index 3a58359..dc45deb 100644 --- a/fs/ocfs2/cluster/netdebug.c +++ b/fs/ocfs2/cluster/netdebug.c @@ -47,6 +47,7 @@ #define SC_DEBUG_NAME "sock_containers" #define NST_DEBUG_NAME "send_tracking" #define STATS_DEBUG_NAME "stats" +#define NODES_DEBUG_NAME "connected_nodes" #define SHOW_SOCK_CONTAINERS 0 #define SHOW_SOCK_STATS 1 @@ -55,6 +56,7 @@ static struct dentry *o2net_dentry; static struct dentry *sc_dentry; static struct dentry *nst_dentry; static struct dentry *stats_dentry; +static struct dentry *nodes_dentry; static DEFINE_SPINLOCK(o2net_debug_lock); @@ -491,53 +493,87 @@ static const struct file_operations sc_seq_fops = { .release = sc_fop_release, }; -int o2net_debugfs_init(void) +static int o2net_fill_bitmap(char *buf, int len) { - o2net_dentry = debugfs_create_dir(O2NET_DEBUG_DIR, NULL); - if (!o2net_dentry) { - mlog_errno(-ENOMEM); - goto bail; - } + unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)]; + int i = -1, out = 0; - nst_dentry = debugfs_create_file(NST_DEBUG_NAME, S_IFREG|S_IRUSR, - o2net_dentry, NULL, - &nst_seq_fops); - if (!nst_dentry) { - mlog_errno(-ENOMEM); - goto bail; - } + o2net_fill_node_map(map, sizeof(map)); - sc_dentry = debugfs_create_file(SC_DEBUG_NAME, S_IFREG|S_IRUSR, - o2net_dentry, NULL, - &sc_seq_fops); - if (!sc_dentry) { - mlog_errno(-ENOMEM); - goto bail; - } + while ((i = find_next_bit(map, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) + out += snprintf(buf + out, PAGE_SIZE - out, "%d ", i); + out += snprintf(buf + out, PAGE_SIZE - out, "\n"); - stats_dentry = debugfs_create_file(STATS_DEBUG_NAME, S_IFREG|S_IRUSR, - o2net_dentry, NULL, - &stats_seq_fops); - if (!stats_dentry) { - mlog_errno(-ENOMEM); - goto bail; - } + return out; +} + +static int nodes_fop_open(struct inode *inode, struct file *file) +{ + char *buf; + + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + i_size_write(inode, o2net_fill_bitmap(buf, PAGE_SIZE)); + + file->private_data = buf; return 0; -bail: - debugfs_remove(stats_dentry); - debugfs_remove(sc_dentry); - debugfs_remove(nst_dentry); - debugfs_remove(o2net_dentry); - return -ENOMEM; } +static int o2net_debug_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static ssize_t o2net_debug_read(struct file *file, char __user *buf, + size_t nbytes, loff_t *ppos) +{ + return simple_read_from_buffer(buf, nbytes, ppos, file->private_data, + i_size_read(file->f_mapping->host)); +} + +static const struct file_operations nodes_fops = { + .open = nodes_fop_open, + .release = o2net_debug_release, + .read = o2net_debug_read, + .llseek = generic_file_llseek, +}; + void o2net_debugfs_exit(void) { + debugfs_remove(nodes_dentry); debugfs_remove(stats_dentry); debugfs_remove(sc_dentry); debugfs_remove(nst_dentry); debugfs_remove(o2net_dentry); } +int o2net_debugfs_init(void) +{ + mode_t mode = S_IFREG|S_IRUSR; + + o2net_dentry = debugfs_create_dir(O2NET_DEBUG_DIR, NULL); + if (o2net_dentry) + nst_dentry = debugfs_create_file(NST_DEBUG_NAME, mode, + o2net_dentry, NULL, &nst_seq_fops); + if (nst_dentry) + sc_dentry = debugfs_create_file(SC_DEBUG_NAME, mode, + o2net_dentry, NULL, &sc_seq_fops); + if (sc_dentry) + stats_dentry = debugfs_create_file(STATS_DEBUG_NAME, mode, + o2net_dentry, NULL, &stats_seq_fops); + if (stats_dentry) + nodes_dentry = debugfs_create_file(NODES_DEBUG_NAME, mode, + o2net_dentry, NULL, &nodes_fops); + if (nodes_dentry) + return 0; + + o2net_debugfs_exit(); + mlog_errno(-ENOMEM); + return -ENOMEM; +} + #endif /* CONFIG_DEBUG_FS */ diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index db5ee4b..044e7b5 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -59,6 +59,7 @@ #include #include #include +#include #include #include @@ -545,7 +546,7 @@ static void o2net_set_nn_state(struct o2net_node *nn, } if (was_valid && !valid) { - printk(KERN_NOTICE "o2net: no longer connected to " + printk(KERN_NOTICE "o2net: No longer connected to " SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc)); o2net_complete_nodes_nsw(nn); } @@ -555,7 +556,7 @@ static void o2net_set_nn_state(struct o2net_node *nn, cancel_delayed_work(&nn->nn_connect_expired); printk(KERN_NOTICE "o2net: %s " SC_NODEF_FMT "\n", o2nm_this_node() > sc->sc_node->nd_num ? - "connected to" : "accepted connection from", + "Connected to" : "Accepted connection from", SC_NODEF_ARGS(sc)); } @@ -643,7 +644,7 @@ static void o2net_state_change(struct sock *sk) o2net_sc_queue_work(sc, &sc->sc_connect_work); break; default: - printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT + printk(KERN_INFO "o2net: Connection to " SC_NODEF_FMT " shutdown, state %d\n", SC_NODEF_ARGS(sc), sk->sk_state); o2net_sc_queue_work(sc, &sc->sc_shutdown_work); @@ -1034,6 +1035,25 @@ static int o2net_tx_can_proceed(struct o2net_node *nn, return ret; } +/* Get a map of all nodes to which this node is currently connected to */ +void o2net_fill_node_map(unsigned long *map, unsigned bytes) +{ + struct o2net_sock_container *sc; + int node, ret; + + BUG_ON(bytes < (BITS_TO_LONGS(O2NM_MAX_NODES) * sizeof(unsigned long))); + + memset(map, 0, bytes); + for (node = 0; node < O2NM_MAX_NODES; ++node) { + o2net_tx_can_proceed(o2net_nn_from_num(node), &sc, &ret); + if (!ret) { + set_bit(node, map); + sc_put(sc); + } + } +} +EXPORT_SYMBOL_GPL(o2net_fill_node_map); + int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, size_t caller_veclen, u8 target_node, int *status) { @@ -1284,11 +1304,11 @@ static int o2net_check_handshake(struct o2net_sock_container *sc) struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); if (hand->protocol_version != cpu_to_be64(O2NET_PROTOCOL_VERSION)) { - mlog(ML_NOTICE, SC_NODEF_FMT " advertised net protocol " - "version %llu but %llu is required, disconnecting\n", - SC_NODEF_ARGS(sc), - (unsigned long long)be64_to_cpu(hand->protocol_version), - O2NET_PROTOCOL_VERSION); + printk(KERN_NOTICE "o2net: " SC_NODEF_FMT " Advertised net " + "protocol version %llu but %llu is required. " + "Disconnecting.\n", SC_NODEF_ARGS(sc), + (unsigned long long)be64_to_cpu(hand->protocol_version), + O2NET_PROTOCOL_VERSION); /* don't bother reconnecting if its the wrong version. */ o2net_ensure_shutdown(nn, sc, -ENOTCONN); @@ -1302,33 +1322,33 @@ static int o2net_check_handshake(struct o2net_sock_container *sc) */ if (be32_to_cpu(hand->o2net_idle_timeout_ms) != o2net_idle_timeout()) { - mlog(ML_NOTICE, SC_NODEF_FMT " uses a network idle timeout of " - "%u ms, but we use %u ms locally. disconnecting\n", - SC_NODEF_ARGS(sc), - be32_to_cpu(hand->o2net_idle_timeout_ms), - o2net_idle_timeout()); + printk(KERN_NOTICE "o2net: " SC_NODEF_FMT " uses a network " + "idle timeout of %u ms, but we use %u ms locally. " + "Disconnecting.\n", SC_NODEF_ARGS(sc), + be32_to_cpu(hand->o2net_idle_timeout_ms), + o2net_idle_timeout()); o2net_ensure_shutdown(nn, sc, -ENOTCONN); return -1; } if (be32_to_cpu(hand->o2net_keepalive_delay_ms) != o2net_keepalive_delay()) { - mlog(ML_NOTICE, SC_NODEF_FMT " uses a keepalive delay of " - "%u ms, but we use %u ms locally. disconnecting\n", - SC_NODEF_ARGS(sc), - be32_to_cpu(hand->o2net_keepalive_delay_ms), - o2net_keepalive_delay()); + printk(KERN_NOTICE "o2net: " SC_NODEF_FMT " uses a keepalive " + "delay of %u ms, but we use %u ms locally. " + "Disconnecting.\n", SC_NODEF_ARGS(sc), + be32_to_cpu(hand->o2net_keepalive_delay_ms), + o2net_keepalive_delay()); o2net_ensure_shutdown(nn, sc, -ENOTCONN); return -1; } if (be32_to_cpu(hand->o2hb_heartbeat_timeout_ms) != O2HB_MAX_WRITE_TIMEOUT_MS) { - mlog(ML_NOTICE, SC_NODEF_FMT " uses a heartbeat timeout of " - "%u ms, but we use %u ms locally. disconnecting\n", - SC_NODEF_ARGS(sc), - be32_to_cpu(hand->o2hb_heartbeat_timeout_ms), - O2HB_MAX_WRITE_TIMEOUT_MS); + printk(KERN_NOTICE "o2net: " SC_NODEF_FMT " uses a heartbeat " + "timeout of %u ms, but we use %u ms locally. " + "Disconnecting.\n", SC_NODEF_ARGS(sc), + be32_to_cpu(hand->o2hb_heartbeat_timeout_ms), + O2HB_MAX_WRITE_TIMEOUT_MS); o2net_ensure_shutdown(nn, sc, -ENOTCONN); return -1; } @@ -1539,28 +1559,16 @@ static void o2net_idle_timer(unsigned long data) { struct o2net_sock_container *sc = (struct o2net_sock_container *)data; struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); - #ifdef CONFIG_DEBUG_FS - ktime_t now = ktime_get(); + unsigned long msecs = ktime_to_ms(ktime_get()) - + ktime_to_ms(sc->sc_tv_timer); +#else + unsigned long msecs = o2net_idle_timeout(); #endif - printk(KERN_NOTICE "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " - "seconds, shutting it down.\n", SC_NODEF_ARGS(sc), - o2net_idle_timeout() / 1000, - o2net_idle_timeout() % 1000); - -#ifdef CONFIG_DEBUG_FS - mlog(ML_NOTICE, "Here are some times that might help debug the " - "situation: (Timer: %lld, Now %lld, DataReady %lld, Advance %lld-%lld, " - "Key 0x%08x, Func %u, FuncTime %lld-%lld)\n", - (long long)ktime_to_us(sc->sc_tv_timer), (long long)ktime_to_us(now), - (long long)ktime_to_us(sc->sc_tv_data_ready), - (long long)ktime_to_us(sc->sc_tv_advance_start), - (long long)ktime_to_us(sc->sc_tv_advance_stop), - sc->sc_msg_key, sc->sc_msg_type, - (long long)ktime_to_us(sc->sc_tv_func_start), - (long long)ktime_to_us(sc->sc_tv_func_stop)); -#endif + printk(KERN_NOTICE "o2net: Connection to " SC_NODEF_FMT " has been " + "idle for %lu.%lu secs, shutting it down.\n", SC_NODEF_ARGS(sc), + msecs / 1000, msecs % 1000); /* * Initialize the nn_timeout so that the next connection attempt @@ -1693,8 +1701,8 @@ static void o2net_start_connect(struct work_struct *work) out: if (ret) { - mlog(ML_NOTICE, "connect attempt to " SC_NODEF_FMT " failed " - "with errno %d\n", SC_NODEF_ARGS(sc), ret); + printk(KERN_NOTICE "o2net: Connect attempt to " SC_NODEF_FMT + " failed with errno %d\n", SC_NODEF_ARGS(sc), ret); /* 0 err so that another will be queued and attempted * from set_nn_state */ if (sc) @@ -1717,8 +1725,8 @@ static void o2net_connect_expired(struct work_struct *work) spin_lock(&nn->nn_lock); if (!nn->nn_sc_valid) { - mlog(ML_ERROR, "no connection established with node %u after " - "%u.%u seconds, giving up and returning errors.\n", + printk(KERN_NOTICE "o2net: No connection established with " + "node %u after %u.%u seconds, giving up.\n", o2net_num_from_nn(nn), o2net_idle_timeout() / 1000, o2net_idle_timeout() % 1000); @@ -1861,21 +1869,21 @@ static int o2net_accept_one(struct socket *sock) node = o2nm_get_node_by_ip(sin.sin_addr.s_addr); if (node == NULL) { - mlog(ML_NOTICE, "attempt to connect from unknown node at %pI4:%d\n", - &sin.sin_addr.s_addr, ntohs(sin.sin_port)); + printk(KERN_NOTICE "o2net: Attempt to connect from unknown " + "node at %pI4:%d\n", &sin.sin_addr.s_addr, + ntohs(sin.sin_port)); ret = -EINVAL; goto out; } if (o2nm_this_node() >= node->nd_num) { local_node = o2nm_get_node_by_num(o2nm_this_node()); - mlog(ML_NOTICE, "unexpected connect attempt seen at node '%s' (" - "%u, %pI4:%d) from node '%s' (%u, %pI4:%d)\n", - local_node->nd_name, local_node->nd_num, - &(local_node->nd_ipv4_address), - ntohs(local_node->nd_ipv4_port), - node->nd_name, node->nd_num, &sin.sin_addr.s_addr, - ntohs(sin.sin_port)); + printk(KERN_NOTICE "o2net: Unexpected connect attempt seen " + "at node '%s' (%u, %pI4:%d) from node '%s' (%u, " + "%pI4:%d)\n", local_node->nd_name, local_node->nd_num, + &(local_node->nd_ipv4_address), + ntohs(local_node->nd_ipv4_port), node->nd_name, + node->nd_num, &sin.sin_addr.s_addr, ntohs(sin.sin_port)); ret = -EINVAL; goto out; } @@ -1900,10 +1908,10 @@ static int o2net_accept_one(struct socket *sock) ret = 0; spin_unlock(&nn->nn_lock); if (ret) { - mlog(ML_NOTICE, "attempt to connect from node '%s' at " - "%pI4:%d but it already has an open connection\n", - node->nd_name, &sin.sin_addr.s_addr, - ntohs(sin.sin_port)); + printk(KERN_NOTICE "o2net: Attempt to connect from node '%s' " + "at %pI4:%d but it already has an open connection\n", + node->nd_name, &sin.sin_addr.s_addr, + ntohs(sin.sin_port)); goto out; } @@ -1983,7 +1991,7 @@ static int o2net_open_listening_sock(__be32 addr, __be16 port) ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); if (ret < 0) { - mlog(ML_ERROR, "unable to create socket, ret=%d\n", ret); + printk(KERN_ERR "o2net: Error %d while creating socket\n", ret); goto out; } @@ -2000,16 +2008,15 @@ static int o2net_open_listening_sock(__be32 addr, __be16 port) sock->sk->sk_reuse = 1; ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin)); if (ret < 0) { - mlog(ML_ERROR, "unable to bind socket at %pI4:%u, " - "ret=%d\n", &addr, ntohs(port), ret); + printk(KERN_ERR "o2net: Error %d while binding socket at " + "%pI4:%u\n", ret, &addr, ntohs(port)); goto out; } ret = sock->ops->listen(sock, 64); - if (ret < 0) { - mlog(ML_ERROR, "unable to listen on %pI4:%u, ret=%d\n", - &addr, ntohs(port), ret); - } + if (ret < 0) + printk(KERN_ERR "o2net: Error %d while listening on %pI4:%u\n", + ret, &addr, ntohs(port)); out: if (ret) { diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h index fd6179e..5bada2a 100644 --- a/fs/ocfs2/cluster/tcp.h +++ b/fs/ocfs2/cluster/tcp.h @@ -106,6 +106,8 @@ int o2net_register_handler(u32 msg_type, u32 key, u32 max_len, struct list_head *unreg_list); void o2net_unregister_handler_list(struct list_head *list); +void o2net_fill_node_map(unsigned long *map, unsigned bytes); + struct o2nm_node; int o2net_register_hb_callbacks(void); void o2net_unregister_hb_callbacks(void); diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index e5ba348..26977cc 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -175,7 +175,7 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, spin_lock(&inode->i_lock); list_for_each(p, &inode->i_dentry) { - dentry = list_entry(p, struct dentry, d_alias); + dentry = list_entry(p, struct dentry, d_u.d_alias); spin_lock(&dentry->d_lock); if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 8582e3f..8fe4e28 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -1184,8 +1184,7 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir, if (pde) le16_add_cpu(&pde->rec_len, le16_to_cpu(de->rec_len)); - else - de->inode = 0; + de->inode = 0; dir->i_version++; ocfs2_journal_dirty(handle, bh); goto bail; @@ -2292,7 +2291,7 @@ static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb, ocfs2_journal_dirty(handle, di_bh); i_size_write(inode, size); - inode->i_nlink = 2; + set_nlink(inode, 2); inode->i_blocks = ocfs2_inode_sector_count(inode); ret = ocfs2_mark_inode_dirty(handle, inode, di_bh); @@ -2354,7 +2353,7 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb, ocfs2_journal_dirty(handle, new_bh); i_size_write(inode, inode->i_sb->s_blocksize); - inode->i_nlink = 2; + set_nlink(inode, 2); inode->i_blocks = ocfs2_inode_sector_count(inode); status = ocfs2_mark_inode_dirty(handle, inode, fe_bh); if (status < 0) { diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index d602abb..a5952ce 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -859,8 +859,8 @@ void dlm_complete_recovery_thread(struct dlm_ctxt *dlm); void dlm_wait_for_recovery(struct dlm_ctxt *dlm); void dlm_kick_recovery_thread(struct dlm_ctxt *dlm); int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node); -int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout); -int dlm_wait_for_node_recovery(struct dlm_ctxt *dlm, u8 node, int timeout); +void dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout); +void dlm_wait_for_node_recovery(struct dlm_ctxt *dlm, u8 node, int timeout); void dlm_put(struct dlm_ctxt *dlm); struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm); @@ -877,9 +877,8 @@ static inline void dlm_lockres_get(struct dlm_lock_resource *res) kref_get(&res->refs); } void dlm_lockres_put(struct dlm_lock_resource *res); -void __dlm_unhash_lockres(struct dlm_lock_resource *res); -void __dlm_insert_lockres(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res); +void __dlm_unhash_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); +void __dlm_insert_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm, const char *name, unsigned int len, @@ -902,46 +901,15 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, const char *name, unsigned int namelen); -#define dlm_lockres_set_refmap_bit(bit,res) \ - __dlm_lockres_set_refmap_bit(bit,res,__FILE__,__LINE__) -#define dlm_lockres_clear_refmap_bit(bit,res) \ - __dlm_lockres_clear_refmap_bit(bit,res,__FILE__,__LINE__) +void dlm_lockres_set_refmap_bit(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res, int bit); +void dlm_lockres_clear_refmap_bit(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res, int bit); -static inline void __dlm_lockres_set_refmap_bit(int bit, - struct dlm_lock_resource *res, - const char *file, - int line) -{ - //printk("%s:%d:%.*s: setting bit %d\n", file, line, - // res->lockname.len, res->lockname.name, bit); - set_bit(bit, res->refmap); -} - -static inline void __dlm_lockres_clear_refmap_bit(int bit, - struct dlm_lock_resource *res, - const char *file, - int line) -{ - //printk("%s:%d:%.*s: clearing bit %d\n", file, line, - // res->lockname.len, res->lockname.name, bit); - clear_bit(bit, res->refmap); -} - -void __dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - const char *file, - int line); -void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - int new_lockres, - const char *file, - int line); -#define dlm_lockres_drop_inflight_ref(d,r) \ - __dlm_lockres_drop_inflight_ref(d,r,__FILE__,__LINE__) -#define dlm_lockres_grab_inflight_ref(d,r) \ - __dlm_lockres_grab_inflight_ref(d,r,0,__FILE__,__LINE__) -#define dlm_lockres_grab_inflight_ref_new(d,r) \ - __dlm_lockres_grab_inflight_ref(d,r,1,__FILE__,__LINE__) +void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res); +void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res); void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock); diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 56f82cb..0e28e24 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "cluster/heartbeat.h" #include "cluster/nodemanager.h" diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 6ed6b95..92f2ead 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -157,16 +157,18 @@ static int dlm_protocol_compare(struct dlm_protocol_version *existing, static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm); -void __dlm_unhash_lockres(struct dlm_lock_resource *lockres) +void __dlm_unhash_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { - if (!hlist_unhashed(&lockres->hash_node)) { - hlist_del_init(&lockres->hash_node); - dlm_lockres_put(lockres); - } + if (hlist_unhashed(&res->hash_node)) + return; + + mlog(0, "%s: Unhash res %.*s\n", dlm->name, res->lockname.len, + res->lockname.name); + hlist_del_init(&res->hash_node); + dlm_lockres_put(res); } -void __dlm_insert_lockres(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res) +void __dlm_insert_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { struct hlist_head *bucket; struct qstr *q; @@ -180,6 +182,9 @@ void __dlm_insert_lockres(struct dlm_ctxt *dlm, dlm_lockres_get(res); hlist_add_head(&res->hash_node, bucket); + + mlog(0, "%s: Hash res %.*s\n", dlm->name, res->lockname.len, + res->lockname.name); } struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm, @@ -539,17 +544,17 @@ again: static void __dlm_print_nodes(struct dlm_ctxt *dlm) { - int node = -1; + int node = -1, num = 0; assert_spin_locked(&dlm->spinlock); - printk(KERN_NOTICE "o2dlm: Nodes in domain %s: ", dlm->name); - + printk("( "); while ((node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES, node + 1)) < O2NM_MAX_NODES) { printk("%d ", node); + ++num; } - printk("\n"); + printk(") %u nodes\n", num); } static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, @@ -566,11 +571,10 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, node = exit_msg->node_idx; - printk(KERN_NOTICE "o2dlm: Node %u leaves domain %s\n", node, dlm->name); - spin_lock(&dlm->spinlock); clear_bit(node, dlm->domain_map); clear_bit(node, dlm->exit_domain_map); + printk(KERN_NOTICE "o2dlm: Node %u leaves domain %s ", node, dlm->name); __dlm_print_nodes(dlm); /* notify anything attached to the heartbeat events */ @@ -755,6 +759,7 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm) dlm_mark_domain_leaving(dlm); dlm_leave_domain(dlm); + printk(KERN_NOTICE "o2dlm: Leaving domain %s\n", dlm->name); dlm_force_free_mles(dlm); dlm_complete_dlm_shutdown(dlm); } @@ -970,7 +975,7 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, clear_bit(assert->node_idx, dlm->exit_domain_map); __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); - printk(KERN_NOTICE "o2dlm: Node %u joins domain %s\n", + printk(KERN_NOTICE "o2dlm: Node %u joins domain %s ", assert->node_idx, dlm->name); __dlm_print_nodes(dlm); @@ -1701,8 +1706,10 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm) bail: spin_lock(&dlm->spinlock); __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); - if (!status) + if (!status) { + printk(KERN_NOTICE "o2dlm: Joining domain %s ", dlm->name); __dlm_print_nodes(dlm); + } spin_unlock(&dlm->spinlock); if (ctxt) { @@ -2131,13 +2138,6 @@ struct dlm_ctxt * dlm_register_domain(const char *domain, goto leave; } - if (!o2hb_check_local_node_heartbeating()) { - mlog(ML_ERROR, "the local node has not been configured, or is " - "not heartbeating\n"); - ret = -EPROTO; - goto leave; - } - mlog(0, "register called for domain \"%s\"\n", domain); retry: diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 8d39e0f..975810b 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c @@ -183,10 +183,6 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm, kick_thread = 1; } } - /* reduce the inflight count, this may result in the lockres - * being purged below during calc_usage */ - if (lock->ml.node == dlm->node_num) - dlm_lockres_drop_inflight_ref(dlm, res); spin_unlock(&res->spinlock); wake_up(&res->wq); @@ -231,10 +227,16 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm, lock->ml.type, res->lockname.len, res->lockname.name, flags); + /* + * Wait if resource is getting recovered, remastered, etc. + * If the resource was remastered and new owner is self, then exit. + */ spin_lock(&res->spinlock); - - /* will exit this call with spinlock held */ __dlm_wait_on_lockres(res); + if (res->owner == dlm->node_num) { + spin_unlock(&res->spinlock); + return DLM_RECOVERING; + } res->state |= DLM_LOCK_RES_IN_PROGRESS; /* add lock to local (secondary) queue */ @@ -319,27 +321,23 @@ static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm, tmpret = o2net_send_message(DLM_CREATE_LOCK_MSG, dlm->key, &create, sizeof(create), res->owner, &status); if (tmpret >= 0) { - // successfully sent and received - ret = status; // this is already a dlm_status + ret = status; if (ret == DLM_REJECTED) { - mlog(ML_ERROR, "%s:%.*s: BUG. this is a stale lockres " - "no longer owned by %u. that node is coming back " - "up currently.\n", dlm->name, create.namelen, + mlog(ML_ERROR, "%s: res %.*s, Stale lockres no longer " + "owned by node %u. That node is coming back up " + "currently.\n", dlm->name, create.namelen, create.name, res->owner); dlm_print_one_lock_resource(res); BUG(); } } else { - mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " - "node %u\n", tmpret, DLM_CREATE_LOCK_MSG, dlm->key, - res->owner); - if (dlm_is_host_down(tmpret)) { + mlog(ML_ERROR, "%s: res %.*s, Error %d send CREATE LOCK to " + "node %u\n", dlm->name, create.namelen, create.name, + tmpret, res->owner); + if (dlm_is_host_down(tmpret)) ret = DLM_RECOVERING; - mlog(0, "node %u died so returning DLM_RECOVERING " - "from lock message!\n", res->owner); - } else { + else ret = dlm_err_to_dlm_status(tmpret); - } } return ret; @@ -440,7 +438,7 @@ struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie, /* zero memory only if kernel-allocated */ lksb = kzalloc(sizeof(*lksb), GFP_NOFS); if (!lksb) { - kfree(lock); + kmem_cache_free(dlm_lock_cache, lock); return NULL; } kernel_allocated = 1; @@ -718,18 +716,10 @@ retry_lock: if (status == DLM_RECOVERING || status == DLM_MIGRATING || status == DLM_FORWARD) { - mlog(0, "retrying lock with migration/" - "recovery/in progress\n"); msleep(100); - /* no waiting for dlm_reco_thread */ if (recovery) { if (status != DLM_RECOVERING) goto retry_lock; - - mlog(0, "%s: got RECOVERING " - "for $RECOVERY lock, master " - "was %u\n", dlm->name, - res->owner); /* wait to see the node go down, then * drop down and allow the lockres to * get cleaned up. need to remaster. */ @@ -741,6 +731,14 @@ retry_lock: } } + /* Inflight taken in dlm_get_lock_resource() is dropped here */ + spin_lock(&res->spinlock); + dlm_lockres_drop_inflight_ref(dlm, res); + spin_unlock(&res->spinlock); + + dlm_lockres_calc_usage(dlm, res); + dlm_kick_thread(dlm, res); + if (status != DLM_NORMAL) { lock->lksb->flags &= ~DLM_LKSB_GET_LVB; if (status != DLM_NOTQUEUED) diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 11eefb8..8e48ba5 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -631,39 +631,58 @@ error: return NULL; } -void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - int new_lockres, - const char *file, - int line) +void dlm_lockres_set_refmap_bit(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res, int bit) { - if (!new_lockres) - assert_spin_locked(&res->spinlock); + assert_spin_locked(&res->spinlock); - if (!test_bit(dlm->node_num, res->refmap)) { - BUG_ON(res->inflight_locks != 0); - dlm_lockres_set_refmap_bit(dlm->node_num, res); - } + mlog(0, "res %.*s, set node %u, %ps()\n", res->lockname.len, + res->lockname.name, bit, __builtin_return_address(0)); + + set_bit(bit, res->refmap); +} + +void dlm_lockres_clear_refmap_bit(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res, int bit) +{ + assert_spin_locked(&res->spinlock); + + mlog(0, "res %.*s, clr node %u, %ps()\n", res->lockname.len, + res->lockname.name, bit, __builtin_return_address(0)); + + clear_bit(bit, res->refmap); +} + +static void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res) +{ res->inflight_locks++; - mlog(0, "%s:%.*s: inflight++: now %u\n", - dlm->name, res->lockname.len, res->lockname.name, - res->inflight_locks); + + mlog(0, "%s: res %.*s, inflight++: now %u, %ps()\n", dlm->name, + res->lockname.len, res->lockname.name, res->inflight_locks, + __builtin_return_address(0)); +} + +void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res) +{ + assert_spin_locked(&res->spinlock); + __dlm_lockres_grab_inflight_ref(dlm, res); } -void __dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, - const char *file, - int line) +void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res) { assert_spin_locked(&res->spinlock); BUG_ON(res->inflight_locks == 0); + res->inflight_locks--; - mlog(0, "%s:%.*s: inflight--: now %u\n", - dlm->name, res->lockname.len, res->lockname.name, - res->inflight_locks); - if (res->inflight_locks == 0) - dlm_lockres_clear_refmap_bit(dlm->node_num, res); + + mlog(0, "%s: res %.*s, inflight--: now %u, %ps()\n", dlm->name, + res->lockname.len, res->lockname.name, res->inflight_locks, + __builtin_return_address(0)); + wake_up(&res->wq); } @@ -697,7 +716,6 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, unsigned int hash; int tries = 0; int bit, wait_on_recovery = 0; - int drop_inflight_if_nonlocal = 0; BUG_ON(!lockid); @@ -709,36 +727,46 @@ lookup: spin_lock(&dlm->spinlock); tmpres = __dlm_lookup_lockres_full(dlm, lockid, namelen, hash); if (tmpres) { - int dropping_ref = 0; - spin_unlock(&dlm->spinlock); - spin_lock(&tmpres->spinlock); - /* We wait for the other thread that is mastering the resource */ + + /* + * Right after dlm spinlock was released, dlm_thread could have + * purged the lockres. Check if lockres got unhashed. If so + * start over. + */ + if (hlist_unhashed(&tmpres->hash_node)) { + spin_unlock(&tmpres->spinlock); + dlm_lockres_put(tmpres); + tmpres = NULL; + goto lookup; + } + + /* Wait on the thread that is mastering the resource */ if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { __dlm_wait_on_lockres(tmpres); BUG_ON(tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN); + spin_unlock(&tmpres->spinlock); + dlm_lockres_put(tmpres); + tmpres = NULL; + goto lookup; } - if (tmpres->owner == dlm->node_num) { - BUG_ON(tmpres->state & DLM_LOCK_RES_DROPPING_REF); - dlm_lockres_grab_inflight_ref(dlm, tmpres); - } else if (tmpres->state & DLM_LOCK_RES_DROPPING_REF) - dropping_ref = 1; - spin_unlock(&tmpres->spinlock); - - /* wait until done messaging the master, drop our ref to allow - * the lockres to be purged, start over. */ - if (dropping_ref) { - spin_lock(&tmpres->spinlock); - __dlm_wait_on_lockres_flags(tmpres, DLM_LOCK_RES_DROPPING_REF); + /* Wait on the resource purge to complete before continuing */ + if (tmpres->state & DLM_LOCK_RES_DROPPING_REF) { + BUG_ON(tmpres->owner == dlm->node_num); + __dlm_wait_on_lockres_flags(tmpres, + DLM_LOCK_RES_DROPPING_REF); spin_unlock(&tmpres->spinlock); dlm_lockres_put(tmpres); tmpres = NULL; goto lookup; } - mlog(0, "found in hash!\n"); + /* Grab inflight ref to pin the resource */ + dlm_lockres_grab_inflight_ref(dlm, tmpres); + + spin_unlock(&tmpres->spinlock); if (res) dlm_lockres_put(res); res = tmpres; @@ -829,8 +857,8 @@ lookup: * but they might own this lockres. wait on them. */ bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0); if (bit < O2NM_MAX_NODES) { - mlog(ML_NOTICE, "%s:%.*s: at least one node (%d) to " - "recover before lock mastery can begin\n", + mlog(0, "%s: res %.*s, At least one node (%d) " + "to recover before lock mastery can begin\n", dlm->name, namelen, (char *)lockid, bit); wait_on_recovery = 1; } @@ -843,12 +871,9 @@ lookup: /* finally add the lockres to its hash bucket */ __dlm_insert_lockres(dlm, res); - /* since this lockres is new it doesn't not require the spinlock */ - dlm_lockres_grab_inflight_ref_new(dlm, res); - /* if this node does not become the master make sure to drop - * this inflight reference below */ - drop_inflight_if_nonlocal = 1; + /* since this lockres is new it doesn't not require the spinlock */ + __dlm_lockres_grab_inflight_ref(dlm, res); /* get an extra ref on the mle in case this is a BLOCK * if so, the creator of the BLOCK may try to put the last @@ -864,8 +889,8 @@ redo_request: * dlm spinlock would be detectable be a change on the mle, * so we only need to clear out the recovery map once. */ if (dlm_is_recovery_lock(lockid, namelen)) { - mlog(ML_NOTICE, "%s: recovery map is not empty, but " - "must master $RECOVERY lock now\n", dlm->name); + mlog(0, "%s: Recovery map is not empty, but must " + "master $RECOVERY lock now\n", dlm->name); if (!dlm_pre_master_reco_lockres(dlm, res)) wait_on_recovery = 0; else { @@ -883,8 +908,8 @@ redo_request: spin_lock(&dlm->spinlock); bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0); if (bit < O2NM_MAX_NODES) { - mlog(ML_NOTICE, "%s:%.*s: at least one node (%d) to " - "recover before lock mastery can begin\n", + mlog(0, "%s: res %.*s, At least one node (%d) " + "to recover before lock mastery can begin\n", dlm->name, namelen, (char *)lockid, bit); wait_on_recovery = 1; } else @@ -913,8 +938,8 @@ redo_request: * yet, keep going until it does. this is how the * master will know that asserts are needed back to * the lower nodes. */ - mlog(0, "%s:%.*s: requests only up to %u but master " - "is %u, keep going\n", dlm->name, namelen, + mlog(0, "%s: res %.*s, Requests only up to %u but " + "master is %u, keep going\n", dlm->name, namelen, lockid, nodenum, mle->master); } } @@ -924,13 +949,12 @@ wait: ret = dlm_wait_for_lock_mastery(dlm, res, mle, &blocked); if (ret < 0) { wait_on_recovery = 1; - mlog(0, "%s:%.*s: node map changed, redo the " - "master request now, blocked=%d\n", - dlm->name, res->lockname.len, + mlog(0, "%s: res %.*s, Node map changed, redo the master " + "request now, blocked=%d\n", dlm->name, res->lockname.len, res->lockname.name, blocked); if (++tries > 20) { - mlog(ML_ERROR, "%s:%.*s: spinning on " - "dlm_wait_for_lock_mastery, blocked=%d\n", + mlog(ML_ERROR, "%s: res %.*s, Spinning on " + "dlm_wait_for_lock_mastery, blocked = %d\n", dlm->name, res->lockname.len, res->lockname.name, blocked); dlm_print_one_lock_resource(res); @@ -940,7 +964,8 @@ wait: goto redo_request; } - mlog(0, "lockres mastered by %u\n", res->owner); + mlog(0, "%s: res %.*s, Mastered by %u\n", dlm->name, res->lockname.len, + res->lockname.name, res->owner); /* make sure we never continue without this */ BUG_ON(res->owner == O2NM_MAX_NODES); @@ -952,8 +977,6 @@ wait: wake_waiters: spin_lock(&res->spinlock); - if (res->owner != dlm->node_num && drop_inflight_if_nonlocal) - dlm_lockres_drop_inflight_ref(dlm, res); res->state &= ~DLM_LOCK_RES_IN_PROGRESS; spin_unlock(&res->spinlock); wake_up(&res->wq); @@ -1388,6 +1411,7 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data, int found, ret; int set_maybe; int dispatch_assert = 0; + int dispatched = 0; if (!dlm_grab(dlm)) return DLM_MASTER_RESP_NO; @@ -1426,9 +1450,7 @@ way_up_top: } if (res->owner == dlm->node_num) { - mlog(0, "%s:%.*s: setting bit %u in refmap\n", - dlm->name, namelen, name, request->node_idx); - dlm_lockres_set_refmap_bit(request->node_idx, res); + dlm_lockres_set_refmap_bit(dlm, res, request->node_idx); spin_unlock(&res->spinlock); response = DLM_MASTER_RESP_YES; if (mle) @@ -1493,10 +1515,8 @@ way_up_top: * go back and clean the mles on any * other nodes */ dispatch_assert = 1; - dlm_lockres_set_refmap_bit(request->node_idx, res); - mlog(0, "%s:%.*s: setting bit %u in refmap\n", - dlm->name, namelen, name, - request->node_idx); + dlm_lockres_set_refmap_bit(dlm, res, + request->node_idx); } else response = DLM_MASTER_RESP_NO; } else { @@ -1598,13 +1618,16 @@ send_response: mlog(ML_ERROR, "failed to dispatch assert master work\n"); response = DLM_MASTER_RESP_ERROR; dlm_lockres_put(res); + } else { + dispatched = 1; } } else { if (res) dlm_lockres_put(res); } - dlm_put(dlm); + if (!dispatched) + dlm_put(dlm); return response; } @@ -1702,7 +1725,7 @@ again: "lockres, set the bit in the refmap\n", namelen, lockname, to); spin_lock(&res->spinlock); - dlm_lockres_set_refmap_bit(to, res); + dlm_lockres_set_refmap_bit(dlm, res, to); spin_unlock(&res->spinlock); } } @@ -2022,7 +2045,6 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm, /* queue up work for dlm_assert_master_worker */ - dlm_grab(dlm); /* get an extra ref for the work item */ dlm_init_work_item(dlm, item, dlm_assert_master_worker, NULL); item->u.am.lockres = res; /* already have a ref */ /* can optionally ignore node numbers higher than this node */ @@ -2187,8 +2209,6 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) namelen = res->lockname.len; BUG_ON(namelen > O2NM_MAX_NAME_LEN); - mlog(0, "%s:%.*s: sending deref to %d\n", - dlm->name, namelen, lockname, res->owner); memset(&deref, 0, sizeof(deref)); deref.node_idx = dlm->node_num; deref.namelen = namelen; @@ -2197,14 +2217,12 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) ret = o2net_send_message(DLM_DEREF_LOCKRES_MSG, dlm->key, &deref, sizeof(deref), res->owner, &r); if (ret < 0) - mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " - "node %u\n", ret, DLM_DEREF_LOCKRES_MSG, dlm->key, - res->owner); + mlog(ML_ERROR, "%s: res %.*s, error %d send DEREF to node %u\n", + dlm->name, namelen, lockname, ret, res->owner); else if (r < 0) { /* BAD. other node says I did not have a ref. */ - mlog(ML_ERROR,"while dropping ref on %s:%.*s " - "(master=%u) got %d.\n", dlm->name, namelen, - lockname, res->owner, r); + mlog(ML_ERROR, "%s: res %.*s, DEREF to node %u got %d\n", + dlm->name, namelen, lockname, res->owner, r); dlm_print_one_lock_resource(res); BUG(); } @@ -2260,7 +2278,7 @@ int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data, else { BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF); if (test_bit(node, res->refmap)) { - dlm_lockres_clear_refmap_bit(node, res); + dlm_lockres_clear_refmap_bit(dlm, res, node); cleared = 1; } } @@ -2320,7 +2338,7 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data) BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF); if (test_bit(node, res->refmap)) { __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG); - dlm_lockres_clear_refmap_bit(node, res); + dlm_lockres_clear_refmap_bit(dlm, res, node); cleared = 1; } spin_unlock(&res->spinlock); @@ -2802,7 +2820,8 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, BUG_ON(!list_empty(&lock->bast_list)); BUG_ON(lock->ast_pending); BUG_ON(lock->bast_pending); - dlm_lockres_clear_refmap_bit(lock->ml.node, res); + dlm_lockres_clear_refmap_bit(dlm, res, + lock->ml.node); list_del_init(&lock->list); dlm_lock_put(lock); /* In a normal unlock, we would have added a @@ -2823,7 +2842,7 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, mlog(0, "%s:%.*s: node %u had a ref to this " "migrating lockres, clearing\n", dlm->name, res->lockname.len, res->lockname.name, bit); - dlm_lockres_clear_refmap_bit(bit, res); + dlm_lockres_clear_refmap_bit(dlm, res, bit); } bit++; } @@ -2916,9 +2935,9 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm, &migrate, sizeof(migrate), nodenum, &status); if (ret < 0) { - mlog(ML_ERROR, "Error %d when sending message %u (key " - "0x%x) to node %u\n", ret, DLM_MIGRATE_REQUEST_MSG, - dlm->key, nodenum); + mlog(ML_ERROR, "%s: res %.*s, Error %d send " + "MIGRATE_REQUEST to node %u\n", dlm->name, + migrate.namelen, migrate.name, ret, nodenum); if (!dlm_is_host_down(ret)) { mlog(ML_ERROR, "unhandled error=%d!\n", ret); BUG(); @@ -2937,7 +2956,7 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm, dlm->name, res->lockname.len, res->lockname.name, nodenum); spin_lock(&res->spinlock); - dlm_lockres_set_refmap_bit(nodenum, res); + dlm_lockres_set_refmap_bit(dlm, res, nodenum); spin_unlock(&res->spinlock); } } @@ -3271,7 +3290,7 @@ int dlm_finish_migration(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, * mastery reference here since old_master will briefly have * a reference after the migration completes */ spin_lock(&res->spinlock); - dlm_lockres_set_refmap_bit(old_master, res); + dlm_lockres_set_refmap_bit(dlm, res, old_master); spin_unlock(&res->spinlock); mlog(0, "now time to do a migrate request to other nodes\n"); diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 7efab6d..0e5013e 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -362,40 +362,38 @@ static int dlm_is_node_recovered(struct dlm_ctxt *dlm, u8 node) } -int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout) +void dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout) { - if (timeout) { - mlog(ML_NOTICE, "%s: waiting %dms for notification of " - "death of node %u\n", dlm->name, timeout, node); + if (dlm_is_node_dead(dlm, node)) + return; + + printk(KERN_NOTICE "o2dlm: Waiting on the death of node %u in " + "domain %s\n", node, dlm->name); + + if (timeout) wait_event_timeout(dlm->dlm_reco_thread_wq, - dlm_is_node_dead(dlm, node), - msecs_to_jiffies(timeout)); - } else { - mlog(ML_NOTICE, "%s: waiting indefinitely for notification " - "of death of node %u\n", dlm->name, node); + dlm_is_node_dead(dlm, node), + msecs_to_jiffies(timeout)); + else wait_event(dlm->dlm_reco_thread_wq, dlm_is_node_dead(dlm, node)); - } - /* for now, return 0 */ - return 0; } -int dlm_wait_for_node_recovery(struct dlm_ctxt *dlm, u8 node, int timeout) +void dlm_wait_for_node_recovery(struct dlm_ctxt *dlm, u8 node, int timeout) { - if (timeout) { - mlog(0, "%s: waiting %dms for notification of " - "recovery of node %u\n", dlm->name, timeout, node); + if (dlm_is_node_recovered(dlm, node)) + return; + + printk(KERN_NOTICE "o2dlm: Waiting on the recovery of node %u in " + "domain %s\n", node, dlm->name); + + if (timeout) wait_event_timeout(dlm->dlm_reco_thread_wq, - dlm_is_node_recovered(dlm, node), - msecs_to_jiffies(timeout)); - } else { - mlog(0, "%s: waiting indefinitely for notification " - "of recovery of node %u\n", dlm->name, node); + dlm_is_node_recovered(dlm, node), + msecs_to_jiffies(timeout)); + else wait_event(dlm->dlm_reco_thread_wq, dlm_is_node_recovered(dlm, node)); - } - /* for now, return 0 */ - return 0; } /* callers of the top-level api calls (dlmlock/dlmunlock) should @@ -430,6 +428,8 @@ static void dlm_begin_recovery(struct dlm_ctxt *dlm) { spin_lock(&dlm->spinlock); BUG_ON(dlm->reco.state & DLM_RECO_STATE_ACTIVE); + printk(KERN_NOTICE "o2dlm: Begin recovery on domain %s for node %u\n", + dlm->name, dlm->reco.dead_node); dlm->reco.state |= DLM_RECO_STATE_ACTIVE; spin_unlock(&dlm->spinlock); } @@ -440,9 +440,18 @@ static void dlm_end_recovery(struct dlm_ctxt *dlm) BUG_ON(!(dlm->reco.state & DLM_RECO_STATE_ACTIVE)); dlm->reco.state &= ~DLM_RECO_STATE_ACTIVE; spin_unlock(&dlm->spinlock); + printk(KERN_NOTICE "o2dlm: End recovery on domain %s\n", dlm->name); wake_up(&dlm->reco.event); } +static void dlm_print_recovery_master(struct dlm_ctxt *dlm) +{ + printk(KERN_NOTICE "o2dlm: Node %u (%s) is the Recovery Master for the " + "dead node %u in domain %s\n", dlm->reco.new_master, + (dlm->node_num == dlm->reco.new_master ? "me" : "he"), + dlm->reco.dead_node, dlm->name); +} + static int dlm_do_recovery(struct dlm_ctxt *dlm) { int status = 0; @@ -505,9 +514,8 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm) } mlog(0, "another node will master this recovery session.\n"); } - mlog(0, "dlm=%s (%d), new_master=%u, this node=%u, dead_node=%u\n", - dlm->name, task_pid_nr(dlm->dlm_reco_thread_task), dlm->reco.new_master, - dlm->node_num, dlm->reco.dead_node); + + dlm_print_recovery_master(dlm); /* it is safe to start everything back up here * because all of the dead node's lock resources @@ -518,15 +526,13 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm) return 0; master_here: - mlog(ML_NOTICE, "(%d) Node %u is the Recovery Master for the Dead Node " - "%u for Domain %s\n", task_pid_nr(dlm->dlm_reco_thread_task), - dlm->node_num, dlm->reco.dead_node, dlm->name); + dlm_print_recovery_master(dlm); status = dlm_remaster_locks(dlm, dlm->reco.dead_node); if (status < 0) { /* we should never hit this anymore */ - mlog(ML_ERROR, "error %d remastering locks for node %u, " - "retrying.\n", status, dlm->reco.dead_node); + mlog(ML_ERROR, "%s: Error %d remastering locks for node %u, " + "retrying.\n", dlm->name, status, dlm->reco.dead_node); /* yield a bit to allow any final network messages * to get handled on remaining nodes */ msleep(100); @@ -534,7 +540,10 @@ master_here: /* success! see if any other nodes need recovery */ mlog(0, "DONE mastering recovery of %s:%u here(this=%u)!\n", dlm->name, dlm->reco.dead_node, dlm->node_num); - dlm_reset_recovery(dlm); + spin_lock(&dlm->spinlock); + __dlm_reset_recovery(dlm); + dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE; + spin_unlock(&dlm->spinlock); } dlm_end_recovery(dlm); @@ -567,7 +576,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node) BUG_ON(ndata->state != DLM_RECO_NODE_DATA_INIT); ndata->state = DLM_RECO_NODE_DATA_REQUESTING; - mlog(0, "requesting lock info from node %u\n", + mlog(0, "%s: Requesting lock info from node %u\n", dlm->name, ndata->node_num); if (ndata->node_num == dlm->node_num) { @@ -640,7 +649,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node) spin_unlock(&dlm_reco_state_lock); } - mlog(0, "done requesting all lock info\n"); + mlog(0, "%s: Done requesting all lock info\n", dlm->name); /* nodes should be sending reco data now * just need to wait */ @@ -692,6 +701,14 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node) if (all_nodes_done) { int ret; + /* Set this flag on recovery master to avoid + * a new recovery for another dead node start + * before the recovery is not done. That may + * cause recovery hung.*/ + spin_lock(&dlm->spinlock); + dlm->reco.state |= DLM_RECO_STATE_FINALIZE; + spin_unlock(&dlm->spinlock); + /* all nodes are now in DLM_RECO_NODE_DATA_DONE state * just send a finalize message to everyone and * clean up */ @@ -802,10 +819,9 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from, /* negative status is handled by caller */ if (ret < 0) - mlog(ML_ERROR, "Error %d when sending message %u (key " - "0x%x) to node %u\n", ret, DLM_LOCK_REQUEST_MSG, - dlm->key, request_from); - + mlog(ML_ERROR, "%s: Error %d send LOCK_REQUEST to node %u " + "to recover dead node %u\n", dlm->name, ret, + request_from, dead_node); // return from here, then // sleep until all received or error return ret; @@ -956,9 +972,9 @@ static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, u8 dead_node, u8 send_to) ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg, sizeof(done_msg), send_to, &tmpret); if (ret < 0) { - mlog(ML_ERROR, "Error %d when sending message %u (key " - "0x%x) to node %u\n", ret, DLM_RECO_DATA_DONE_MSG, - dlm->key, send_to); + mlog(ML_ERROR, "%s: Error %d send RECO_DATA_DONE to node %u " + "to recover dead node %u\n", dlm->name, ret, send_to, + dead_node); if (!dlm_is_host_down(ret)) { BUG(); } @@ -1127,9 +1143,11 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm, if (ret < 0) { /* XXX: negative status is not handled. * this will end up killing this node. */ - mlog(ML_ERROR, "Error %d when sending message %u (key " - "0x%x) to node %u\n", ret, DLM_MIG_LOCKRES_MSG, - dlm->key, send_to); + mlog(ML_ERROR, "%s: res %.*s, Error %d send MIG_LOCKRES to " + "node %u (%s)\n", dlm->name, mres->lockname_len, + mres->lockname, ret, send_to, + (orig_flags & DLM_MRES_MIGRATION ? + "migration" : "recovery")); } else { /* might get an -ENOMEM back here */ ret = status; @@ -1671,6 +1689,7 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data, unsigned int hash; int master = DLM_LOCK_RES_OWNER_UNKNOWN; u32 flags = DLM_ASSERT_MASTER_REQUERY; + int dispatched = 0; if (!dlm_grab(dlm)) { /* since the domain has gone away on this @@ -1692,6 +1711,8 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data, mlog_errno(-ENOMEM); /* retry!? */ BUG(); + } else { + dispatched = 1; } } else /* put.. incase we are not the master */ dlm_lockres_put(res); @@ -1699,7 +1720,8 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data, } spin_unlock(&dlm->spinlock); - dlm_put(dlm); + if (!dispatched) + dlm_put(dlm); return master; } @@ -1745,13 +1767,13 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, struct dlm_migratable_lockres *mres) { struct dlm_migratable_lock *ml; - struct list_head *queue; + struct list_head *queue, *iter; struct list_head *tmpq = NULL; struct dlm_lock *newlock = NULL; struct dlm_lockstatus *lksb = NULL; int ret = 0; int i, j, bad; - struct dlm_lock *lock = NULL; + struct dlm_lock *lock; u8 from = O2NM_MAX_NODES; unsigned int added = 0; __be64 c; @@ -1767,7 +1789,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, dlm->name, mres->lockname_len, mres->lockname, from); spin_lock(&res->spinlock); - dlm_lockres_set_refmap_bit(from, res); + dlm_lockres_set_refmap_bit(dlm, res, from); spin_unlock(&res->spinlock); added++; break; @@ -1786,14 +1808,16 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, /* MIGRATION ONLY! */ BUG_ON(!(mres->flags & DLM_MRES_MIGRATION)); + lock = NULL; spin_lock(&res->spinlock); for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) { tmpq = dlm_list_idx_to_ptr(res, j); - list_for_each_entry(lock, tmpq, list) { - if (lock->ml.cookie != ml->cookie) - lock = NULL; - else + list_for_each(iter, tmpq) { + lock = list_entry(iter, + struct dlm_lock, list); + if (lock->ml.cookie == ml->cookie) break; + lock = NULL; } if (lock) break; @@ -1965,7 +1989,7 @@ skip_lvb: mlog(0, "%s:%.*s: added lock for node %u, " "setting refmap bit\n", dlm->name, res->lockname.len, res->lockname.name, ml->node); - dlm_lockres_set_refmap_bit(ml->node, res); + dlm_lockres_set_refmap_bit(dlm, res, ml->node); added++; } spin_unlock(&res->spinlock); @@ -2084,6 +2108,9 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm, list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) { if (res->owner == dead_node) { + mlog(0, "%s: res %.*s, Changing owner from %u to %u\n", + dlm->name, res->lockname.len, res->lockname.name, + res->owner, new_master); list_del_init(&res->recovering); spin_lock(&res->spinlock); /* new_master has our reference from @@ -2105,40 +2132,30 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm, for (i = 0; i < DLM_HASH_BUCKETS; i++) { bucket = dlm_lockres_hash(dlm, i); hlist_for_each_entry(res, hash_iter, bucket, hash_node) { - if (res->state & DLM_LOCK_RES_RECOVERING) { - if (res->owner == dead_node) { - mlog(0, "(this=%u) res %.*s owner=%u " - "was not on recovering list, but " - "clearing state anyway\n", - dlm->node_num, res->lockname.len, - res->lockname.name, new_master); - } else if (res->owner == dlm->node_num) { - mlog(0, "(this=%u) res %.*s owner=%u " - "was not on recovering list, " - "owner is THIS node, clearing\n", - dlm->node_num, res->lockname.len, - res->lockname.name, new_master); - } else - continue; + if (!(res->state & DLM_LOCK_RES_RECOVERING)) + continue; - if (!list_empty(&res->recovering)) { - mlog(0, "%s:%.*s: lockres was " - "marked RECOVERING, owner=%u\n", - dlm->name, res->lockname.len, - res->lockname.name, res->owner); - list_del_init(&res->recovering); - dlm_lockres_put(res); - } - spin_lock(&res->spinlock); - /* new_master has our reference from - * the lock state sent during recovery */ - dlm_change_lockres_owner(dlm, res, new_master); - res->state &= ~DLM_LOCK_RES_RECOVERING; - if (__dlm_lockres_has_locks(res)) - __dlm_dirty_lockres(dlm, res); - spin_unlock(&res->spinlock); - wake_up(&res->wq); + if (res->owner != dead_node && + res->owner != dlm->node_num) + continue; + + if (!list_empty(&res->recovering)) { + list_del_init(&res->recovering); + dlm_lockres_put(res); } + + /* new_master has our reference from + * the lock state sent during recovery */ + mlog(0, "%s: res %.*s, Changing owner from %u to %u\n", + dlm->name, res->lockname.len, res->lockname.name, + res->owner, new_master); + spin_lock(&res->spinlock); + dlm_change_lockres_owner(dlm, res, new_master); + res->state &= ~DLM_LOCK_RES_RECOVERING; + if (__dlm_lockres_has_locks(res)) + __dlm_dirty_lockres(dlm, res); + spin_unlock(&res->spinlock); + wake_up(&res->wq); } } } @@ -2252,12 +2269,12 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, res->lockname.len, res->lockname.name, freed, dead_node); __dlm_print_one_lock_resource(res); } - dlm_lockres_clear_refmap_bit(dead_node, res); + dlm_lockres_clear_refmap_bit(dlm, res, dead_node); } else if (test_bit(dead_node, res->refmap)) { mlog(0, "%s:%.*s: dead node %u had a ref, but had " "no locks and had not purged before dying\n", dlm->name, res->lockname.len, res->lockname.name, dead_node); - dlm_lockres_clear_refmap_bit(dead_node, res); + dlm_lockres_clear_refmap_bit(dlm, res, dead_node); } /* do not kick thread yet */ @@ -2324,9 +2341,9 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) dlm_revalidate_lvb(dlm, res, dead_node); if (res->owner == dead_node) { if (res->state & DLM_LOCK_RES_DROPPING_REF) { - mlog(ML_NOTICE, "Ignore %.*s for " + mlog(ML_NOTICE, "%s: res %.*s, Skip " "recovery as it is being freed\n", - res->lockname.len, + dlm->name, res->lockname.len, res->lockname.name); } else dlm_move_lockres_to_recovery_list(dlm, @@ -2870,8 +2887,8 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data, BUG(); } dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE; + __dlm_reset_recovery(dlm); spin_unlock(&dlm->spinlock); - dlm_reset_recovery(dlm); dlm_kick_recovery_thread(dlm); break; default: diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index 1d6d1d2..e73c833 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -94,24 +94,26 @@ int __dlm_lockres_unused(struct dlm_lock_resource *res) { int bit; + assert_spin_locked(&res->spinlock); + if (__dlm_lockres_has_locks(res)) return 0; + /* Locks are in the process of being created */ + if (res->inflight_locks) + return 0; + if (!list_empty(&res->dirty) || res->state & DLM_LOCK_RES_DIRTY) return 0; if (res->state & DLM_LOCK_RES_RECOVERING) return 0; + /* Another node has this resource with this node as the master */ bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); if (bit < O2NM_MAX_NODES) return 0; - /* - * since the bit for dlm->node_num is not set, inflight_locks better - * be zero - */ - BUG_ON(res->inflight_locks != 0); return 1; } @@ -185,8 +187,6 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm, /* clear our bit from the master's refmap, ignore errors */ ret = dlm_drop_lockres_ref(dlm, res); if (ret < 0) { - mlog(ML_ERROR, "%s: deref %.*s failed %d\n", dlm->name, - res->lockname.len, res->lockname.name, ret); if (!dlm_is_host_down(ret)) BUG(); } @@ -209,7 +209,7 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm, BUG(); } - __dlm_unhash_lockres(res); + __dlm_unhash_lockres(dlm, res); /* lockres is not in the hash now. drop the flag and wake up * any processes waiting in dlm_get_lock_resource. */ diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index ab4046f..b5e457c 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -1692,7 +1692,7 @@ int ocfs2_open_lock(struct inode *inode) mlog(0, "inode %llu take PRMODE open lock\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); - if (ocfs2_mount_local(osb)) + if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb)) goto out; lockres = &OCFS2_I(inode)->ip_open_lockres; @@ -1718,6 +1718,12 @@ int ocfs2_try_open_lock(struct inode *inode, int write) (unsigned long long)OCFS2_I(inode)->ip_blkno, write ? "EXMODE" : "PRMODE"); + if (ocfs2_is_hard_readonly(osb)) { + if (write) + status = -EROFS; + goto out; + } + if (ocfs2_mount_local(osb)) goto out; @@ -2092,7 +2098,7 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode) inode->i_uid = be32_to_cpu(lvb->lvb_iuid); inode->i_gid = be32_to_cpu(lvb->lvb_igid); inode->i_mode = be16_to_cpu(lvb->lvb_imode); - inode->i_nlink = be16_to_cpu(lvb->lvb_inlink); + set_nlink(inode, be16_to_cpu(lvb->lvb_inlink)); ocfs2_unpack_timespec(&inode->i_atime, be64_to_cpu(lvb->lvb_iatime_packed)); ocfs2_unpack_timespec(&inode->i_mtime, @@ -2298,7 +2304,7 @@ int ocfs2_inode_lock_full_nested(struct inode *inode, if (ocfs2_is_hard_readonly(osb)) { if (ex) status = -EROFS; - goto bail; + goto getbh; } if (ocfs2_mount_local(osb)) @@ -2356,7 +2362,7 @@ local: mlog_errno(status); goto bail; } - +getbh: if (ret_bh) { status = ocfs2_assign_bh(inode, ret_bh, local_bh); if (status < 0) { @@ -2631,8 +2637,11 @@ int ocfs2_dentry_lock(struct dentry *dentry, int ex) BUG_ON(!dl); - if (ocfs2_is_hard_readonly(osb)) - return -EROFS; + if (ocfs2_is_hard_readonly(osb)) { + if (ex) + return -EROFS; + return 0; + } if (ocfs2_mount_local(osb)) return 0; @@ -2650,7 +2659,7 @@ void ocfs2_dentry_unlock(struct dentry *dentry, int ex) struct ocfs2_dentry_lock *dl = dentry->d_fsdata; struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); - if (!ocfs2_mount_local(osb)) + if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) ocfs2_cluster_unlock(osb, &dl->dl_lockres, level); } @@ -3959,9 +3968,13 @@ static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) osb->dc_work_sequence = osb->dc_wake_sequence; processed = osb->blocked_lock_count; - while (processed) { - BUG_ON(list_empty(&osb->blocked_lock_list)); - + /* + * blocked lock processing in this loop might call iput which can + * remove items off osb->blocked_lock_list. Downconvert up to + * 'processed' number of locks, but stop short if we had some + * removed in ocfs2_mark_lockres_freeing when downconverting. + */ + while (processed && !list_empty(&osb->blocked_lock_list)) { lockres = list_entry(osb->blocked_lock_list.next, struct ocfs2_lock_res, l_blocked_list); list_del_init(&lockres->l_blocked_list); diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 774a032..cf22847 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -831,6 +831,102 @@ out: return ret; } +int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin) +{ + struct inode *inode = file->f_mapping->host; + int ret; + unsigned int is_last = 0, is_data = 0; + u16 cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits; + u32 cpos, cend, clen, hole_size; + u64 extoff, extlen; + struct buffer_head *di_bh = NULL; + struct ocfs2_extent_rec rec; + + BUG_ON(origin != SEEK_DATA && origin != SEEK_HOLE); + + ret = ocfs2_inode_lock(inode, &di_bh, 0); + if (ret) { + mlog_errno(ret); + goto out; + } + + down_read(&OCFS2_I(inode)->ip_alloc_sem); + + if (*offset >= inode->i_size) { + ret = -ENXIO; + goto out_unlock; + } + + if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { + if (origin == SEEK_HOLE) + *offset = inode->i_size; + goto out_unlock; + } + + clen = 0; + cpos = *offset >> cs_bits; + cend = ocfs2_clusters_for_bytes(inode->i_sb, inode->i_size); + + while (cpos < cend && !is_last) { + ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size, + &rec, &is_last); + if (ret) { + mlog_errno(ret); + goto out_unlock; + } + + extoff = cpos; + extoff <<= cs_bits; + + if (rec.e_blkno == 0ULL) { + clen = hole_size; + is_data = 0; + } else { + clen = le16_to_cpu(rec.e_leaf_clusters) - + (cpos - le32_to_cpu(rec.e_cpos)); + is_data = (rec.e_flags & OCFS2_EXT_UNWRITTEN) ? 0 : 1; + } + + if ((!is_data && origin == SEEK_HOLE) || + (is_data && origin == SEEK_DATA)) { + if (extoff > *offset) + *offset = extoff; + goto out_unlock; + } + + if (!is_last) + cpos += clen; + } + + if (origin == SEEK_HOLE) { + extoff = cpos; + extoff <<= cs_bits; + extlen = clen; + extlen <<= cs_bits; + + if ((extoff + extlen) > inode->i_size) + extlen = inode->i_size - extoff; + extoff += extlen; + if (extoff > *offset) + *offset = extoff; + goto out_unlock; + } + + ret = -ENXIO; + +out_unlock: + + brelse(di_bh); + + up_read(&OCFS2_I(inode)->ip_alloc_sem); + + ocfs2_inode_unlock(inode, 0); +out: + if (ret && ret != -ENXIO) + ret = -ENXIO; + return ret; +} + int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr, struct buffer_head *bhs[], int flags, int (*validate)(struct super_block *sb, diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h index e79d41c..67ea57d 100644 --- a/fs/ocfs2/extent_map.h +++ b/fs/ocfs2/extent_map.h @@ -53,6 +53,8 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 map_start, u64 map_len); +int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin); + int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, u32 *p_cluster, u32 *num_clusters, struct ocfs2_extent_list *el, diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index b1e35a3..6a7a3d9 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -171,7 +171,8 @@ static int ocfs2_dir_release(struct inode *inode, struct file *file) return 0; } -static int ocfs2_sync_file(struct file *file, int datasync) +static int ocfs2_sync_file(struct file *file, loff_t start, loff_t end, + int datasync) { int err = 0; journal_t *journal; @@ -184,6 +185,16 @@ static int ocfs2_sync_file(struct file *file, int datasync) file->f_path.dentry->d_name.name, (unsigned long long)datasync); + err = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (err) + return err; + + /* + * Probably don't need the i_mutex at all in here, just putting it here + * to be consistent with how fsync used to be called, someone more + * familiar with the fs could possibly remove it. + */ + mutex_lock(&inode->i_mutex); if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) { /* * We still have to flush drive's caches to get data to the @@ -200,6 +211,7 @@ static int ocfs2_sync_file(struct file *file, int datasync) bail: if (err) mlog_errno(err); + mutex_unlock(&inode->i_mutex); return (err < 0) ? -EIO : 0; } @@ -1142,6 +1154,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) if (status) goto bail_unlock; + inode_dio_wait(inode); + if (i_size_read(inode) > attr->ia_size) { if (ocfs2_should_order_data(inode)) { status = ocfs2_begin_ordered_truncate(inode, @@ -1279,11 +1293,11 @@ bail: return err; } -int ocfs2_permission(struct inode *inode, int mask, unsigned int flags) +int ocfs2_permission(struct inode *inode, int mask) { int ret; - if (flags & IPERM_FLAG_RCU) + if (mask & MAY_NOT_BLOCK) return -ECHILD; ret = ocfs2_inode_lock(inode, NULL, 0); @@ -1293,7 +1307,7 @@ int ocfs2_permission(struct inode *inode, int mask, unsigned int flags) goto out; } - ret = generic_permission(inode, mask, flags, ocfs2_check_acl); + ret = generic_permission(inode, mask); ocfs2_inode_unlock(inode, 0); out: @@ -1936,6 +1950,9 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, if (ret < 0) mlog_errno(ret); + if (file && (file->f_flags & O_SYNC)) + handle->h_sync = 1; + ocfs2_commit_trans(osb, handle); out_inode_unlock: @@ -2038,6 +2055,23 @@ out: return ret; } +static void ocfs2_aiodio_wait(struct inode *inode) +{ + wait_queue_head_t *wq = ocfs2_ioend_wq(inode); + + wait_event(*wq, (atomic_read(&OCFS2_I(inode)->ip_unaligned_aio) == 0)); +} + +static int ocfs2_is_io_unaligned(struct inode *inode, size_t count, loff_t pos) +{ + int blockmask = inode->i_sb->s_blocksize - 1; + loff_t final_size = pos + count; + + if ((pos & blockmask) || (final_size & blockmask)) + return 1; + return 0; +} + static int ocfs2_prepare_inode_for_refcount(struct inode *inode, struct file *file, loff_t pos, size_t count, @@ -2216,6 +2250,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); int full_coherency = !(osb->s_mount_opt & OCFS2_MOUNT_COHERENCY_BUFFERED); + int unaligned_dio = 0; trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry, (unsigned long long)OCFS2_I(inode)->ip_blkno, @@ -2236,9 +2271,8 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, ocfs2_iocb_clear_sem_locked(iocb); relock: - /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */ + /* to match setattr's i_mutex -> rw_lock ordering */ if (direct_io) { - down_read(&inode->i_alloc_sem); have_alloc_sem = 1; /* communicate with ocfs2_dio_end_io */ ocfs2_iocb_set_sem_locked(iocb); @@ -2284,13 +2318,16 @@ relock: goto out; } + if (direct_io && !is_sync_kiocb(iocb)) + unaligned_dio = ocfs2_is_io_unaligned(inode, iocb->ki_left, + *ppos); + /* * We can't complete the direct I/O as requested, fall back to * buffered I/O. */ if (direct_io && !can_do_direct) { ocfs2_rw_unlock(inode, rw_level); - up_read(&inode->i_alloc_sem); have_alloc_sem = 0; rw_level = -1; @@ -2299,6 +2336,18 @@ relock: goto relock; } + if (unaligned_dio) { + /* + * Wait on previous unaligned aio to complete before + * proceeding. + */ + ocfs2_aiodio_wait(inode); + + /* Mark the iocb as needing a decrement in ocfs2_dio_end_io */ + atomic_inc(&OCFS2_I(inode)->ip_unaligned_aio); + ocfs2_iocb_set_unaligned_aio(iocb); + } + /* * To later detect whether a journal commit for sync writes is * necessary, we sample i_size, and cluster count here. @@ -2338,10 +2387,14 @@ out_dio: /* buffered aio wouldn't have proper lock coverage today */ BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); + if (unlikely(written <= 0)) + goto no_sync; + if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || ((file->f_flags & O_DIRECT) && !direct_io)) { - ret = filemap_fdatawrite_range(file->f_mapping, pos, - pos + count - 1); + ret = filemap_fdatawrite_range(file->f_mapping, + iocb->ki_pos - written, + iocb->ki_pos - 1); if (ret < 0) written = ret; @@ -2354,15 +2407,16 @@ out_dio: } if (!ret) - ret = filemap_fdatawait_range(file->f_mapping, pos, - pos + count - 1); + ret = filemap_fdatawait_range(file->f_mapping, + iocb->ki_pos - written, + iocb->ki_pos - 1); } +no_sync: /* * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io * function pointer which is called when o_direct io completes so that - * it can unlock our rw lock. (it's the clustered equivalent of - * i_alloc_sem; protects truncate from racing with pending ios). + * it can unlock our rw lock. * Unfortunately there are error cases which call end_io and others * that don't. so we don't have to unlock the rw_lock if either an * async dio is going to do it in the future or an end_io after an @@ -2371,6 +2425,12 @@ out_dio: if ((ret == -EIOCBQUEUED) || (!ocfs2_iocb_is_rw_locked(iocb))) { rw_level = -1; have_alloc_sem = 0; + unaligned_dio = 0; + } + + if (unaligned_dio) { + ocfs2_iocb_clear_unaligned_aio(iocb); + atomic_dec(&OCFS2_I(inode)->ip_unaligned_aio); } out: @@ -2378,10 +2438,8 @@ out: ocfs2_rw_unlock(inode, rw_level); out_sems: - if (have_alloc_sem) { - up_read(&inode->i_alloc_sem); + if (have_alloc_sem) ocfs2_iocb_clear_sem_locked(iocb); - } mutex_unlock(&inode->i_mutex); @@ -2416,9 +2474,7 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, struct address_space *mapping = out->f_mapping; struct inode *inode = mapping->host; struct splice_desc sd = { - .total_len = len, .flags = flags, - .pos = *ppos, .u.file = out, }; @@ -2428,6 +2484,12 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, out->f_path.dentry->d_name.len, out->f_path.dentry->d_name.name, len); + ret = generic_write_checks(out, ppos, &len, 0); + if (ret) + return ret; + sd.total_len = len; + sd.pos = *ppos; + if (pipe->inode) mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT); @@ -2531,7 +2593,6 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, * need locks to protect pending reads from racing with truncate. */ if (filp->f_flags & O_DIRECT) { - down_read(&inode->i_alloc_sem); have_alloc_sem = 1; ocfs2_iocb_set_sem_locked(iocb); @@ -2574,16 +2635,66 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, } bail: - if (have_alloc_sem) { - up_read(&inode->i_alloc_sem); + if (have_alloc_sem) ocfs2_iocb_clear_sem_locked(iocb); - } + if (rw_level != -1) ocfs2_rw_unlock(inode, rw_level); return ret; } +/* Refer generic_file_llseek_unlocked() */ +static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int origin) +{ + struct inode *inode = file->f_mapping->host; + int ret = 0; + + mutex_lock(&inode->i_mutex); + + switch (origin) { + case SEEK_SET: + break; + case SEEK_END: + offset += inode->i_size; + break; + case SEEK_CUR: + if (offset == 0) { + offset = file->f_pos; + goto out; + } + offset += file->f_pos; + break; + case SEEK_DATA: + case SEEK_HOLE: + ret = ocfs2_seek_data_hole_offset(file, &offset, origin); + if (ret) + goto out; + break; + default: + ret = -EINVAL; + goto out; + } + + if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) + ret = -EINVAL; + if (!ret && offset > inode->i_sb->s_maxbytes) + ret = -EINVAL; + if (ret) + goto out; + + if (offset != file->f_pos) { + file->f_pos = offset; + file->f_version = 0; + } + +out: + mutex_unlock(&inode->i_mutex); + if (ret) + return ret; + return offset; +} + const struct inode_operations ocfs2_file_iops = { .setattr = ocfs2_setattr, .getattr = ocfs2_getattr, @@ -2593,12 +2704,14 @@ const struct inode_operations ocfs2_file_iops = { .listxattr = ocfs2_listxattr, .removexattr = generic_removexattr, .fiemap = ocfs2_fiemap, + .get_acl = ocfs2_iop_get_acl, }; const struct inode_operations ocfs2_special_file_iops = { .setattr = ocfs2_setattr, .getattr = ocfs2_getattr, .permission = ocfs2_permission, + .get_acl = ocfs2_iop_get_acl, }; /* @@ -2606,7 +2719,7 @@ const struct inode_operations ocfs2_special_file_iops = { * ocfs2_fops_no_plocks and ocfs2_dops_no_plocks! */ const struct file_operations ocfs2_fops = { - .llseek = generic_file_llseek, + .llseek = ocfs2_file_llseek, .read = do_sync_read, .write = do_sync_write, .mmap = ocfs2_mmap, @@ -2654,7 +2767,7 @@ const struct file_operations ocfs2_dops = { * the cluster. */ const struct file_operations ocfs2_fops_no_plocks = { - .llseek = generic_file_llseek, + .llseek = ocfs2_file_llseek, .read = do_sync_read, .write = do_sync_write, .mmap = ocfs2_mmap, diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index f5afbbe..97bf761 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h @@ -61,7 +61,7 @@ int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh, int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); -int ocfs2_permission(struct inode *inode, int mask, unsigned int flags); +int ocfs2_permission(struct inode *inode, int mask); int ocfs2_should_update_atime(struct inode *inode, struct vfsmount *vfsmnt); diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index b4c8bb6..17454a9 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -291,7 +291,7 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, (unsigned long long)OCFS2_I(inode)->ip_blkno, (unsigned long long)le64_to_cpu(fe->i_blkno)); - inode->i_nlink = ocfs2_read_links_count(fe); + set_nlink(inode, ocfs2_read_links_count(fe)); trace_ocfs2_populate_inode(OCFS2_I(inode)->ip_blkno, le32_to_cpu(fe->i_flags)); @@ -951,7 +951,7 @@ static void ocfs2_cleanup_delete_inode(struct inode *inode, trace_ocfs2_cleanup_delete_inode( (unsigned long long)OCFS2_I(inode)->ip_blkno, sync_data); if (sync_data) - write_inode_now(inode, 1); + filemap_write_and_wait(inode->i_mapping); truncate_inode_pages(&inode->i_data, 0); } @@ -1290,7 +1290,7 @@ void ocfs2_refresh_inode(struct inode *inode, OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features); ocfs2_set_inode_flags(inode); i_size_write(inode, le64_to_cpu(fe->i_size)); - inode->i_nlink = ocfs2_read_links_count(fe); + set_nlink(inode, ocfs2_read_links_count(fe)); inode->i_uid = le32_to_cpu(fe->i_uid); inode->i_gid = le32_to_cpu(fe->i_gid); inode->i_mode = le16_to_cpu(fe->i_mode); diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 1c508b1..88924a3 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h @@ -43,6 +43,9 @@ struct ocfs2_inode_info /* protects extended attribute changes on this inode */ struct rw_semaphore ip_xattr_sem; + /* Number of outstanding AIO's which are not page aligned */ + atomic_t ip_unaligned_aio; + /* These fields are protected by ip_lock */ spinlock_t ip_lock; u32 ip_open_count; diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index bc91072..726ff26 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -122,7 +122,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, if ((oldflags & OCFS2_IMMUTABLE_FL) || ((flags ^ oldflags) & (OCFS2_APPEND_FL | OCFS2_IMMUTABLE_FL))) { if (!capable(CAP_LINUX_IMMUTABLE)) - goto bail_unlock; + goto bail_commit; } ocfs2_inode->ip_attr = flags; @@ -132,6 +132,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, if (status < 0) mlog_errno(status); +bail_commit: ocfs2_commit_trans(osb, handle); bail_unlock: ocfs2_inode_unlock(inode, 1); @@ -381,7 +382,7 @@ int ocfs2_info_handle_freeinode(struct inode *inode, if (!oifi) { status = -ENOMEM; mlog_errno(status); - goto bail; + goto out_err; } if (o2info_from_user(*oifi, req)) @@ -431,7 +432,7 @@ bail: o2info_set_request_error(&oifi->ifi_req, req); kfree(oifi); - +out_err: return status; } @@ -666,7 +667,7 @@ int ocfs2_info_handle_freefrag(struct inode *inode, if (!oiff) { status = -ENOMEM; mlog_errno(status); - goto bail; + goto out_err; } if (o2info_from_user(*oiff, req)) @@ -716,7 +717,7 @@ bail: o2info_set_request_error(&oiff->iff_req, req); kfree(oiff); - +out_err: return status; } diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 295d564..0a42ae9 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1544,9 +1544,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, /* we need to run complete recovery for offline orphan slots */ ocfs2_replay_map_set_state(osb, REPLAY_NEEDED); - mlog(ML_NOTICE, "Recovering node %d from slot %d on device (%u,%u)\n", - node_num, slot_num, - MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); + printk(KERN_NOTICE "ocfs2: Begin replay journal (node %d, slot %d) on "\ + "device (%u,%u)\n", node_num, slot_num, MAJOR(osb->sb->s_dev), + MINOR(osb->sb->s_dev)); OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); @@ -1601,6 +1601,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, jbd2_journal_destroy(journal); + printk(KERN_NOTICE "ocfs2: End replay journal (node %d, slot %d) on "\ + "device (%u,%u)\n", node_num, slot_num, MAJOR(osb->sb->s_dev), + MINOR(osb->sb->s_dev)); done: /* drop the lock on this nodes journal */ if (got_lock) @@ -1808,6 +1811,20 @@ static inline unsigned long ocfs2_orphan_scan_timeout(void) * every slot, queuing a recovery of the slot on the ocfs2_wq thread. This * is done to catch any orphans that are left over in orphan directories. * + * It scans all slots, even ones that are in use. It does so to handle the + * case described below: + * + * Node 1 has an inode it was using. The dentry went away due to memory + * pressure. Node 1 closes the inode, but it's on the free list. The node + * has the open lock. + * Node 2 unlinks the inode. It grabs the dentry lock to notify others, + * but node 1 has no dentry and doesn't get the message. It trylocks the + * open lock, sees that another node has a PR, and does nothing. + * Later node 2 runs its orphan dir. It igets the inode, trylocks the + * open lock, sees the PR still, and does nothing. + * Basically, we have to trigger an orphan iput on node 1. The only way + * for this to happen is if node 1 runs node 2's orphan dir. + * * ocfs2_queue_orphan_scan gets called every ORPHAN_SCAN_SCHEDULE_TIMEOUT * seconds. It gets an EX lock on os_lockres and checks sequence number * stored in LVB. If the sequence number has changed, it means some other diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 68cf2f6..a3385b6 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -441,10 +441,11 @@ static inline int ocfs2_mknod_credits(struct super_block *sb, int is_dir, #define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2) /* file update (nlink, etc) + directory mtime/ctime + dir entry block + quota - * update on dir + index leaf + dx root update for free list */ + * update on dir + index leaf + dx root update for free list + + * previous dirblock update in the free list */ static inline int ocfs2_link_credits(struct super_block *sb) { - return 2*OCFS2_INODE_UPDATE_CREDITS + 3 + + return 2*OCFS2_INODE_UPDATE_CREDITS + 4 + ocfs2_quota_trans_credits(sb); } diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 3e9393c..9cd4108 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -61,7 +61,7 @@ static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf) static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, struct page *page) { - int ret; + int ret = VM_FAULT_NOPAGE; struct inode *inode = file->f_path.dentry->d_inode; struct address_space *mapping = inode->i_mapping; loff_t pos = page_offset(page); @@ -71,32 +71,25 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, void *fsdata; loff_t size = i_size_read(inode); - /* - * Another node might have truncated while we were waiting on - * cluster locks. - * We don't check size == 0 before the shift. This is borrowed - * from do_generic_file_read. - */ last_index = (size - 1) >> PAGE_CACHE_SHIFT; - if (unlikely(!size || page->index > last_index)) { - ret = -EINVAL; - goto out; - } /* - * The i_size check above doesn't catch the case where nodes - * truncated and then re-extended the file. We'll re-check the - * page mapping after taking the page lock inside of - * ocfs2_write_begin_nolock(). + * There are cases that lead to the page no longer bebongs to the + * mapping. + * 1) pagecache truncates locally due to memory pressure. + * 2) pagecache truncates when another is taking EX lock against + * inode lock. see ocfs2_data_convert_worker. + * + * The i_size check doesn't catch the case where nodes truncated and + * then re-extended the file. We'll re-check the page mapping after + * taking the page lock inside of ocfs2_write_begin_nolock(). + * + * Let VM retry with these cases. */ - if (!PageUptodate(page) || page->mapping != inode->i_mapping) { - /* - * the page has been umapped in ocfs2_data_downconvert_worker. - * So return 0 here and let VFS retry. - */ - ret = 0; + if ((page->mapping != inode->i_mapping) || + (!PageUptodate(page)) || + (page_offset(page) >= size)) goto out; - } /* * Call ocfs2_write_begin() and ocfs2_write_end() to take @@ -116,17 +109,21 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, if (ret) { if (ret != -ENOSPC) mlog_errno(ret); + if (ret == -ENOMEM) + ret = VM_FAULT_OOM; + else + ret = VM_FAULT_SIGBUS; goto out; } - ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page, - fsdata); - if (ret < 0) { - mlog_errno(ret); + if (!locked_page) { + ret = VM_FAULT_NOPAGE; goto out; } + ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page, + fsdata); BUG_ON(ret != len); - ret = 0; + ret = VM_FAULT_LOCKED; out: return ret; } @@ -168,8 +165,6 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) out: ocfs2_unblock_signals(&oldset); - if (ret) - ret = VM_FAULT_SIGBUS; return ret; } diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index cd94270..184c76b 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -36,7 +36,6 @@ #include "dir.h" #include "buffer_head_io.h" #include "sysfile.h" -#include "suballoc.h" #include "refcounttree.h" #include "move_extents.h" @@ -746,7 +745,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, */ ocfs2_probe_alloc_group(inode, gd_bh, &goal_bit, len, move_max_hop, new_phys_cpos); - if (!new_phys_cpos) { + if (!*new_phys_cpos) { ret = -ENOSPC; goto out_commit; } diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index e5d738c..a8b2bfe 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -199,9 +199,7 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode) * these are used by the support functions here and in * callers. */ if (S_ISDIR(mode)) - inode->i_nlink = 2; - else - inode->i_nlink = 1; + set_nlink(inode, 2); inode_init_owner(inode, dir, mode); dquot_initialize(inode); return inode; @@ -1379,7 +1377,7 @@ static int ocfs2_rename(struct inode *old_dir, } if (new_inode) { - new_inode->i_nlink--; + drop_nlink(new_inode); new_inode->i_ctime = CURRENT_TIME; } old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; @@ -1387,9 +1385,9 @@ static int ocfs2_rename(struct inode *old_dir, if (update_dot_dot) { status = ocfs2_update_entry(old_inode, handle, &old_inode_dot_dot_res, new_dir); - old_dir->i_nlink--; + drop_nlink(old_dir); if (new_inode) { - new_inode->i_nlink--; + drop_nlink(new_inode); } else { inc_nlink(new_dir); mark_inode_dirty(new_dir); @@ -2018,7 +2016,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data; if (S_ISDIR(inode->i_mode)) ocfs2_add_links_count(orphan_fe, 1); - orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe); + set_nlink(orphan_dir_inode, ocfs2_read_links_count(orphan_fe)); ocfs2_journal_dirty(handle, orphan_dir_bh); status = __ocfs2_add_entry(handle, orphan_dir_inode, name, @@ -2116,7 +2114,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb, orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data; if (S_ISDIR(inode->i_mode)) ocfs2_add_links_count(orphan_fe, -1); - orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe); + set_nlink(orphan_dir_inode, ocfs2_read_links_count(orphan_fe)); ocfs2_journal_dirty(handle, orphan_dir_bh); leave: @@ -2282,7 +2280,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, goto leave; } - inode->i_nlink = 0; + clear_nlink(inode); /* do the real work now. */ status = __ocfs2_mknod_locked(dir, inode, 0, &new_di_bh, parent_di_bh, handle, @@ -2437,7 +2435,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, di = (struct ocfs2_dinode *)di_bh->b_data; le32_add_cpu(&di->i_flags, -OCFS2_ORPHANED_FL); di->i_orphaned_slot = 0; - inode->i_nlink = 1; + set_nlink(inode, 1); ocfs2_set_links_count(di, inode->i_nlink); ocfs2_journal_dirty(handle, di_bh); @@ -2498,4 +2496,5 @@ const struct inode_operations ocfs2_dir_iops = { .listxattr = ocfs2_listxattr, .removexattr = generic_removexattr, .fiemap = ocfs2_fiemap, + .get_acl = ocfs2_iop_get_acl, }; diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 4092858..d355e6e 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -836,18 +836,65 @@ static inline unsigned int ocfs2_clusters_to_megabytes(struct super_block *sb, static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap) { - __test_and_set_bit_le(bit, bitmap); + __set_bit_le(bit, bitmap); } #define ocfs2_set_bit(bit, addr) _ocfs2_set_bit((bit), (unsigned long *)(addr)) static inline void _ocfs2_clear_bit(unsigned int bit, unsigned long *bitmap) { - __test_and_clear_bit_le(bit, bitmap); + __clear_bit_le(bit, bitmap); } #define ocfs2_clear_bit(bit, addr) _ocfs2_clear_bit((bit), (unsigned long *)(addr)) #define ocfs2_test_bit test_bit_le #define ocfs2_find_next_zero_bit find_next_zero_bit_le #define ocfs2_find_next_bit find_next_bit_le + +static inline void *correct_addr_and_bit_unaligned(int *bit, void *addr) +{ +#if BITS_PER_LONG == 64 + *bit += ((unsigned long) addr & 7UL) << 3; + addr = (void *) ((unsigned long) addr & ~7UL); +#elif BITS_PER_LONG == 32 + *bit += ((unsigned long) addr & 3UL) << 3; + addr = (void *) ((unsigned long) addr & ~3UL); +#else +#error "how many bits you are?!" +#endif + return addr; +} + +static inline void ocfs2_set_bit_unaligned(int bit, void *bitmap) +{ + bitmap = correct_addr_and_bit_unaligned(&bit, bitmap); + ocfs2_set_bit(bit, bitmap); +} + +static inline void ocfs2_clear_bit_unaligned(int bit, void *bitmap) +{ + bitmap = correct_addr_and_bit_unaligned(&bit, bitmap); + ocfs2_clear_bit(bit, bitmap); +} + +static inline int ocfs2_test_bit_unaligned(int bit, void *bitmap) +{ + bitmap = correct_addr_and_bit_unaligned(&bit, bitmap); + return ocfs2_test_bit(bit, bitmap); +} + +static inline int ocfs2_find_next_zero_bit_unaligned(void *bitmap, int max, + int start) +{ + int fix = 0, ret, tmpmax; + bitmap = correct_addr_and_bit_unaligned(&fix, bitmap); + tmpmax = max + fix; + start += fix; + + ret = ocfs2_find_next_zero_bit(bitmap, tmpmax, start) - fix; + if (ret > max) + return max; + return ret; +} + #endif /* OCFS2_H */ diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 92fcd57..a40e5ce 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -712,6 +712,12 @@ static int ocfs2_release_dquot(struct dquot *dquot) */ if (status < 0) mlog_errno(status); + /* + * Clear dq_off so that we search for the structure in quota file next + * time we acquire it. The structure might be deleted and reallocated + * elsewhere by another node while our dquot structure is on freelist. + */ + dquot->dq_off = 0; clear_bit(DQ_ACTIVE_B, &dquot->dq_flags); out_trans: ocfs2_commit_trans(osb, handle); @@ -750,16 +756,17 @@ static int ocfs2_acquire_dquot(struct dquot *dquot) status = ocfs2_lock_global_qf(info, 1); if (status < 0) goto out; - if (!test_bit(DQ_READ_B, &dquot->dq_flags)) { - status = ocfs2_qinfo_lock(info, 0); - if (status < 0) - goto out_dq; - status = qtree_read_dquot(&info->dqi_gi, dquot); - ocfs2_qinfo_unlock(info, 0); - if (status < 0) - goto out_dq; - } - set_bit(DQ_READ_B, &dquot->dq_flags); + status = ocfs2_qinfo_lock(info, 0); + if (status < 0) + goto out_dq; + /* + * We always want to read dquot structure from disk because we don't + * know what happened with it while it was on freelist. + */ + status = qtree_read_dquot(&info->dqi_gi, dquot); + ocfs2_qinfo_unlock(info, 0); + if (status < 0) + goto out_dq; OCFS2_DQUOT(dquot)->dq_use_count++; OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace; diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index dc8007f..b6cfcf2 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -404,7 +404,9 @@ struct ocfs2_quota_recovery *ocfs2_begin_quota_recovery( int status = 0; struct ocfs2_quota_recovery *rec; - mlog(ML_NOTICE, "Beginning quota recovery in slot %u\n", slot_num); + printk(KERN_NOTICE "ocfs2: Beginning quota recovery on device (%s) for " + "slot %u\n", osb->dev_str, slot_num); + rec = ocfs2_alloc_quota_recovery(); if (!rec) return ERR_PTR(-ENOMEM); @@ -549,8 +551,8 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode, goto out_commit; } lock_buffer(qbh); - WARN_ON(!ocfs2_test_bit(bit, dchunk->dqc_bitmap)); - ocfs2_clear_bit(bit, dchunk->dqc_bitmap); + WARN_ON(!ocfs2_test_bit_unaligned(bit, dchunk->dqc_bitmap)); + ocfs2_clear_bit_unaligned(bit, dchunk->dqc_bitmap); le32_add_cpu(&dchunk->dqc_free, 1); unlock_buffer(qbh); ocfs2_journal_dirty(handle, qbh); @@ -596,7 +598,9 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, struct inode *lqinode; unsigned int flags; - mlog(ML_NOTICE, "Finishing quota recovery in slot %u\n", slot_num); + printk(KERN_NOTICE "ocfs2: Finishing quota recovery on device (%s) for " + "slot %u\n", osb->dev_str, slot_num); + mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); for (type = 0; type < MAXQUOTAS; type++) { if (list_empty(&(rec->r_list[type]))) @@ -612,8 +616,9 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, /* Someone else is holding the lock? Then he must be * doing the recovery. Just skip the file... */ if (status == -EAGAIN) { - mlog(ML_NOTICE, "skipping quota recovery for slot %d " - "because quota file is locked.\n", slot_num); + printk(KERN_NOTICE "ocfs2: Skipping quota recovery on " + "device (%s) for slot %d because quota file is " + "locked.\n", osb->dev_str, slot_num); status = 0; goto out_put; } else if (status < 0) { @@ -944,7 +949,7 @@ static struct ocfs2_quota_chunk *ocfs2_find_free_entry(struct super_block *sb, * ol_quota_entries_per_block(sb); } - found = ocfs2_find_next_zero_bit(dchunk->dqc_bitmap, len, 0); + found = ocfs2_find_next_zero_bit_unaligned(dchunk->dqc_bitmap, len, 0); /* We failed? */ if (found == len) { mlog(ML_ERROR, "Did not find empty entry in chunk %d with %u" @@ -1208,7 +1213,7 @@ static void olq_alloc_dquot(struct buffer_head *bh, void *private) struct ocfs2_local_disk_chunk *dchunk; dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data; - ocfs2_set_bit(*offset, dchunk->dqc_bitmap); + ocfs2_set_bit_unaligned(*offset, dchunk->dqc_bitmap); le32_add_cpu(&dchunk->dqc_free, -1); } @@ -1289,16 +1294,12 @@ int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot) (od->dq_chunk->qc_headerbh->b_data); /* Mark structure as freed */ lock_buffer(od->dq_chunk->qc_headerbh); - ocfs2_clear_bit(offset, dchunk->dqc_bitmap); + ocfs2_clear_bit_unaligned(offset, dchunk->dqc_bitmap); le32_add_cpu(&dchunk->dqc_free, 1); unlock_buffer(od->dq_chunk->qc_headerbh); ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh); out: - /* Clear the read bit so that next time someone uses this - * dquot he reads fresh info from disk and allocates local - * dquot structure */ - clear_bit(DQ_READ_B, &dquot->dq_flags); return status; } diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 15d29cc..9f32d7c 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4368,25 +4368,6 @@ static inline int ocfs2_may_create(struct inode *dir, struct dentry *child) return inode_permission(dir, MAY_WRITE | MAY_EXEC); } -/* copied from user_path_parent. */ -static int ocfs2_user_path_parent(const char __user *path, - struct nameidata *nd, char **name) -{ - char *s = getname(path); - int error; - - if (IS_ERR(s)) - return PTR_ERR(s); - - error = kern_path_parent(s, nd); - if (error) - putname(s); - else - *name = s; - - return error; -} - /** * ocfs2_vfs_reflink - Create a reference-counted link * @@ -4460,10 +4441,8 @@ int ocfs2_reflink_ioctl(struct inode *inode, bool preserve) { struct dentry *new_dentry; - struct nameidata nd; - struct path old_path; + struct path old_path, new_path; int error; - char *to = NULL; if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) return -EOPNOTSUPP; @@ -4474,39 +4453,33 @@ int ocfs2_reflink_ioctl(struct inode *inode, return error; } - error = ocfs2_user_path_parent(newname, &nd, &to); - if (error) { + new_dentry = user_path_create(AT_FDCWD, newname, &new_path, 0); + error = PTR_ERR(new_dentry); + if (IS_ERR(new_dentry)) { mlog_errno(error); goto out; } error = -EXDEV; - if (old_path.mnt != nd.path.mnt) - goto out_release; - new_dentry = lookup_create(&nd, 0); - error = PTR_ERR(new_dentry); - if (IS_ERR(new_dentry)) { + if (old_path.mnt != new_path.mnt) { mlog_errno(error); - goto out_unlock; + goto out_dput; } - error = mnt_want_write(nd.path.mnt); + error = mnt_want_write(new_path.mnt); if (error) { mlog_errno(error); goto out_dput; } error = ocfs2_vfs_reflink(old_path.dentry, - nd.path.dentry->d_inode, + new_path.dentry->d_inode, new_dentry, preserve); - mnt_drop_write(nd.path.mnt); + mnt_drop_write(new_path.mnt); out_dput: dput(new_dentry); -out_unlock: - mutex_unlock(&nd.path.dentry->d_inode->i_mutex); -out_release: - path_put(&nd.path); - putname(to); + mutex_unlock(&new_path.dentry->d_inode->i_mutex); + path_put(&new_path); out: path_put(&old_path); diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index 26fc001..1424c15 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c @@ -493,8 +493,8 @@ int ocfs2_find_slot(struct ocfs2_super *osb) goto bail; } } else - mlog(ML_NOTICE, "slot %d is already allocated to this node!\n", - slot); + printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already " + "allocated to this node!\n", slot, osb->dev_str); ocfs2_set_slot(si, slot, osb->node_num); osb->slot_num = slot; diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index 19965b0..9436801 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -28,6 +28,7 @@ #include "cluster/masklog.h" #include "cluster/nodemanager.h" #include "cluster/heartbeat.h" +#include "cluster/tcp.h" #include "stackglue.h" @@ -256,6 +257,61 @@ static void o2cb_dump_lksb(struct ocfs2_dlm_lksb *lksb) } /* + * Check if this node is heartbeating and is connected to all other + * heartbeating nodes. + */ +static int o2cb_cluster_check(void) +{ + u8 node_num; + int i; + unsigned long hbmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; + unsigned long netmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; + + node_num = o2nm_this_node(); + if (node_num == O2NM_MAX_NODES) { + printk(KERN_ERR "o2cb: This node has not been configured.\n"); + return -EINVAL; + } + + /* + * o2dlm expects o2net sockets to be created. If not, then + * dlm_join_domain() fails with a stack of errors which are both cryptic + * and incomplete. The idea here is to detect upfront whether we have + * managed to connect to all nodes or not. If not, then list the nodes + * to allow the user to check the configuration (incorrect IP, firewall, + * etc.) Yes, this is racy. But its not the end of the world. + */ +#define O2CB_MAP_STABILIZE_COUNT 60 + for (i = 0; i < O2CB_MAP_STABILIZE_COUNT; ++i) { + o2hb_fill_node_map(hbmap, sizeof(hbmap)); + if (!test_bit(node_num, hbmap)) { + printk(KERN_ERR "o2cb: %s heartbeat has not been " + "started.\n", (o2hb_global_heartbeat_active() ? + "Global" : "Local")); + return -EINVAL; + } + o2net_fill_node_map(netmap, sizeof(netmap)); + /* Force set the current node to allow easy compare */ + set_bit(node_num, netmap); + if (!memcmp(hbmap, netmap, sizeof(hbmap))) + return 0; + if (i < O2CB_MAP_STABILIZE_COUNT) + msleep(1000); + } + + printk(KERN_ERR "o2cb: This node could not connect to nodes:"); + i = -1; + while ((i = find_next_bit(hbmap, O2NM_MAX_NODES, + i + 1)) < O2NM_MAX_NODES) { + if (!test_bit(i, netmap)) + printk(" %u", i); + } + printk(".\n"); + + return -ENOTCONN; +} + +/* * Called from the dlm when it's about to evict a node. This is how the * classic stack signals node death. */ @@ -263,8 +319,8 @@ static void o2dlm_eviction_cb(int node_num, void *data) { struct ocfs2_cluster_connection *conn = data; - mlog(ML_NOTICE, "o2dlm has evicted node %d from group %.*s\n", - node_num, conn->cc_namelen, conn->cc_name); + printk(KERN_NOTICE "o2cb: o2dlm has evicted node %d from domain %.*s\n", + node_num, conn->cc_namelen, conn->cc_name); conn->cc_recovery_handler(node_num, conn->cc_recovery_data); } @@ -280,12 +336,11 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn) BUG_ON(conn == NULL); BUG_ON(conn->cc_proto == NULL); - /* for now we only have one cluster/node, make sure we see it - * in the heartbeat universe */ - if (!o2hb_check_local_node_heartbeating()) { - if (o2hb_global_heartbeat_active()) - mlog(ML_ERROR, "Global heartbeat not started\n"); - rc = -EINVAL; + /* Ensure cluster stack is up and all nodes are connected */ + rc = o2cb_cluster_check(); + if (rc) { + printk(KERN_ERR "o2cb: Cluster check failed. Fix errors " + "before retrying.\n"); goto out; } diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 56f6102..5fe6b1e 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -54,6 +54,7 @@ #include "ocfs1_fs_compat.h" #include "alloc.h" +#include "aops.h" #include "blockcheck.h" #include "dlmglue.h" #include "export.h" @@ -1107,9 +1108,9 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) ocfs2_set_ro_flag(osb, 1); - printk(KERN_NOTICE "Readonly device detected. No cluster " - "services will be utilized for this mount. Recovery " - "will be skipped.\n"); + printk(KERN_NOTICE "ocfs2: Readonly device (%s) detected. " + "Cluster services will not be used for this mount. " + "Recovery will be skipped.\n", osb->dev_str); } if (!ocfs2_is_hard_readonly(osb)) { @@ -1582,8 +1583,8 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) seq_printf(s, ",localflocks,"); if (osb->osb_cluster_stack[0]) - seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN, - osb->osb_cluster_stack); + seq_show_option_n(s, "cluster_stack", osb->osb_cluster_stack, + OCFS2_STACK_LABEL_LEN); if (opts & OCFS2_MOUNT_USRQUOTA) seq_printf(s, ",usrquota"); if (opts & OCFS2_MOUNT_GRPQUOTA) @@ -1616,12 +1617,17 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) return 0; } +wait_queue_head_t ocfs2__ioend_wq[OCFS2_IOEND_WQ_HASH_SZ]; + static int __init ocfs2_init(void) { - int status; + int status, i; ocfs2_print_version(); + for (i = 0; i < OCFS2_IOEND_WQ_HASH_SZ; i++) + init_waitqueue_head(&ocfs2__ioend_wq[i]); + status = init_ocfs2_uptodate_cache(); if (status < 0) { mlog_errno(status); @@ -1760,7 +1766,7 @@ static void ocfs2_inode_init_once(void *data) ocfs2_extent_map_init(&oi->vfs_inode); INIT_LIST_HEAD(&oi->ip_io_markers); oi->ip_dir_start_lookup = 0; - + atomic_set(&oi->ip_unaligned_aio, 0); init_rwsem(&oi->ip_alloc_sem); init_rwsem(&oi->ip_xattr_sem); mutex_init(&oi->ip_io_mutex); @@ -1974,7 +1980,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) * If we failed before we got a uuid_str yet, we can't stop * heartbeat. Otherwise, do it. */ - if (!mnt_err && !ocfs2_mount_local(osb) && osb->uuid_str) + if (!mnt_err && !ocfs2_mount_local(osb) && osb->uuid_str && + !ocfs2_is_hard_readonly(osb)) hangup_needed = 1; if (osb->cconn) @@ -2353,7 +2360,7 @@ static int ocfs2_initialize_super(struct super_block *sb, mlog_errno(status); goto bail; } - cleancache_init_shared_fs((char *)&uuid_net_key, sb); + cleancache_init_shared_fs((char *)&di->id2.i_super.s_uuid, sb); bail: return status; @@ -2462,8 +2469,8 @@ static int ocfs2_check_volume(struct ocfs2_super *osb) goto finally; } } else { - mlog(ML_NOTICE, "File system was not unmounted cleanly, " - "recovering volume.\n"); + printk(KERN_NOTICE "ocfs2: File system on device (%s) was not " + "unmounted cleanly, recovering it.\n", osb->dev_str); } local = ocfs2_mount_local(osb); diff --git a/fs/ocfs2/super.h b/fs/ocfs2/super.h index 40c7de0..74ff74c 100644 --- a/fs/ocfs2/super.h +++ b/fs/ocfs2/super.h @@ -31,17 +31,15 @@ extern struct workqueue_struct *ocfs2_wq; int ocfs2_publish_get_mount_state(struct ocfs2_super *osb, int node_num); -void __ocfs2_error(struct super_block *sb, - const char *function, - const char *fmt, ...) - __attribute__ ((format (printf, 3, 4))); +__printf(3, 4) +void __ocfs2_error(struct super_block *sb, const char *function, + const char *fmt, ...); #define ocfs2_error(sb, fmt, args...) __ocfs2_error(sb, __PRETTY_FUNCTION__, fmt, ##args) -void __ocfs2_abort(struct super_block *sb, - const char *function, - const char *fmt, ...) - __attribute__ ((format (printf, 3, 4))); +__printf(3, 4) +void __ocfs2_abort(struct super_block *sb, const char *function, + const char *fmt, ...); #define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args) diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 61a84cf..bef187b 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -2376,16 +2376,18 @@ static int ocfs2_remove_value_outside(struct inode*inode, } ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt); - if (ret < 0) { - mlog_errno(ret); - break; - } ocfs2_commit_trans(osb, ctxt.handle); if (ctxt.meta_ac) { ocfs2_free_alloc_context(ctxt.meta_ac); ctxt.meta_ac = NULL; } + + if (ret < 0) { + mlog_errno(ret); + break; + } + } if (ctxt.meta_ac) @@ -7195,20 +7197,9 @@ int ocfs2_init_security_and_acl(struct inode *dir, { int ret = 0; struct buffer_head *dir_bh = NULL; - struct ocfs2_security_xattr_info si = { - .enable = 1, - }; - ret = ocfs2_init_security_get(inode, dir, qstr, &si); - if (!ret) { - ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, - si.name, si.value, si.value_len, - XATTR_CREATE); - if (ret) { - mlog_errno(ret); - goto leave; - } - } else if (ret != -EOPNOTSUPP) { + ret = ocfs2_init_security_get(inode, dir, qstr, NULL); + if (ret) { mlog_errno(ret); goto leave; } @@ -7265,6 +7256,22 @@ static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name, name, value, size, flags); } +int ocfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, + void *fs_info) +{ + const struct xattr *xattr; + int err = 0; + + for (xattr = xattr_array; xattr->name != NULL; xattr++) { + err = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, + xattr->name, xattr->value, + xattr->value_len, XATTR_CREATE); + if (err) + break; + } + return err; +} + int ocfs2_init_security_get(struct inode *inode, struct inode *dir, const struct qstr *qstr, @@ -7273,8 +7280,13 @@ int ocfs2_init_security_get(struct inode *inode, /* check whether ocfs2 support feature xattr */ if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb))) return -EOPNOTSUPP; - return security_inode_init_security(inode, dir, qstr, &si->name, - &si->value, &si->value_len); + if (si) + return security_old_inode_init_security(inode, dir, qstr, + &si->name, &si->value, + &si->value_len); + + return security_inode_init_security(inode, dir, qstr, + &ocfs2_initxattrs, NULL); } int ocfs2_init_security_set(handle_t *handle, diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index 483442e..d1aca1d 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -214,7 +214,7 @@ static int scan_bitmap_block(struct reiserfs_transaction_handle *th, } /* otherwise we clear all bit were set ... */ while (--i >= *beg) - reiserfs_test_and_clear_le_bit + reiserfs_clear_le_bit (i, bh->b_data); reiserfs_restore_prepared_buffer(s, bh); *beg = org; @@ -1222,15 +1222,11 @@ void reiserfs_cache_bitmap_metadata(struct super_block *sb, info->free_count = 0; while (--cur >= (unsigned long *)bh->b_data) { - int i; - /* 0 and ~0 are special, we can optimize for them */ if (*cur == 0) info->free_count += BITS_PER_LONG; else if (*cur != ~0L) /* A mix, investigate */ - for (i = BITS_PER_LONG - 1; i >= 0; i--) - if (!reiserfs_test_le_bit(i, cur)) - info->free_count++; + info->free_count += BITS_PER_LONG - hweight_long(*cur); } } diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 198dabf..8048eea 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c @@ -14,7 +14,8 @@ extern const struct reiserfs_key MIN_KEY; static int reiserfs_readdir(struct file *, void *, filldir_t); -static int reiserfs_dir_fsync(struct file *filp, int datasync); +static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end, + int datasync); const struct file_operations reiserfs_dir_operations = { .llseek = generic_file_llseek, @@ -27,13 +28,21 @@ const struct file_operations reiserfs_dir_operations = { #endif }; -static int reiserfs_dir_fsync(struct file *filp, int datasync) +static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end, + int datasync) { struct inode *inode = filp->f_mapping->host; int err; + + err = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (err) + return err; + + mutex_lock(&inode->i_mutex); reiserfs_write_lock(inode->i_sb); err = reiserfs_commit_for_inode(inode); reiserfs_write_unlock(inode->i_sb); + mutex_unlock(&inode->i_mutex); if (err < 0) return err; return 0; @@ -119,6 +128,7 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, char *d_name; off_t d_off; ino_t d_ino; + loff_t cur_pos = deh_offset(deh); if (!de_visible(deh)) /* it is hidden entry */ @@ -191,8 +201,9 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, if (local_buf != small_buf) { kfree(local_buf); } - // next entry should be looked for with such offset - next_pos = deh_offset(deh) + 1; + + /* deh_offset(deh) may be invalid now. */ + next_pos = cur_pos + 1; if (item_moved(&tmp_ih, &path_to_entry)) { goto research; diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 91f080c..f011185 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -126,7 +126,7 @@ static int reiserfs_file_open(struct inode *inode, struct file *file) return err; } -static void reiserfs_vfs_truncate_file(struct inode *inode) +void reiserfs_vfs_truncate_file(struct inode *inode) { mutex_lock(&(REISERFS_I(inode)->tailpack)); reiserfs_truncate_file(inode, 1); @@ -140,12 +140,18 @@ static void reiserfs_vfs_truncate_file(struct inode *inode) * be removed... */ -static int reiserfs_sync_file(struct file *filp, int datasync) +static int reiserfs_sync_file(struct file *filp, loff_t start, loff_t end, + int datasync) { struct inode *inode = filp->f_mapping->host; int err; int barrier_done; + err = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (err) + return err; + + mutex_lock(&inode->i_mutex); BUG_ON(!S_ISREG(inode->i_mode)); err = sync_mapping_buffers(inode->i_mapping); reiserfs_write_lock(inode->i_sb); @@ -153,6 +159,7 @@ static int reiserfs_sync_file(struct file *filp, int datasync) reiserfs_write_unlock(inode->i_sb); if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb)) blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); + mutex_unlock(&inode->i_mutex); if (barrier_done < 0) return barrier_done; return (err < 0) ? -EIO : 0; @@ -305,11 +312,11 @@ const struct file_operations reiserfs_file_operations = { }; const struct inode_operations reiserfs_file_inode_operations = { - .truncate = reiserfs_vfs_truncate_file, .setattr = reiserfs_setattr, .setxattr = reiserfs_setxattr, .getxattr = reiserfs_getxattr, .listxattr = reiserfs_listxattr, .removexattr = reiserfs_removexattr, .permission = reiserfs_permission, + .get_acl = reiserfs_get_acl, }; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index ebe8db4..fcb07e5 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1154,7 +1154,7 @@ static void init_inode(struct inode *inode, struct treepath *path) set_inode_item_key_version(inode, KEY_FORMAT_3_5); set_inode_sd_version(inode, STAT_DATA_V1); inode->i_mode = sd_v1_mode(sd); - inode->i_nlink = sd_v1_nlink(sd); + set_nlink(inode, sd_v1_nlink(sd)); inode->i_uid = sd_v1_uid(sd); inode->i_gid = sd_v1_gid(sd); inode->i_size = sd_v1_size(sd); @@ -1199,7 +1199,7 @@ static void init_inode(struct inode *inode, struct treepath *path) struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih); inode->i_mode = sd_v2_mode(sd); - inode->i_nlink = sd_v2_nlink(sd); + set_nlink(inode, sd_v2_nlink(sd)); inode->i_uid = sd_v2_uid(sd); inode->i_size = sd_v2_size(sd); inode->i_gid = sd_v2_gid(sd); @@ -1444,7 +1444,7 @@ void reiserfs_read_locked_inode(struct inode *inode, /* a stale NFS handle can trigger this without it being an error */ pathrelse(&path_to_sd); reiserfs_make_bad_inode(inode); - inode->i_nlink = 0; + clear_nlink(inode); return; } @@ -1475,6 +1475,11 @@ void reiserfs_read_locked_inode(struct inode *inode, reiserfs_check_path(&path_to_sd); /* init inode should be relsing */ + /* + * Stat data v1 doesn't support ACLs. + */ + if (get_inode_sd_version(inode) == STAT_DATA_V1) + cache_no_acl(inode); } /** @@ -1832,7 +1837,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, #endif /* fill stat data */ - inode->i_nlink = (S_ISDIR(mode) ? 2 : 1); + set_nlink(inode, (S_ISDIR(mode) ? 2 : 1)); /* uid and gid must already be set by the caller for quota init */ @@ -1989,7 +1994,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, make_bad_inode(inode); out_inserted_sd: - inode->i_nlink = 0; + clear_nlink(inode); th->t_trans_id = 0; /* so the caller can't use this handle later */ unlock_new_inode(inode); /* OK to do even if we hadn't locked it */ iput(inode); @@ -3075,9 +3080,8 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, struct inode *inode = file->f_mapping->host; ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, - offset, nr_segs, - reiserfs_get_blocks_direct_io, NULL); + ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, + reiserfs_get_blocks_direct_io); /* * In case of error extending write may have instantiated a few @@ -3087,8 +3091,10 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, loff_t isize = i_size_read(inode); loff_t end = offset + iov_length(iov, nr_segs); - if (end > isize) - vmtruncate(inode, isize); + if ((end > isize) && inode_newsize_ok(inode, isize) == 0) { + truncate_setsize(inode, isize); + reiserfs_vfs_truncate_file(inode); + } } return ret; @@ -3120,6 +3126,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) error = -EFBIG; goto out; } + + inode_dio_wait(inode); + /* fill in hole pointers in the expanding truncate case. */ if (attr->ia_size > inode->i_size) { error = generic_cont_expand_simple(inode, attr->ia_size); @@ -3199,8 +3208,19 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) */ reiserfs_write_unlock_once(inode->i_sb, depth); if ((attr->ia_valid & ATTR_SIZE) && - attr->ia_size != i_size_read(inode)) - error = vmtruncate(inode, attr->ia_size); + attr->ia_size != i_size_read(inode)) { + error = inode_newsize_ok(inode, attr->ia_size); + if (!error) { + /* + * Could race against reiserfs_file_release + * if called from NFS, so take tailpack mutex. + */ + mutex_lock(&REISERFS_I(inode)->tailpack); + truncate_setsize(inode, attr->ia_size); + reiserfs_truncate_file(inode, 1); + mutex_unlock(&REISERFS_I(inode)->tailpack); + } + } if (!error) { setattr_copy(inode, attr); diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index c5e82ec..938a77f 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -291,14 +291,13 @@ int reiserfs_allocate_list_bitmaps(struct super_block *sb, for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { jb = jb_array + i; jb->journal_list = NULL; - jb->bitmaps = vmalloc(mem); + jb->bitmaps = vzalloc(mem); if (!jb->bitmaps) { reiserfs_warning(sb, "clm-2000", "unable to " "allocate bitmaps for journal lists"); failed = 1; break; } - memset(jb->bitmaps, 0, mem); } if (failed) { free_list_bitmaps(sb, jb_array); @@ -353,11 +352,10 @@ static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) if (num_cnodes <= 0) { return NULL; } - head = vmalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode)); + head = vzalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode)); if (!head) { return NULL; } - memset(head, 0, num_cnodes * sizeof(struct reiserfs_journal_cnode)); head[0].prev = NULL; head[0].next = head + 1; for (i = 1; i < num_cnodes; i++) { @@ -678,23 +676,19 @@ struct buffer_chunk { static void write_chunk(struct buffer_chunk *chunk) { int i; - get_fs_excl(); for (i = 0; i < chunk->nr; i++) { submit_logged_buffer(chunk->bh[i]); } chunk->nr = 0; - put_fs_excl(); } static void write_ordered_chunk(struct buffer_chunk *chunk) { int i; - get_fs_excl(); for (i = 0; i < chunk->nr; i++) { submit_ordered_buffer(chunk->bh[i]); } chunk->nr = 0; - put_fs_excl(); } static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh, @@ -986,8 +980,6 @@ static int flush_commit_list(struct super_block *s, return 0; } - get_fs_excl(); - /* before we can put our commit blocks on disk, we have to make sure everyone older than ** us is on disk too */ @@ -1145,7 +1137,6 @@ static int flush_commit_list(struct super_block *s, if (retval) reiserfs_abort(s, retval, "Journal write error in %s", __func__); - put_fs_excl(); return retval; } @@ -1374,8 +1365,6 @@ static int flush_journal_list(struct super_block *s, return 0; } - get_fs_excl(); - /* if all the work is already done, get out of here */ if (atomic_read(&(jl->j_nonzerolen)) <= 0 && atomic_read(&(jl->j_commit_left)) <= 0) { @@ -1597,7 +1586,6 @@ static int flush_journal_list(struct super_block *s, put_journal_list(s, jl); if (flushall) mutex_unlock(&journal->j_flush_mutex); - put_fs_excl(); return err; } @@ -1928,15 +1916,19 @@ static int do_journal_release(struct reiserfs_transaction_handle *th, } reiserfs_mounted_fs_count--; - /* wait for all commits to finish */ - cancel_delayed_work(&SB_JOURNAL(sb)->j_work); /* * We must release the write lock here because * the workqueue job (flush_async_commit) needs this lock */ reiserfs_write_unlock(sb); - flush_workqueue(commit_wq); + /* + * Cancel flushing of old commits. Note that this work will not + * be requeued because superblock is being shutdown and doesn't + * have MS_ACTIVE set. + */ + /* wait for all commits to finish */ + cancel_delayed_work_sync(&SB_JOURNAL(sb)->j_work); if (!reiserfs_mounted_fs_count) { destroy_workqueue(commit_wq); @@ -2695,14 +2687,13 @@ int journal_init(struct super_block *sb, const char *j_dev_name, * dependency inversion warnings. */ reiserfs_write_unlock(sb); - journal = SB_JOURNAL(sb) = vmalloc(sizeof(struct reiserfs_journal)); + journal = SB_JOURNAL(sb) = vzalloc(sizeof(struct reiserfs_journal)); if (!journal) { reiserfs_warning(sb, "journal-1256", "unable to get memory for journal structure"); reiserfs_write_lock(sb); return 1; } - memset(journal, 0, sizeof(struct reiserfs_journal)); INIT_LIST_HEAD(&journal->j_bitmap_nodes); INIT_LIST_HEAD(&journal->j_prealloc_list); INIT_LIST_HEAD(&journal->j_working_list); @@ -3108,7 +3099,6 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th, th->t_trans_id = journal->j_trans_id; unlock_journal(sb); INIT_LIST_HEAD(&th->t_list); - get_fs_excl(); return 0; out_fail: @@ -3964,7 +3954,6 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, flush = flags & FLUSH_ALL; wait_on_commit = flags & WAIT; - put_fs_excl(); current->journal_info = th->t_handle_save; reiserfs_check_lock_depth(sb, "journal end"); if (journal->j_len == 0) { @@ -4226,8 +4215,15 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, if (flush) { flush_commit_list(sb, jl, 1); flush_journal_list(sb, jl, 1); - } else if (!(jl->j_state & LIST_COMMIT_PENDING)) - queue_delayed_work(commit_wq, &journal->j_work, HZ / 10); + } else if (!(jl->j_state & LIST_COMMIT_PENDING)) { + /* + * Avoid queueing work when sb is being shut down. Transaction + * will be flushed on journal shutdown. + */ + if (sb->s_flags & MS_ACTIVE) + queue_delayed_work(commit_wq, + &journal->j_work, HZ / 10); + } /* if the next transaction has any chance of wrapping, flush ** transactions that might get overwritten. If any journal lists are very @@ -4316,4 +4312,3 @@ void reiserfs_abort_journal(struct super_block *sb, int errno) dump_stack(); #endif } - diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 1186626..80058e8 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -19,7 +19,7 @@ #include #include -#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) i->i_nlink=1; } +#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) set_nlink(i, 1); } #define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i); // directory item contains array of entry headers. This performs @@ -622,7 +622,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode, dentry->d_name.len, inode, 1 /*visible */ ); if (retval) { int err; - inode->i_nlink--; + drop_nlink(inode); reiserfs_update_sd(&th, inode); err = journal_end(&th, dir->i_sb, jbegin_count); if (err) @@ -702,7 +702,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dentry->d_name.len, inode, 1 /*visible */ ); if (retval) { int err; - inode->i_nlink--; + drop_nlink(inode); reiserfs_update_sd(&th, inode); err = journal_end(&th, dir->i_sb, jbegin_count); if (err) @@ -787,7 +787,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) dentry->d_name.len, inode, 1 /*visible */ ); if (retval) { int err; - inode->i_nlink = 0; + clear_nlink(inode); DEC_DIR_INODE_NLINK(dir); reiserfs_update_sd(&th, inode); err = journal_end(&th, dir->i_sb, jbegin_count); @@ -964,7 +964,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry) reiserfs_warning(inode->i_sb, "reiserfs-7042", "deleting nonexistent file (%lu), %d", inode->i_ino, inode->i_nlink); - inode->i_nlink = 1; + set_nlink(inode, 1); } drop_nlink(inode); @@ -1086,7 +1086,7 @@ static int reiserfs_symlink(struct inode *parent_dir, dentry->d_name.len, inode, 1 /*visible */ ); if (retval) { int err; - inode->i_nlink--; + drop_nlink(inode); reiserfs_update_sd(&th, inode); err = journal_end(&th, parent_dir->i_sb, jbegin_count); if (err) @@ -1129,7 +1129,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir, retval = journal_begin(&th, dir->i_sb, jbegin_count); if (retval) { - inode->i_nlink--; + drop_nlink(inode); reiserfs_write_unlock(dir->i_sb); return retval; } @@ -1144,7 +1144,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir, if (retval) { int err; - inode->i_nlink--; + drop_nlink(inode); err = journal_end(&th, dir->i_sb, jbegin_count); reiserfs_write_unlock(dir->i_sb); return err ? err : retval; @@ -1529,6 +1529,7 @@ const struct inode_operations reiserfs_dir_inode_operations = { .listxattr = reiserfs_listxattr, .removexattr = reiserfs_removexattr, .permission = reiserfs_permission, + .get_acl = reiserfs_get_acl, }; /* @@ -1545,6 +1546,7 @@ const struct inode_operations reiserfs_symlink_inode_operations = { .listxattr = reiserfs_listxattr, .removexattr = reiserfs_removexattr, .permission = reiserfs_permission, + .get_acl = reiserfs_get_acl, }; @@ -1558,5 +1560,5 @@ const struct inode_operations reiserfs_special_inode_operations = { .listxattr = reiserfs_listxattr, .removexattr = reiserfs_removexattr, .permission = reiserfs_permission, - + .get_acl = reiserfs_get_acl, }; diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c index b3a94d2..7483279 100644 --- a/fs/reiserfs/resize.c +++ b/fs/reiserfs/resize.c @@ -111,15 +111,13 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) /* allocate additional bitmap blocks, reallocate array of bitmap * block pointers */ bitmap = - vmalloc(sizeof(struct reiserfs_bitmap_info) * bmap_nr_new); + vzalloc(sizeof(struct reiserfs_bitmap_info) * bmap_nr_new); if (!bitmap) { /* Journal bitmaps are still supersized, but the memory isn't * leaked, so I guess it's ok */ printk("reiserfs_resize: unable to allocate memory.\n"); return -ENOMEM; } - memset(bitmap, 0, - sizeof(struct reiserfs_bitmap_info) * bmap_nr_new); for (i = 0; i < bmap_nr; i++) bitmap[i] = old_bitmap[i]; @@ -136,7 +134,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) return -EIO; } memset(bh->b_data, 0, sb_blocksize(sb)); - reiserfs_test_and_set_le_bit(0, bh->b_data); + reiserfs_set_le_bit(0, bh->b_data); reiserfs_cache_bitmap_metadata(s, bh, bitmap + i); set_buffer_uptodate(bh); @@ -172,7 +170,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) reiserfs_prepare_for_journal(s, bh, 1); for (i = block_r; i < s->s_blocksize * 8; i++) - reiserfs_test_and_clear_le_bit(i, bh->b_data); + reiserfs_clear_le_bit(i, bh->b_data); info->free_count += s->s_blocksize * 8 - block_r; journal_mark_dirty(&th, s, bh); @@ -190,7 +188,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) reiserfs_prepare_for_journal(s, bh, 1); for (i = block_r_new; i < s->s_blocksize * 8; i++) - reiserfs_test_and_set_le_bit(i, bh->b_data); + reiserfs_set_le_bit(i, bh->b_data); journal_mark_dirty(&th, s, bh); brelse(bh); diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 7527623..569498a 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1663,6 +1663,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) /* Set default values for options: non-aggressive tails, RO on errors */ REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_SMALLTAIL); REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_ERROR_RO); + REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_BARRIER_FLUSH); /* no preallocation minimum, be smart in reiserfs_file_write instead */ REISERFS_SB(s)->s_alloc_options.preallocmin = 0; diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 6e3ca4e..04eecc4 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -555,11 +555,10 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th, reiserfs_write_unlock(inode->i_sb); mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR); - down_write(&dentry->d_inode->i_alloc_sem); + inode_dio_wait(dentry->d_inode); reiserfs_write_lock(inode->i_sb); err = reiserfs_setattr(dentry, &newattrs); - up_write(&dentry->d_inode->i_alloc_sem); mutex_unlock(&dentry->d_inode->i_mutex); } else update_ctime(inode); @@ -868,27 +867,6 @@ out: return err; } -static int reiserfs_check_acl(struct inode *inode, int mask, unsigned int flags) -{ - struct posix_acl *acl; - int error = -EAGAIN; /* do regular unix permission checks by default */ - - if (flags & IPERM_FLAG_RCU) - return -ECHILD; - - acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS); - - if (acl) { - if (!IS_ERR(acl)) { - error = posix_acl_permission(inode, acl, mask); - posix_acl_release(acl); - } else if (PTR_ERR(acl) != -ENODATA) - error = PTR_ERR(acl); - } - - return error; -} - static int create_privroot(struct dentry *dentry) { int err; @@ -952,7 +930,7 @@ static int xattr_mount_check(struct super_block *s) return 0; } -int reiserfs_permission(struct inode *inode, int mask, unsigned int flags) +int reiserfs_permission(struct inode *inode, int mask) { /* * We don't do permission checks on the internal objects. @@ -961,15 +939,7 @@ int reiserfs_permission(struct inode *inode, int mask, unsigned int flags) if (IS_PRIVATE(inode)) return 0; -#ifdef CONFIG_REISERFS_FS_XATTR - /* - * Stat data v1 doesn't support ACLs. - */ - if (get_inode_sd_version(inode) != STAT_DATA_V1) - return generic_permission(inode, mask, flags, - reiserfs_check_acl); -#endif - return generic_permission(inode, mask, flags, NULL); + return generic_permission(inode, mask); } static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd) diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index 3dc38f1..6da0396 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -272,12 +272,10 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode, case ACL_TYPE_ACCESS: name = POSIX_ACL_XATTR_ACCESS; if (acl) { - mode_t mode = inode->i_mode; - error = posix_acl_equiv_mode(acl, &mode); + error = posix_acl_equiv_mode(acl, &inode->i_mode); if (error < 0) return error; else { - inode->i_mode = mode; if (error == 0) acl = NULL; } @@ -354,10 +352,6 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, return PTR_ERR(acl); if (acl) { - struct posix_acl *acl_copy; - mode_t mode = inode->i_mode; - int need_acl; - /* Copy the default ACL to the default ACL of a new directory */ if (S_ISDIR(inode->i_mode)) { err = reiserfs_set_acl(th, inode, ACL_TYPE_DEFAULT, @@ -368,29 +362,13 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, /* Now we reconcile the new ACL and the mode, potentially modifying both */ - acl_copy = posix_acl_clone(acl, GFP_NOFS); - if (!acl_copy) { - err = -ENOMEM; - goto cleanup; - } - - need_acl = posix_acl_create_masq(acl_copy, &mode); - if (need_acl >= 0) { - if (mode != inode->i_mode) { - inode->i_mode = mode; - } + err = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode); + if (err < 0) + return err; - /* If we need an ACL.. */ - if (need_acl > 0) { - err = reiserfs_set_acl(th, inode, - ACL_TYPE_ACCESS, - acl_copy); - if (err) - goto cleanup_copy; - } - } - cleanup_copy: - posix_acl_release(acl_copy); + /* If we need an ACL.. */ + if (err > 0) + err = reiserfs_set_acl(th, inode, ACL_TYPE_ACCESS, acl); cleanup: posix_acl_release(acl); } else { @@ -445,7 +423,10 @@ int reiserfs_cache_default_acl(struct inode *inode) int reiserfs_acl_chmod(struct inode *inode) { - struct posix_acl *acl, *clone; + struct reiserfs_transaction_handle th; + struct posix_acl *acl; + size_t size; + int depth; int error; if (S_ISLNK(inode->i_mode)) @@ -463,30 +444,22 @@ int reiserfs_acl_chmod(struct inode *inode) return 0; if (IS_ERR(acl)) return PTR_ERR(acl); - clone = posix_acl_clone(acl, GFP_NOFS); - posix_acl_release(acl); - if (!clone) - return -ENOMEM; - error = posix_acl_chmod_masq(clone, inode->i_mode); + error = posix_acl_chmod(&acl, GFP_NOFS, inode->i_mode); + if (error) + return error; + + size = reiserfs_xattr_nblocks(inode, reiserfs_acl_size(acl->a_count)); + depth = reiserfs_write_lock_once(inode->i_sb); + error = journal_begin(&th, inode->i_sb, size * 2); if (!error) { - struct reiserfs_transaction_handle th; - size_t size = reiserfs_xattr_nblocks(inode, - reiserfs_acl_size(clone->a_count)); - int depth; - - depth = reiserfs_write_lock_once(inode->i_sb); - error = journal_begin(&th, inode->i_sb, size * 2); - if (!error) { - int error2; - error = reiserfs_set_acl(&th, inode, ACL_TYPE_ACCESS, - clone); - error2 = journal_end(&th, inode->i_sb, size * 2); - if (error2) - error = error2; - } - reiserfs_write_unlock_once(inode->i_sb, depth); + int error2; + error = reiserfs_set_acl(&th, inode, ACL_TYPE_ACCESS, acl); + error2 = journal_end(&th, inode->i_sb, size * 2); + if (error2) + error = error2; } - posix_acl_release(clone); + reiserfs_write_unlock_once(inode->i_sb, depth); + posix_acl_release(acl); return error; } diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c index ef66c18..534668f 100644 --- a/fs/reiserfs/xattr_security.c +++ b/fs/reiserfs/xattr_security.c @@ -66,8 +66,8 @@ int reiserfs_security_init(struct inode *dir, struct inode *inode, if (IS_PRIVATE(dir)) return 0; - error = security_inode_init_security(inode, dir, qstr, &sec->name, - &sec->value, &sec->length); + error = security_old_inode_init_security(inode, dir, qstr, &sec->name, + &sec->value, &sec->length); if (error) { if (error == -EOPNOTSUPP) error = 0; diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 315de66..bc4f94b 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -63,7 +63,7 @@ static void shrink_liability(struct ubifs_info *c, int nr_to_write) { down_read(&c->vfs_sb->s_umount); - writeback_inodes_sb(c->vfs_sb); + writeback_inodes_sb(c->vfs_sb, WB_REASON_FS_FREE_SPACE); up_read(&c->vfs_sb->s_umount); } diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index 87cd0ea..b2ca12f 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c @@ -78,7 +78,7 @@ static int nothing_to_commit(struct ubifs_info *c) * If the root TNC node is dirty, we definitely have something to * commit. */ - if (c->zroot.znode && test_bit(DIRTY_ZNODE, &c->zroot.znode->flags)) + if (c->zroot.znode && ubifs_zn_dirty(c->zroot.znode)) return 0; /* @@ -166,15 +166,10 @@ static int do_commit(struct ubifs_info *c) err = ubifs_orphan_end_commit(c); if (err) goto out; - old_ltail_lnum = c->ltail_lnum; - err = ubifs_log_end_commit(c, new_ltail_lnum); - if (err) - goto out; err = dbg_check_old_index(c, &zroot); if (err) goto out; - mutex_lock(&c->mst_mutex); c->mst_node->cmt_no = cpu_to_le64(c->cmt_no); c->mst_node->log_lnum = cpu_to_le32(new_ltail_lnum); c->mst_node->root_lnum = cpu_to_le32(zroot.lnum); @@ -203,8 +198,9 @@ static int do_commit(struct ubifs_info *c) c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); else c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_NO_ORPHS); - err = ubifs_write_master(c); - mutex_unlock(&c->mst_mutex); + + old_ltail_lnum = c->ltail_lnum; + err = ubifs_log_end_commit(c, new_ltail_lnum); if (err) goto out; @@ -418,7 +414,7 @@ int ubifs_run_commit(struct ubifs_info *c) spin_lock(&c->cs_lock); if (c->cmt_state == COMMIT_BROKEN) { - err = -EINVAL; + err = -EROFS; goto out; } @@ -444,7 +440,7 @@ int ubifs_run_commit(struct ubifs_info *c) * re-check it. */ if (c->cmt_state == COMMIT_BROKEN) { - err = -EINVAL; + err = -EROFS; goto out_cmt_unlock; } @@ -576,7 +572,7 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot) struct idx_node *i; size_t sz; - if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX)) + if (!dbg_is_chk_index(c)) return 0; INIT_LIST_HEAD(&list); diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 0bb2bce..b09ba2d 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -27,13 +27,12 @@ * various local functions of those subsystems. */ -#define UBIFS_DBG_PRESERVE_UBI - -#include "ubifs.h" #include -#include #include #include +#include +#include +#include "ubifs.h" #ifdef CONFIG_UBIFS_FS_DEBUG @@ -42,15 +41,6 @@ DEFINE_SPINLOCK(dbg_lock); static char dbg_key_buf0[128]; static char dbg_key_buf1[128]; -unsigned int ubifs_chk_flags; -unsigned int ubifs_tst_flags; - -module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR); -module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR); - -MODULE_PARM_DESC(debug_chks, "Debug check flags"); -MODULE_PARM_DESC(debug_tsts, "Debug special test flags"); - static const char *get_key_fmt(int fmt) { switch (fmt) { @@ -91,6 +81,28 @@ static const char *get_key_type(int type) } } +static const char *get_dent_type(int type) +{ + switch (type) { + case UBIFS_ITYPE_REG: + return "file"; + case UBIFS_ITYPE_DIR: + return "dir"; + case UBIFS_ITYPE_LNK: + return "symlink"; + case UBIFS_ITYPE_BLK: + return "blkdev"; + case UBIFS_ITYPE_CHR: + return "char dev"; + case UBIFS_ITYPE_FIFO: + return "fifo"; + case UBIFS_ITYPE_SOCK: + return "socket"; + default: + return "unknown/invalid type"; + } +} + static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key, char *buffer) { @@ -234,9 +246,13 @@ static void dump_ch(const struct ubifs_ch *ch) printk(KERN_DEBUG "\tlen %u\n", le32_to_cpu(ch->len)); } -void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode) +void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode) { const struct ubifs_inode *ui = ubifs_inode(inode); + struct qstr nm = { .name = NULL }; + union ubifs_key key; + struct ubifs_dent_node *dent, *pdent = NULL; + int count = 2; printk(KERN_DEBUG "Dump in-memory inode:"); printk(KERN_DEBUG "\tinode %lu\n", inode->i_ino); @@ -270,6 +286,32 @@ void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode) printk(KERN_DEBUG "\tlast_page_read %lu\n", ui->last_page_read); printk(KERN_DEBUG "\tread_in_a_row %lu\n", ui->read_in_a_row); printk(KERN_DEBUG "\tdata_len %d\n", ui->data_len); + + if (!S_ISDIR(inode->i_mode)) + return; + + printk(KERN_DEBUG "List of directory entries:\n"); + ubifs_assert(!mutex_is_locked(&c->tnc_mutex)); + + lowest_dent_key(c, &key, inode->i_ino); + while (1) { + dent = ubifs_tnc_next_ent(c, &key, &nm); + if (IS_ERR(dent)) { + if (PTR_ERR(dent) != -ENOENT) + printk(KERN_DEBUG "error %ld\n", PTR_ERR(dent)); + break; + } + + printk(KERN_DEBUG "\t%d: %s (%s)\n", + count++, dent->name, get_dent_type(dent->type)); + + nm.name = dent->name; + nm.len = le16_to_cpu(dent->nlen); + kfree(pdent); + pdent = dent; + key_read(c, &dent->key, &key); + } + kfree(pdent); } void dbg_dump_node(const struct ubifs_info *c, const void *node) @@ -278,7 +320,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) union ubifs_key key; const struct ubifs_ch *ch = node; - if (dbg_failure_mode) + if (dbg_is_tst_rcvry(c)) return; /* If the magic is incorrect, just hexdump the first bytes */ @@ -828,13 +870,29 @@ void dbg_dump_lpt_info(struct ubifs_info *c) spin_unlock(&dbg_lock); } +void dbg_dump_sleb(const struct ubifs_info *c, + const struct ubifs_scan_leb *sleb, int offs) +{ + struct ubifs_scan_node *snod; + + printk(KERN_DEBUG "(pid %d) start dumping scanned data from LEB %d:%d\n", + current->pid, sleb->lnum, offs); + + list_for_each_entry(snod, &sleb->nodes, list) { + cond_resched(); + printk(KERN_DEBUG "Dumping node at LEB %d:%d len %d\n", sleb->lnum, + snod->offs, snod->len); + dbg_dump_node(c, snod->node); + } +} + void dbg_dump_leb(const struct ubifs_info *c, int lnum) { struct ubifs_scan_leb *sleb; struct ubifs_scan_node *snod; void *buf; - if (dbg_failure_mode) + if (dbg_is_tst_rcvry(c)) return; printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", @@ -1080,6 +1138,7 @@ out: /** * dbg_check_synced_i_size - check synchronized inode size. + * @c: UBIFS file-system description object * @inode: inode to check * * If inode is clean, synchronized inode size has to be equivalent to current @@ -1087,12 +1146,12 @@ out: * has to be locked). Returns %0 if synchronized inode size if correct, and * %-EINVAL if not. */ -int dbg_check_synced_i_size(struct inode *inode) +int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode) { int err = 0; struct ubifs_inode *ui = ubifs_inode(inode); - if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + if (!dbg_is_chk_gen(c)) return 0; if (!S_ISREG(inode->i_mode)) return 0; @@ -1125,7 +1184,7 @@ int dbg_check_synced_i_size(struct inode *inode) * Note, it is good idea to make sure the @dir->i_mutex is locked before * calling this function. */ -int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir) +int dbg_check_dir(struct ubifs_info *c, const struct inode *dir) { unsigned int nlink = 2; union ubifs_key key; @@ -1133,7 +1192,7 @@ int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir) struct qstr nm = { .name = NULL }; loff_t size = UBIFS_INO_NODE_SZ; - if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + if (!dbg_is_chk_gen(c)) return 0; if (!S_ISDIR(dir->i_mode)) @@ -1167,12 +1226,14 @@ int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir) "but calculated size is %llu", dir->i_ino, (unsigned long long)i_size_read(dir), (unsigned long long)size); + dbg_dump_inode(c, dir); dump_stack(); return -EINVAL; } if (dir->i_nlink != nlink) { ubifs_err("directory inode %lu has nlink %u, but calculated " "nlink is %u", dir->i_ino, dir->i_nlink, nlink); + dbg_dump_inode(c, dir); dump_stack(); return -EINVAL; } @@ -1489,7 +1550,7 @@ int dbg_check_tnc(struct ubifs_info *c, int extra) long clean_cnt = 0, dirty_cnt = 0; int err, last; - if (!(ubifs_chk_flags & UBIFS_CHK_TNC)) + if (!dbg_is_chk_index(c)) return 0; ubifs_assert(mutex_is_locked(&c->tnc_mutex)); @@ -1736,7 +1797,7 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size) int err; long long calc = 0; - if (!(ubifs_chk_flags & UBIFS_CHK_IDX_SZ)) + if (!dbg_is_chk_index(c)) return 0; err = dbg_walk_index(c, NULL, add_size, &calc); @@ -2312,7 +2373,7 @@ int dbg_check_filesystem(struct ubifs_info *c) int err; struct fsck_data fsckd; - if (!(ubifs_chk_flags & UBIFS_CHK_FS)) + if (!dbg_is_chk_fs(c)) return 0; fsckd.inodes = RB_ROOT; @@ -2347,7 +2408,7 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head) struct list_head *cur; struct ubifs_scan_node *sa, *sb; - if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + if (!dbg_is_chk_gen(c)) return 0; for (cur = head->next; cur->next != head; cur = cur->next) { @@ -2414,7 +2475,7 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) struct list_head *cur; struct ubifs_scan_node *sa, *sb; - if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + if (!dbg_is_chk_gen(c)) return 0; for (cur = head->next; cur->next != head; cur = cur->next) { @@ -2491,214 +2552,141 @@ error_dump: return 0; } -int dbg_force_in_the_gaps(void) +static inline int chance(unsigned int n, unsigned int out_of) { - if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) - return 0; + return !!((random32() % out_of) + 1 <= n); - return !(random32() & 7); } -/* Failure mode for recovery testing */ - -#define chance(n, d) (simple_rand() <= (n) * 32768LL / (d)) - -struct failure_mode_info { - struct list_head list; - struct ubifs_info *c; -}; - -static LIST_HEAD(fmi_list); -static DEFINE_SPINLOCK(fmi_lock); - -static unsigned int next; - -static int simple_rand(void) +static int power_cut_emulated(struct ubifs_info *c, int lnum, int write) { - if (next == 0) - next = current->pid; - next = next * 1103515245 + 12345; - return (next >> 16) & 32767; -} - -static void failure_mode_init(struct ubifs_info *c) -{ - struct failure_mode_info *fmi; - - fmi = kmalloc(sizeof(struct failure_mode_info), GFP_NOFS); - if (!fmi) { - ubifs_err("Failed to register failure mode - no memory"); - return; - } - fmi->c = c; - spin_lock(&fmi_lock); - list_add_tail(&fmi->list, &fmi_list); - spin_unlock(&fmi_lock); -} - -static void failure_mode_exit(struct ubifs_info *c) -{ - struct failure_mode_info *fmi, *tmp; - - spin_lock(&fmi_lock); - list_for_each_entry_safe(fmi, tmp, &fmi_list, list) - if (fmi->c == c) { - list_del(&fmi->list); - kfree(fmi); - } - spin_unlock(&fmi_lock); -} - -static struct ubifs_info *dbg_find_info(struct ubi_volume_desc *desc) -{ - struct failure_mode_info *fmi; - - spin_lock(&fmi_lock); - list_for_each_entry(fmi, &fmi_list, list) - if (fmi->c->ubi == desc) { - struct ubifs_info *c = fmi->c; - - spin_unlock(&fmi_lock); - return c; - } - spin_unlock(&fmi_lock); - return NULL; -} - -static int in_failure_mode(struct ubi_volume_desc *desc) -{ - struct ubifs_info *c = dbg_find_info(desc); - - if (c && dbg_failure_mode) - return c->dbg->failure_mode; - return 0; -} + struct ubifs_debug_info *d = c->dbg; -static int do_fail(struct ubi_volume_desc *desc, int lnum, int write) -{ - struct ubifs_info *c = dbg_find_info(desc); - struct ubifs_debug_info *d; + ubifs_assert(dbg_is_tst_rcvry(c)); - if (!c || !dbg_failure_mode) - return 0; - d = c->dbg; - if (d->failure_mode) - return 1; - if (!d->fail_cnt) { - /* First call - decide delay to failure */ + if (!d->pc_cnt) { + /* First call - decide delay to the power cut */ if (chance(1, 2)) { - unsigned int delay = 1 << (simple_rand() >> 11); + unsigned long delay; if (chance(1, 2)) { - d->fail_delay = 1; - d->fail_timeout = jiffies + - msecs_to_jiffies(delay); - dbg_rcvry("failing after %ums", delay); + d->pc_delay = 1; + /* Fail withing 1 minute */ + delay = random32() % 60000; + d->pc_timeout = jiffies; + d->pc_timeout += msecs_to_jiffies(delay); + ubifs_warn("failing after %lums", delay); } else { - d->fail_delay = 2; - d->fail_cnt_max = delay; - dbg_rcvry("failing after %u calls", delay); + d->pc_delay = 2; + delay = random32() % 10000; + /* Fail within 10000 operations */ + d->pc_cnt_max = delay; + ubifs_warn("failing after %lu calls", delay); } } - d->fail_cnt += 1; + + d->pc_cnt += 1; } + /* Determine if failure delay has expired */ - if (d->fail_delay == 1) { - if (time_before(jiffies, d->fail_timeout)) + if (d->pc_delay == 1 && time_before(jiffies, d->pc_timeout)) return 0; - } else if (d->fail_delay == 2) - if (d->fail_cnt++ < d->fail_cnt_max) + if (d->pc_delay == 2 && d->pc_cnt++ < d->pc_cnt_max) return 0; + if (lnum == UBIFS_SB_LNUM) { - if (write) { - if (chance(1, 2)) - return 0; - } else if (chance(19, 20)) + if (write && chance(1, 2)) return 0; - dbg_rcvry("failing in super block LEB %d", lnum); + if (chance(19, 20)) + return 0; + ubifs_warn("failing in super block LEB %d", lnum); } else if (lnum == UBIFS_MST_LNUM || lnum == UBIFS_MST_LNUM + 1) { if (chance(19, 20)) return 0; - dbg_rcvry("failing in master LEB %d", lnum); + ubifs_warn("failing in master LEB %d", lnum); } else if (lnum >= UBIFS_LOG_LNUM && lnum <= c->log_last) { - if (write) { - if (chance(99, 100)) - return 0; - } else if (chance(399, 400)) + if (write && chance(99, 100)) + return 0; + if (chance(399, 400)) return 0; - dbg_rcvry("failing in log LEB %d", lnum); + ubifs_warn("failing in log LEB %d", lnum); } else if (lnum >= c->lpt_first && lnum <= c->lpt_last) { - if (write) { - if (chance(7, 8)) - return 0; - } else if (chance(19, 20)) + if (write && chance(7, 8)) return 0; - dbg_rcvry("failing in LPT LEB %d", lnum); + if (chance(19, 20)) + return 0; + ubifs_warn("failing in LPT LEB %d", lnum); } else if (lnum >= c->orph_first && lnum <= c->orph_last) { - if (write) { - if (chance(1, 2)) - return 0; - } else if (chance(9, 10)) + if (write && chance(1, 2)) return 0; - dbg_rcvry("failing in orphan LEB %d", lnum); + if (chance(9, 10)) + return 0; + ubifs_warn("failing in orphan LEB %d", lnum); } else if (lnum == c->ihead_lnum) { if (chance(99, 100)) return 0; - dbg_rcvry("failing in index head LEB %d", lnum); + ubifs_warn("failing in index head LEB %d", lnum); } else if (c->jheads && lnum == c->jheads[GCHD].wbuf.lnum) { if (chance(9, 10)) return 0; - dbg_rcvry("failing in GC head LEB %d", lnum); + ubifs_warn("failing in GC head LEB %d", lnum); } else if (write && !RB_EMPTY_ROOT(&c->buds) && !ubifs_search_bud(c, lnum)) { if (chance(19, 20)) return 0; - dbg_rcvry("failing in non-bud LEB %d", lnum); + ubifs_warn("failing in non-bud LEB %d", lnum); } else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND || c->cmt_state == COMMIT_RUNNING_REQUIRED) { if (chance(999, 1000)) return 0; - dbg_rcvry("failing in bud LEB %d commit running", lnum); + ubifs_warn("failing in bud LEB %d commit running", lnum); } else { if (chance(9999, 10000)) return 0; - dbg_rcvry("failing in bud LEB %d commit not running", lnum); + ubifs_warn("failing in bud LEB %d commit not running", lnum); } - ubifs_err("*** SETTING FAILURE MODE ON (LEB %d) ***", lnum); - d->failure_mode = 1; + + d->pc_happened = 1; + ubifs_warn("========== Power cut emulated =========="); dump_stack(); return 1; } -static void cut_data(const void *buf, int len) +static void cut_data(const void *buf, unsigned int len) { - int flen, i; + unsigned int from, to, i, ffs = chance(1, 2); unsigned char *p = (void *)buf; - flen = (len * (long long)simple_rand()) >> 15; - for (i = flen; i < len; i++) - p[i] = 0xff; -} + from = random32() % (len + 1); + if (chance(1, 2)) + to = random32() % (len - from + 1); + else + to = len; -int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, - int len, int check) -{ - if (in_failure_mode(desc)) - return -EROFS; - return ubi_leb_read(desc, lnum, buf, offset, len, check); + if (from < to) + ubifs_warn("filled bytes %u-%u with %s", from, to - 1, + ffs ? "0xFFs" : "random data"); + + if (ffs) + for (i = from; i < to; i++) + p[i] = 0xFF; + else + for (i = from; i < to; i++) + p[i] = random32() % 0x100; } -int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, - int offset, int len, int dtype) +int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, + int offs, int len, int dtype) { int err, failing; - if (in_failure_mode(desc)) + if (c->dbg->pc_happened) return -EROFS; - failing = do_fail(desc, lnum, 1); + + failing = power_cut_emulated(c, lnum, 1); if (failing) cut_data(buf, len); - err = ubi_leb_write(desc, lnum, buf, offset, len, dtype); + err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); if (err) return err; if (failing) @@ -2706,162 +2694,207 @@ int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, return 0; } -int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, +int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len, int dtype) { int err; - if (do_fail(desc, lnum, 1)) + if (c->dbg->pc_happened) return -EROFS; - err = ubi_leb_change(desc, lnum, buf, len, dtype); + if (power_cut_emulated(c, lnum, 1)) + return -EROFS; + err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); if (err) return err; - if (do_fail(desc, lnum, 1)) + if (power_cut_emulated(c, lnum, 1)) return -EROFS; return 0; } -int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum) +int dbg_leb_unmap(struct ubifs_info *c, int lnum) { int err; - if (do_fail(desc, lnum, 0)) + if (c->dbg->pc_happened) + return -EROFS; + if (power_cut_emulated(c, lnum, 0)) return -EROFS; - err = ubi_leb_erase(desc, lnum); + err = ubi_leb_unmap(c->ubi, lnum); if (err) return err; - if (do_fail(desc, lnum, 0)) + if (power_cut_emulated(c, lnum, 0)) return -EROFS; return 0; } -int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum) +int dbg_leb_map(struct ubifs_info *c, int lnum, int dtype) { int err; - if (do_fail(desc, lnum, 0)) + if (c->dbg->pc_happened) return -EROFS; - err = ubi_leb_unmap(desc, lnum); + if (power_cut_emulated(c, lnum, 0)) + return -EROFS; + err = ubi_leb_map(c->ubi, lnum, dtype); if (err) return err; - if (do_fail(desc, lnum, 0)) + if (power_cut_emulated(c, lnum, 0)) return -EROFS; return 0; } -int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum) -{ - if (in_failure_mode(desc)) - return -EROFS; - return ubi_is_mapped(desc, lnum); -} +/* + * Root directory for UBIFS stuff in debugfs. Contains sub-directories which + * contain the stuff specific to particular file-system mounts. + */ +static struct dentry *dfs_rootdir; -int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype) +static int dfs_file_open(struct inode *inode, struct file *file) { - int err; - - if (do_fail(desc, lnum, 0)) - return -EROFS; - err = ubi_leb_map(desc, lnum, dtype); - if (err) - return err; - if (do_fail(desc, lnum, 0)) - return -EROFS; - return 0; + file->private_data = inode->i_private; + return nonseekable_open(inode, file); } /** - * ubifs_debugging_init - initialize UBIFS debugging. - * @c: UBIFS file-system description object + * provide_user_output - provide output to the user reading a debugfs file. + * @val: boolean value for the answer + * @u: the buffer to store the answer at + * @count: size of the buffer + * @ppos: position in the @u output buffer * - * This function initializes debugging-related data for the file system. - * Returns zero in case of success and a negative error code in case of + * This is a simple helper function which stores @val boolean value in the user + * buffer when the user reads one of UBIFS debugfs files. Returns amount of + * bytes written to @u in case of success and a negative error code in case of * failure. */ -int ubifs_debugging_init(struct ubifs_info *c) +static int provide_user_output(int val, char __user *u, size_t count, + loff_t *ppos) { - c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL); - if (!c->dbg) - return -ENOMEM; + char buf[3]; - failure_mode_init(c); - return 0; + if (val) + buf[0] = '1'; + else + buf[0] = '0'; + buf[1] = '\n'; + buf[2] = 0x00; + + return simple_read_from_buffer(u, count, ppos, buf, 2); } -/** - * ubifs_debugging_exit - free debugging data. - * @c: UBIFS file-system description object - */ -void ubifs_debugging_exit(struct ubifs_info *c) +static ssize_t dfs_file_read(struct file *file, char __user *u, size_t count, + loff_t *ppos) { - failure_mode_exit(c); - kfree(c->dbg); -} + struct dentry *dent = file->f_path.dentry; + struct ubifs_info *c = file->private_data; + struct ubifs_debug_info *d = c->dbg; + int val; + + if (dent == d->dfs_chk_gen) + val = d->chk_gen; + else if (dent == d->dfs_chk_index) + val = d->chk_index; + else if (dent == d->dfs_chk_orph) + val = d->chk_orph; + else if (dent == d->dfs_chk_lprops) + val = d->chk_lprops; + else if (dent == d->dfs_chk_fs) + val = d->chk_fs; + else if (dent == d->dfs_tst_rcvry) + val = d->tst_rcvry; + else + return -EINVAL; -/* - * Root directory for UBIFS stuff in debugfs. Contains sub-directories which - * contain the stuff specific to particular file-system mounts. - */ -static struct dentry *dfs_rootdir; + return provide_user_output(val, u, count, ppos); +} /** - * dbg_debugfs_init - initialize debugfs file-system. + * interpret_user_input - interpret user debugfs file input. + * @u: user-provided buffer with the input + * @count: buffer size * - * UBIFS uses debugfs file-system to expose various debugging knobs to - * user-space. This function creates "ubifs" directory in the debugfs - * file-system. Returns zero in case of success and a negative error code in - * case of failure. + * This is a helper function which interpret user input to a boolean UBIFS + * debugfs file. Returns %0 or %1 in case of success and a negative error code + * in case of failure. */ -int dbg_debugfs_init(void) +static int interpret_user_input(const char __user *u, size_t count) { - dfs_rootdir = debugfs_create_dir("ubifs", NULL); - if (IS_ERR(dfs_rootdir)) { - int err = PTR_ERR(dfs_rootdir); - ubifs_err("cannot create \"ubifs\" debugfs directory, " - "error %d\n", err); - return err; - } + size_t buf_size; + char buf[8]; - return 0; -} + buf_size = min_t(size_t, count, (sizeof(buf) - 1)); + if (copy_from_user(buf, u, buf_size)) + return -EFAULT; -/** - * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system. - */ -void dbg_debugfs_exit(void) -{ - debugfs_remove(dfs_rootdir); -} + if (buf[0] == '1') + return 1; + else if (buf[0] == '0') + return 0; -static int open_debugfs_file(struct inode *inode, struct file *file) -{ - file->private_data = inode->i_private; - return nonseekable_open(inode, file); + return -EINVAL; } -static ssize_t write_debugfs_file(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) +static ssize_t dfs_file_write(struct file *file, const char __user *u, + size_t count, loff_t *ppos) { struct ubifs_info *c = file->private_data; struct ubifs_debug_info *d = c->dbg; + struct dentry *dent = file->f_path.dentry; + int val; - if (file->f_path.dentry == d->dfs_dump_lprops) + /* + * TODO: this is racy - the file-system might have already been + * unmounted and we'd oops in this case. The plan is to fix it with + * help of 'iterate_supers_type()' which we should have in v3.0: when + * a debugfs opened, we rember FS's UUID in file->private_data. Then + * whenever we access the FS via a debugfs file, we iterate all UBIFS + * superblocks and fine the one with the same UUID, and take the + * locking right. + * + * The other way to go suggested by Al Viro is to create a separate + * 'ubifs-debug' file-system instead. + */ + if (file->f_path.dentry == d->dfs_dump_lprops) { dbg_dump_lprops(c); - else if (file->f_path.dentry == d->dfs_dump_budg) + return count; + } + if (file->f_path.dentry == d->dfs_dump_budg) { dbg_dump_budg(c, &c->bi); - else if (file->f_path.dentry == d->dfs_dump_tnc) { + return count; + } + if (file->f_path.dentry == d->dfs_dump_tnc) { mutex_lock(&c->tnc_mutex); dbg_dump_tnc(c); mutex_unlock(&c->tnc_mutex); - } else + return count; + } + + val = interpret_user_input(u, count); + if (val < 0) + return val; + + if (dent == d->dfs_chk_gen) + d->chk_gen = val; + else if (dent == d->dfs_chk_index) + d->chk_index = val; + else if (dent == d->dfs_chk_orph) + d->chk_orph = val; + else if (dent == d->dfs_chk_lprops) + d->chk_lprops = val; + else if (dent == d->dfs_chk_fs) + d->chk_fs = val; + else if (dent == d->dfs_tst_rcvry) + d->tst_rcvry = val; + else return -EINVAL; return count; } static const struct file_operations dfs_fops = { - .open = open_debugfs_file, - .write = write_debugfs_file, + .open = dfs_file_open, + .read = dfs_file_read, + .write = dfs_file_write, .owner = THIS_MODULE, .llseek = no_llseek, }; @@ -2880,12 +2913,20 @@ static const struct file_operations dfs_fops = { */ int dbg_debugfs_init_fs(struct ubifs_info *c) { - int err; + int err, n; const char *fname; struct dentry *dent; struct ubifs_debug_info *d = c->dbg; - sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); + n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME, + c->vi.ubi_num, c->vi.vol_id); + if (n == UBIFS_DFS_DIR_LEN) { + /* The array size is too small */ + fname = UBIFS_DFS_DIR_NAME; + dent = ERR_PTR(-EINVAL); + goto out; + } + fname = d->dfs_dir_name; dent = debugfs_create_dir(fname, dfs_rootdir); if (IS_ERR_OR_NULL(dent)) @@ -2910,13 +2951,55 @@ int dbg_debugfs_init_fs(struct ubifs_info *c) goto out_remove; d->dfs_dump_tnc = dent; + fname = "chk_general"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_chk_gen = dent; + + fname = "chk_index"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_chk_index = dent; + + fname = "chk_orphans"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_chk_orph = dent; + + fname = "chk_lprops"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_chk_lprops = dent; + + fname = "chk_fs"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_chk_fs = dent; + + fname = "tst_recovery"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, + &dfs_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_tst_rcvry = dent; + return 0; out_remove: debugfs_remove_recursive(d->dfs_dir); out: err = dent ? PTR_ERR(dent) : -ENODEV; - ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", + ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n", fname, err); return err; } @@ -2930,4 +3013,179 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c) debugfs_remove_recursive(c->dbg->dfs_dir); } +struct ubifs_global_debug_info ubifs_dbg; + +static struct dentry *dfs_chk_gen; +static struct dentry *dfs_chk_index; +static struct dentry *dfs_chk_orph; +static struct dentry *dfs_chk_lprops; +static struct dentry *dfs_chk_fs; +static struct dentry *dfs_tst_rcvry; + +static ssize_t dfs_global_file_read(struct file *file, char __user *u, + size_t count, loff_t *ppos) +{ + struct dentry *dent = file->f_path.dentry; + int val; + + if (dent == dfs_chk_gen) + val = ubifs_dbg.chk_gen; + else if (dent == dfs_chk_index) + val = ubifs_dbg.chk_index; + else if (dent == dfs_chk_orph) + val = ubifs_dbg.chk_orph; + else if (dent == dfs_chk_lprops) + val = ubifs_dbg.chk_lprops; + else if (dent == dfs_chk_fs) + val = ubifs_dbg.chk_fs; + else if (dent == dfs_tst_rcvry) + val = ubifs_dbg.tst_rcvry; + else + return -EINVAL; + + return provide_user_output(val, u, count, ppos); +} + +static ssize_t dfs_global_file_write(struct file *file, const char __user *u, + size_t count, loff_t *ppos) +{ + struct dentry *dent = file->f_path.dentry; + int val; + + val = interpret_user_input(u, count); + if (val < 0) + return val; + + if (dent == dfs_chk_gen) + ubifs_dbg.chk_gen = val; + else if (dent == dfs_chk_index) + ubifs_dbg.chk_index = val; + else if (dent == dfs_chk_orph) + ubifs_dbg.chk_orph = val; + else if (dent == dfs_chk_lprops) + ubifs_dbg.chk_lprops = val; + else if (dent == dfs_chk_fs) + ubifs_dbg.chk_fs = val; + else if (dent == dfs_tst_rcvry) + ubifs_dbg.tst_rcvry = val; + else + return -EINVAL; + + return count; +} + +static const struct file_operations dfs_global_fops = { + .read = dfs_global_file_read, + .write = dfs_global_file_write, + .owner = THIS_MODULE, + .llseek = no_llseek, +}; + +/** + * dbg_debugfs_init - initialize debugfs file-system. + * + * UBIFS uses debugfs file-system to expose various debugging knobs to + * user-space. This function creates "ubifs" directory in the debugfs + * file-system. Returns zero in case of success and a negative error code in + * case of failure. + */ +int dbg_debugfs_init(void) +{ + int err; + const char *fname; + struct dentry *dent; + + fname = "ubifs"; + dent = debugfs_create_dir(fname, NULL); + if (IS_ERR_OR_NULL(dent)) + goto out; + dfs_rootdir = dent; + + fname = "chk_general"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, + &dfs_global_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + dfs_chk_gen = dent; + + fname = "chk_index"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, + &dfs_global_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + dfs_chk_index = dent; + + fname = "chk_orphans"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, + &dfs_global_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + dfs_chk_orph = dent; + + fname = "chk_lprops"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, + &dfs_global_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + dfs_chk_lprops = dent; + + fname = "chk_fs"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, + &dfs_global_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + dfs_chk_fs = dent; + + fname = "tst_recovery"; + dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, + &dfs_global_fops); + if (IS_ERR_OR_NULL(dent)) + goto out_remove; + dfs_tst_rcvry = dent; + + return 0; + +out_remove: + debugfs_remove_recursive(dfs_rootdir); +out: + err = dent ? PTR_ERR(dent) : -ENODEV; + ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n", + fname, err); + return err; +} + +/** + * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system. + */ +void dbg_debugfs_exit(void) +{ + debugfs_remove_recursive(dfs_rootdir); +} + +/** + * ubifs_debugging_init - initialize UBIFS debugging. + * @c: UBIFS file-system description object + * + * This function initializes debugging-related data for the file system. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +int ubifs_debugging_init(struct ubifs_info *c) +{ + c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL); + if (!c->dbg) + return -ENOMEM; + + return 0; +} + +/** + * ubifs_debugging_exit - free debugging data. + * @c: UBIFS file-system description object + */ +void ubifs_debugging_exit(struct ubifs_info *c) +{ + kfree(c->dbg); +} + #endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index fd75b63..c9d2941 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -31,18 +31,25 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c, #ifdef CONFIG_UBIFS_FS_DEBUG -#include +/* + * The UBIFS debugfs directory name pattern and maximum name length (3 for "ubi" + * + 1 for "_" and plus 2x2 for 2 UBI numbers and 1 for the trailing zero byte. + */ +#define UBIFS_DFS_DIR_NAME "ubi%d_%d" +#define UBIFS_DFS_DIR_LEN (3 + 1 + 2*2 + 1) /** * ubifs_debug_info - per-FS debugging information. * @old_zroot: old index root - used by 'dbg_check_old_index()' * @old_zroot_level: old index root level - used by 'dbg_check_old_index()' * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()' - * @failure_mode: failure mode for recovery testing - * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls - * @fail_timeout: time in jiffies when delay of failure mode expires - * @fail_cnt: current number of calls to failure mode I/O functions - * @fail_cnt_max: number of calls by which to delay failure mode + * + * @pc_happened: non-zero if an emulated power cut happened + * @pc_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls + * @pc_timeout: time in jiffies when delay of failure mode expires + * @pc_cnt: current number of calls to failure mode I/O functions + * @pc_cnt_max: number of calls by which to delay failure mode + * * @chk_lpt_sz: used by LPT tree size checker * @chk_lpt_sz2: used by LPT tree size checker * @chk_lpt_wastage: used by LPT tree size checker @@ -56,21 +63,36 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c, * @saved_free: saved amount of free space * @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt * + * @chk_gen: if general extra checks are enabled + * @chk_index: if index xtra checks are enabled + * @chk_orph: if orphans extra checks are enabled + * @chk_lprops: if lprops extra checks are enabled + * @chk_fs: if UBIFS contents extra checks are enabled + * @tst_rcvry: if UBIFS recovery testing mode enabled + * * @dfs_dir_name: name of debugfs directory containing this file-system's files * @dfs_dir: direntry object of the file-system debugfs directory * @dfs_dump_lprops: "dump lprops" debugfs knob * @dfs_dump_budg: "dump budgeting information" debugfs knob * @dfs_dump_tnc: "dump TNC" debugfs knob + * @dfs_chk_gen: debugfs knob to enable UBIFS general extra checks + * @dfs_chk_index: debugfs knob to enable UBIFS index extra checks + * @dfs_chk_orph: debugfs knob to enable UBIFS orphans extra checks + * @dfs_chk_lprops: debugfs knob to enable UBIFS LEP properties extra checks + * @dfs_chk_fs: debugfs knob to enable UBIFS contents extra checks + * @dfs_tst_rcvry: debugfs knob to enable UBIFS recovery testing */ struct ubifs_debug_info { struct ubifs_zbranch old_zroot; int old_zroot_level; unsigned long long old_zroot_sqnum; - int failure_mode; - int fail_delay; - unsigned long fail_timeout; - unsigned int fail_cnt; - unsigned int fail_cnt_max; + + int pc_happened; + int pc_delay; + unsigned long pc_timeout; + unsigned int pc_cnt; + unsigned int pc_cnt_max; + long long chk_lpt_sz; long long chk_lpt_sz2; long long chk_lpt_wastage; @@ -84,11 +106,43 @@ struct ubifs_debug_info { long long saved_free; int saved_idx_gc_cnt; - char dfs_dir_name[100]; + unsigned int chk_gen:1; + unsigned int chk_index:1; + unsigned int chk_orph:1; + unsigned int chk_lprops:1; + unsigned int chk_fs:1; + unsigned int tst_rcvry:1; + + char dfs_dir_name[UBIFS_DFS_DIR_LEN + 1]; struct dentry *dfs_dir; struct dentry *dfs_dump_lprops; struct dentry *dfs_dump_budg; struct dentry *dfs_dump_tnc; + struct dentry *dfs_chk_gen; + struct dentry *dfs_chk_index; + struct dentry *dfs_chk_orph; + struct dentry *dfs_chk_lprops; + struct dentry *dfs_chk_fs; + struct dentry *dfs_tst_rcvry; +}; + +/** + * ubifs_global_debug_info - global (not per-FS) UBIFS debugging information. + * + * @chk_gen: if general extra checks are enabled + * @chk_index: if index xtra checks are enabled + * @chk_orph: if orphans extra checks are enabled + * @chk_lprops: if lprops extra checks are enabled + * @chk_fs: if UBIFS contents extra checks are enabled + * @tst_rcvry: if UBIFS recovery testing mode enabled + */ +struct ubifs_global_debug_info { + unsigned int chk_gen:1; + unsigned int chk_index:1; + unsigned int chk_orph:1; + unsigned int chk_lprops:1; + unsigned int chk_fs:1; + unsigned int tst_rcvry:1; }; #define ubifs_assert(expr) do { \ @@ -128,6 +182,8 @@ const char *dbg_key_str1(const struct ubifs_info *c, #define DBGKEY(key) dbg_key_str0(c, (key)) #define DBGKEY1(key) dbg_key_str1(c, (key)) +extern spinlock_t dbg_lock; + #define ubifs_dbg_msg(type, fmt, ...) \ pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__) @@ -163,41 +219,36 @@ const char *dbg_key_str1(const struct ubifs_info *c, /* Additional recovery messages */ #define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__) -/* - * Debugging check flags. - * - * UBIFS_CHK_GEN: general checks - * UBIFS_CHK_TNC: check TNC - * UBIFS_CHK_IDX_SZ: check index size - * UBIFS_CHK_ORPH: check orphans - * UBIFS_CHK_OLD_IDX: check the old index - * UBIFS_CHK_LPROPS: check lprops - * UBIFS_CHK_FS: check the file-system - */ -enum { - UBIFS_CHK_GEN = 0x1, - UBIFS_CHK_TNC = 0x2, - UBIFS_CHK_IDX_SZ = 0x4, - UBIFS_CHK_ORPH = 0x8, - UBIFS_CHK_OLD_IDX = 0x10, - UBIFS_CHK_LPROPS = 0x20, - UBIFS_CHK_FS = 0x40, -}; +extern struct ubifs_global_debug_info ubifs_dbg; -/* - * Special testing flags. - * - * UBIFS_TST_RCVRY: failure mode for recovery testing - */ -enum { - UBIFS_TST_RCVRY = 0x4, -}; - -extern spinlock_t dbg_lock; - -extern unsigned int ubifs_msg_flags; -extern unsigned int ubifs_chk_flags; -extern unsigned int ubifs_tst_flags; +static inline int dbg_is_chk_gen(const struct ubifs_info *c) +{ + return !!(ubifs_dbg.chk_gen || c->dbg->chk_gen); +} +static inline int dbg_is_chk_index(const struct ubifs_info *c) +{ + return !!(ubifs_dbg.chk_index || c->dbg->chk_index); +} +static inline int dbg_is_chk_orph(const struct ubifs_info *c) +{ + return !!(ubifs_dbg.chk_orph || c->dbg->chk_orph); +} +static inline int dbg_is_chk_lprops(const struct ubifs_info *c) +{ + return !!(ubifs_dbg.chk_lprops || c->dbg->chk_lprops); +} +static inline int dbg_is_chk_fs(const struct ubifs_info *c) +{ + return !!(ubifs_dbg.chk_fs || c->dbg->chk_fs); +} +static inline int dbg_is_tst_rcvry(const struct ubifs_info *c) +{ + return !!(ubifs_dbg.tst_rcvry || c->dbg->tst_rcvry); +} +static inline int dbg_is_power_cut(const struct ubifs_info *c) +{ + return !!c->dbg->pc_happened; +} int ubifs_debugging_init(struct ubifs_info *c); void ubifs_debugging_exit(struct ubifs_info *c); @@ -208,7 +259,7 @@ const char *dbg_cstate(int cmt_state); const char *dbg_jhead(int jhead); const char *dbg_get_key_dump(const struct ubifs_info *c, const union ubifs_key *key); -void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode); +void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode); void dbg_dump_node(const struct ubifs_info *c, const void *node); void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum, int offs); @@ -219,6 +270,8 @@ void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp); void dbg_dump_lprops(struct ubifs_info *c); void dbg_dump_lpt_info(struct ubifs_info *c); void dbg_dump_leb(const struct ubifs_info *c, int lnum); +void dbg_dump_sleb(const struct ubifs_info *c, + const struct ubifs_scan_leb *sleb, int offs); void dbg_dump_znode(const struct ubifs_info *c, const struct ubifs_znode *znode); void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat); @@ -241,8 +294,8 @@ int dbg_check_cats(struct ubifs_info *c); int dbg_check_ltab(struct ubifs_info *c); int dbg_chk_lpt_free_spc(struct ubifs_info *c); int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len); -int dbg_check_synced_i_size(struct inode *inode); -int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir); +int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode); +int dbg_check_dir(struct ubifs_info *c, const struct inode *dir); int dbg_check_tnc(struct ubifs_info *c, int extra); int dbg_check_idx_size(struct ubifs_info *c, long long idx_size); int dbg_check_filesystem(struct ubifs_info *c); @@ -255,54 +308,12 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head); int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head); -/* Force the use of in-the-gaps method for testing */ -static inline int dbg_force_in_the_gaps_enabled(void) -{ - return ubifs_chk_flags & UBIFS_CHK_GEN; -} -int dbg_force_in_the_gaps(void); - -/* Failure mode for recovery testing */ -#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY) - -#ifndef UBIFS_DBG_PRESERVE_UBI -#define ubi_leb_read dbg_leb_read -#define ubi_leb_write dbg_leb_write -#define ubi_leb_change dbg_leb_change -#define ubi_leb_erase dbg_leb_erase -#define ubi_leb_unmap dbg_leb_unmap -#define ubi_is_mapped dbg_is_mapped -#define ubi_leb_map dbg_leb_map -#endif - -int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, - int len, int check); -int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, - int offset, int len, int dtype); -int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, - int len, int dtype); -int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum); -int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum); -int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum); -int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype); - -static inline int dbg_read(struct ubi_volume_desc *desc, int lnum, char *buf, - int offset, int len) -{ - return dbg_leb_read(desc, lnum, buf, offset, len, 0); -} - -static inline int dbg_write(struct ubi_volume_desc *desc, int lnum, - const void *buf, int offset, int len) -{ - return dbg_leb_write(desc, lnum, buf, offset, len, UBI_UNKNOWN); -} - -static inline int dbg_change(struct ubi_volume_desc *desc, int lnum, - const void *buf, int len) -{ - return dbg_leb_change(desc, lnum, buf, len, UBI_UNKNOWN); -} +int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs, + int len, int dtype); +int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len, + int dtype); +int dbg_leb_unmap(struct ubifs_info *c, int lnum); +int dbg_leb_map(struct ubifs_info *c, int lnum, int dtype); /* Debugfs-related stuff */ int dbg_debugfs_init(void); @@ -314,7 +325,7 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); /* Use "if (0)" to make compiler check arguments even if debugging is off */ #define ubifs_assert(expr) do { \ - if (0 && (expr)) \ + if (0) \ printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ __func__, __LINE__, current->pid); \ } while (0) @@ -324,9 +335,12 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); ubifs_err(fmt, ##__VA_ARGS__); \ } while (0) -#define ubifs_dbg_msg(fmt, ...) do { \ - if (0) \ - pr_debug(fmt "\n", ##__VA_ARGS__); \ +#define DBGKEY(key) ((char *)(key)) +#define DBGKEY1(key) ((char *)(key)) + +#define ubifs_dbg_msg(fmt, ...) do { \ + if (0) \ + printk(KERN_DEBUG fmt "\n", ##__VA_ARGS__); \ } while (0) #define dbg_dump_stack() @@ -347,9 +361,6 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); #define dbg_scan(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) #define dbg_rcvry(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) -#define DBGKEY(key) ((char *)(key)) -#define DBGKEY1(key) ((char *)(key)) - static inline int ubifs_debugging_init(struct ubifs_info *c) { return 0; } static inline void ubifs_debugging_exit(struct ubifs_info *c) { return; } static inline const char *dbg_ntype(int type) { return ""; } @@ -358,7 +369,7 @@ static inline const char *dbg_jhead(int jhead) { return ""; } static inline const char * dbg_get_key_dump(const struct ubifs_info *c, const union ubifs_key *key) { return ""; } -static inline void dbg_dump_inode(const struct ubifs_info *c, +static inline void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode) { return; } static inline void dbg_dump_node(const struct ubifs_info *c, const void *node) { return; } @@ -379,6 +390,9 @@ static inline void dbg_dump_lpt_info(struct ubifs_info *c) { return; } static inline void dbg_dump_leb(const struct ubifs_info *c, int lnum) { return; } static inline void +dbg_dump_sleb(const struct ubifs_info *c, + const struct ubifs_scan_leb *sleb, int offs) { return; } +static inline void dbg_dump_znode(const struct ubifs_info *c, const struct ubifs_znode *znode) { return; } static inline void dbg_dump_heap(struct ubifs_info *c, @@ -410,9 +424,11 @@ static inline int dbg_check_ltab(struct ubifs_info *c) { return 0; } static inline int dbg_chk_lpt_free_spc(struct ubifs_info *c) { return 0; } static inline int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) { return 0; } -static inline int dbg_check_synced_i_size(struct inode *inode) { return 0; } -static inline int dbg_check_dir_size(struct ubifs_info *c, - const struct inode *dir) { return 0; } +static inline int +dbg_check_synced_i_size(const struct ubifs_info *c, + struct inode *inode) { return 0; } +static inline int dbg_check_dir(struct ubifs_info *c, + const struct inode *dir) { return 0; } static inline int dbg_check_tnc(struct ubifs_info *c, int extra) { return 0; } static inline int dbg_check_idx_size(struct ubifs_info *c, long long idx_size) { return 0; } @@ -432,9 +448,23 @@ static inline int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) { return 0; } -static inline int dbg_force_in_the_gaps(void) { return 0; } -#define dbg_force_in_the_gaps_enabled() 0 -#define dbg_failure_mode 0 +static inline int dbg_leb_write(struct ubifs_info *c, int lnum, + const void *buf, int offset, + int len, int dtype) { return 0; } +static inline int dbg_leb_change(struct ubifs_info *c, int lnum, + const void *buf, int len, + int dtype) { return 0; } +static inline int dbg_leb_unmap(struct ubifs_info *c, int lnum) { return 0; } +static inline int dbg_leb_map(struct ubifs_info *c, int lnum, + int dtype) { return 0; } + +static inline int dbg_is_chk_gen(const struct ubifs_info *c) { return 0; } +static inline int dbg_is_chk_index(const struct ubifs_info *c) { return 0; } +static inline int dbg_is_chk_orph(const struct ubifs_info *c) { return 0; } +static inline int dbg_is_chk_lprops(const struct ubifs_info *c) { return 0; } +static inline int dbg_is_chk_fs(const struct ubifs_info *c) { return 0; } +static inline int dbg_is_tst_rcvry(const struct ubifs_info *c) { return 0; } +static inline int dbg_is_power_cut(const struct ubifs_info *c) { return 0; } static inline int dbg_debugfs_init(void) { return 0; } static inline void dbg_debugfs_exit(void) { return; } diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 936a038..aaebf0f 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -102,7 +102,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, * UBIFS has to fully control "clean <-> dirty" transitions of inodes * to make budgeting work. */ - inode->i_flags |= (S_NOCMTIME); + inode->i_flags |= S_NOCMTIME; inode_init_owner(inode, dir, mode); inode->i_mtime = inode->i_atime = inode->i_ctime = @@ -172,9 +172,11 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, #ifdef CONFIG_UBIFS_FS_DEBUG -static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm) +static int dbg_check_name(const struct ubifs_info *c, + const struct ubifs_dent_node *dent, + const struct qstr *nm) { - if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + if (!dbg_is_chk_gen(c)) return 0; if (le16_to_cpu(dent->nlen) != nm->len) return -EINVAL; @@ -185,7 +187,7 @@ static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm) #else -#define dbg_check_name(dent, nm) 0 +#define dbg_check_name(c, dent, nm) 0 #endif @@ -219,7 +221,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, goto out; } - if (dbg_check_name(dent, &dentry->d_name)) { + if (dbg_check_name(c, dent, &dentry->d_name)) { err = -EINVAL; goto out; } @@ -546,7 +548,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, ubifs_assert(mutex_is_locked(&dir->i_mutex)); ubifs_assert(mutex_is_locked(&inode->i_mutex)); - err = dbg_check_synced_i_size(inode); + err = dbg_check_synced_i_size(c, inode); if (err) return err; @@ -601,7 +603,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) inode->i_nlink, dir->i_ino); ubifs_assert(mutex_is_locked(&dir->i_mutex)); ubifs_assert(mutex_is_locked(&inode->i_mutex)); - err = dbg_check_synced_i_size(inode); + err = dbg_check_synced_i_size(c, inode); if (err) return err; diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 5e7fccf..6589006 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1263,7 +1263,7 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr) if (err) return err; - err = dbg_check_synced_i_size(inode); + err = dbg_check_synced_i_size(c, inode); if (err) return err; @@ -1304,7 +1304,7 @@ static void *ubifs_follow_link(struct dentry *dentry, struct nameidata *nd) return NULL; } -int ubifs_fsync(struct file *file, int datasync) +int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; @@ -1319,14 +1319,16 @@ int ubifs_fsync(struct file *file, int datasync) */ return 0; - /* - * VFS has already synchronized dirty pages for this inode. Synchronize - * the inode unless this is a 'datasync()' call. - */ + err = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (err) + return err; + mutex_lock(&inode->i_mutex); + + /* Synchronize the inode unless this is a 'datasync()' call. */ if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) { err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) - return err; + goto out; } /* @@ -1334,10 +1336,9 @@ int ubifs_fsync(struct file *file, int datasync) * them. */ err = ubifs_sync_wbufs_by_inode(c, inode); - if (err) - return err; - - return 0; +out: + mutex_unlock(&inode->i_mutex); + return err; } /** @@ -1521,8 +1522,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, ubifs_release_dirty_inode_budget(c, ui); } - unlock_page(page); - return 0; + return VM_FAULT_LOCKED; out_unlock: unlock_page(page); diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 3be645e..9228950 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -86,8 +86,125 @@ void ubifs_ro_mode(struct ubifs_info *c, int err) c->no_chk_data_crc = 0; c->vfs_sb->s_flags |= MS_RDONLY; ubifs_warn("switched to read-only mode, error %d", err); + dump_stack(); + } +} + +/* + * Below are simple wrappers over UBI I/O functions which include some + * additional checks and UBIFS debugging stuff. See corresponding UBI function + * for more information. + */ + +int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs, + int len, int even_ebadmsg) +{ + int err; + + err = ubi_read(c->ubi, lnum, buf, offs, len); + /* + * In case of %-EBADMSG print the error message only if the + * @even_ebadmsg is true. + */ + if (err && (err != -EBADMSG || even_ebadmsg)) { + ubifs_err("reading %d bytes from LEB %d:%d failed, error %d", + len, lnum, offs, err); + dbg_dump_stack(); + } + return err; +} + +int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs, + int len, int dtype) +{ + int err; + + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) + return -EROFS; + if (!dbg_is_tst_rcvry(c)) + err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); + else + err = dbg_leb_write(c, lnum, buf, offs, len, dtype); + if (err) { + ubifs_err("writing %d bytes to LEB %d:%d failed, error %d", + len, lnum, offs, err); + ubifs_ro_mode(c, err); + dbg_dump_stack(); + } + return err; +} + +int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len, + int dtype) +{ + int err; + + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) + return -EROFS; + if (!dbg_is_tst_rcvry(c)) + err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); + else + err = dbg_leb_change(c, lnum, buf, len, dtype); + if (err) { + ubifs_err("changing %d bytes in LEB %d failed, error %d", + len, lnum, err); + ubifs_ro_mode(c, err); + dbg_dump_stack(); + } + return err; +} + +int ubifs_leb_unmap(struct ubifs_info *c, int lnum) +{ + int err; + + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) + return -EROFS; + if (!dbg_is_tst_rcvry(c)) + err = ubi_leb_unmap(c->ubi, lnum); + else + err = dbg_leb_unmap(c, lnum); + if (err) { + ubifs_err("unmap LEB %d failed, error %d", lnum, err); + ubifs_ro_mode(c, err); + dbg_dump_stack(); + } + return err; +} + +int ubifs_leb_map(struct ubifs_info *c, int lnum, int dtype) +{ + int err; + + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) + return -EROFS; + if (!dbg_is_tst_rcvry(c)) + err = ubi_leb_map(c->ubi, lnum, dtype); + else + err = dbg_leb_map(c, lnum, dtype); + if (err) { + ubifs_err("mapping LEB %d failed, error %d", lnum, err); + ubifs_ro_mode(c, err); + dbg_dump_stack(); + } + return err; +} + +int ubifs_is_mapped(const struct ubifs_info *c, int lnum) +{ + int err; + + err = ubi_is_mapped(c->ubi, lnum); + if (err < 0) { + ubifs_err("ubi_is_mapped failed for LEB %d, error %d", + lnum, err); dbg_dump_stack(); } + return err; } /** @@ -406,14 +523,10 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) dirt = sync_len - wbuf->used; if (dirt) ubifs_pad(c, wbuf->buf + wbuf->used, dirt); - err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, - sync_len, wbuf->dtype); - if (err) { - ubifs_err("cannot write %d bytes to LEB %d:%d", - sync_len, wbuf->lnum, wbuf->offs); - dbg_dump_stack(); + err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, sync_len, + wbuf->dtype); + if (err) return err; - } spin_lock(&wbuf->lock); wbuf->offs += sync_len; @@ -605,9 +718,9 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) if (aligned_len == wbuf->avail) { dbg_io("flush jhead %s wbuf to LEB %d:%d", dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); - err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, - wbuf->offs, wbuf->size, - wbuf->dtype); + err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, + wbuf->offs, wbuf->size, + wbuf->dtype); if (err) goto out; @@ -642,8 +755,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) dbg_io("flush jhead %s wbuf to LEB %d:%d", dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); - err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, - wbuf->size, wbuf->dtype); + err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, + wbuf->size, wbuf->dtype); if (err) goto out; @@ -661,8 +774,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) */ dbg_io("write %d bytes to LEB %d:%d", wbuf->size, wbuf->lnum, wbuf->offs); - err = ubi_leb_write(c->ubi, wbuf->lnum, buf, wbuf->offs, - wbuf->size, wbuf->dtype); + err = ubifs_leb_write(c, wbuf->lnum, buf, wbuf->offs, + wbuf->size, wbuf->dtype); if (err) goto out; @@ -683,8 +796,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) n <<= c->max_write_shift; dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, wbuf->offs); - err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, - wbuf->offs, n, wbuf->dtype); + err = ubifs_leb_write(c, wbuf->lnum, buf + written, + wbuf->offs, n, wbuf->dtype); if (err) goto out; wbuf->offs += n; @@ -766,13 +879,9 @@ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum, return -EROFS; ubifs_prepare_node(c, buf, len, 1); - err = ubi_leb_write(c->ubi, lnum, buf, offs, buf_len, dtype); - if (err) { - ubifs_err("cannot write %d bytes to LEB %d:%d, error %d", - buf_len, lnum, offs, err); + err = ubifs_leb_write(c, lnum, buf, offs, buf_len, dtype); + if (err) dbg_dump_node(c, buf); - dbg_dump_stack(); - } return err; } @@ -824,13 +933,9 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, if (rlen > 0) { /* Read everything that goes before write-buffer */ - err = ubi_read(c->ubi, lnum, buf, offs, rlen); - if (err && err != -EBADMSG) { - ubifs_err("failed to read node %d from LEB %d:%d, " - "error %d", type, lnum, offs, err); - dbg_dump_stack(); + err = ubifs_leb_read(c, lnum, buf, offs, rlen, 0); + if (err && err != -EBADMSG) return err; - } } if (type != ch->node_type) { @@ -885,12 +990,9 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, ubifs_assert(!(offs & 7) && offs < c->leb_size); ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); - err = ubi_read(c->ubi, lnum, buf, offs, len); - if (err && err != -EBADMSG) { - ubifs_err("cannot read node %d from LEB %d:%d, error %d", - type, lnum, offs, err); + err = ubifs_leb_read(c, lnum, buf, offs, len, 0); + if (err && err != -EBADMSG) return err; - } if (type != ch->node_type) { ubifs_err("bad node type (%d but expected %d)", diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c index affea94..843beda 100644 --- a/fs/ubifs/log.c +++ b/fs/ubifs/log.c @@ -110,10 +110,14 @@ static inline long long empty_log_bytes(const struct ubifs_info *c) h = (long long)c->lhead_lnum * c->leb_size + c->lhead_offs; t = (long long)c->ltail_lnum * c->leb_size; - if (h >= t) + if (h > t) return c->log_bytes - h + t; - else + else if (h != t) return t - h; + else if (c->lhead_lnum != c->ltail_lnum) + return 0; + else + return c->log_bytes; } /** @@ -262,7 +266,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) * an unclean reboot, because the target LEB might have been * unmapped, but not yet physically erased. */ - err = ubi_leb_map(c->ubi, bud->lnum, UBI_SHORTTERM); + err = ubifs_leb_map(c, bud->lnum, UBI_SHORTTERM); if (err) goto out_unlock; } @@ -283,8 +287,6 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) return 0; out_unlock: - if (err != -EAGAIN) - ubifs_ro_mode(c, err); mutex_unlock(&c->log_mutex); kfree(ref); kfree(bud); @@ -455,9 +457,9 @@ out: * @ltail_lnum: new log tail LEB number * * This function is called on when the commit operation was finished. It - * moves log tail to new position and unmaps LEBs which contain obsolete data. - * Returns zero in case of success and a negative error code in case of - * failure. + * moves log tail to new position and updates the master node so that it stores + * the new log tail LEB number. Returns zero in case of success and a negative + * error code in case of failure. */ int ubifs_log_end_commit(struct ubifs_info *c, int ltail_lnum) { @@ -485,7 +487,12 @@ int ubifs_log_end_commit(struct ubifs_info *c, int ltail_lnum) spin_unlock(&c->buds_lock); err = dbg_check_bud_bytes(c); + if (err) + goto out; + err = ubifs_write_master(c); + +out: mutex_unlock(&c->log_mutex); return err; } @@ -752,7 +759,7 @@ static int dbg_check_bud_bytes(struct ubifs_info *c) struct ubifs_bud *bud; long long bud_bytes = 0; - if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + if (!dbg_is_chk_gen(c)) return 0; spin_lock(&c->buds_lock); diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index ae6f74a..ea9d491 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -510,7 +510,7 @@ static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops) pnode = (struct ubifs_pnode *)container_of(lprops - pos, struct ubifs_pnode, lprops[0]); - return !test_bit(COW_ZNODE, &pnode->flags) && + return !test_bit(COW_CNODE, &pnode->flags) && test_bit(DIRTY_CNODE, &pnode->flags); } @@ -866,7 +866,7 @@ int dbg_check_cats(struct ubifs_info *c) struct list_head *pos; int i, cat; - if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS))) + if (!dbg_is_chk_gen(c) && !dbg_is_chk_lprops(c)) return 0; list_for_each_entry(lprops, &c->empty_list, list) { @@ -964,7 +964,7 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, { int i = 0, j, err = 0; - if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS))) + if (!dbg_is_chk_gen(c) && !dbg_is_chk_lprops(c)) return; for (i = 0; i < heap->cnt; i++) { @@ -1268,7 +1268,7 @@ int dbg_check_lprops(struct ubifs_info *c) int i, err; struct ubifs_lp_stats lst; - if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + if (!dbg_is_chk_lprops(c)) return 0; /* diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index ef5155e..6189c74 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -701,8 +701,8 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, alen = ALIGN(len, c->min_io_size); set_ltab(c, lnum, c->leb_size - alen, alen - len); memset(p, 0xff, alen - len); - err = ubi_leb_change(c->ubi, lnum++, buf, alen, - UBI_SHORTTERM); + err = ubifs_leb_change(c, lnum++, buf, alen, + UBI_SHORTTERM); if (err) goto out; p = buf; @@ -732,8 +732,8 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, set_ltab(c, lnum, c->leb_size - alen, alen - len); memset(p, 0xff, alen - len); - err = ubi_leb_change(c->ubi, lnum++, buf, alen, - UBI_SHORTTERM); + err = ubifs_leb_change(c, lnum++, buf, alen, + UBI_SHORTTERM); if (err) goto out; p = buf; @@ -780,8 +780,8 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, alen = ALIGN(len, c->min_io_size); set_ltab(c, lnum, c->leb_size - alen, alen - len); memset(p, 0xff, alen - len); - err = ubi_leb_change(c->ubi, lnum++, buf, alen, - UBI_SHORTTERM); + err = ubifs_leb_change(c, lnum++, buf, alen, + UBI_SHORTTERM); if (err) goto out; p = buf; @@ -806,7 +806,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, alen = ALIGN(len, c->min_io_size); set_ltab(c, lnum, c->leb_size - alen, alen - len); memset(p, 0xff, alen - len); - err = ubi_leb_change(c->ubi, lnum++, buf, alen, UBI_SHORTTERM); + err = ubifs_leb_change(c, lnum++, buf, alen, UBI_SHORTTERM); if (err) goto out; p = buf; @@ -826,7 +826,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, /* Write remaining buffer */ memset(p, 0xff, alen - len); - err = ubi_leb_change(c->ubi, lnum, buf, alen, UBI_SHORTTERM); + err = ubifs_leb_change(c, lnum, buf, alen, UBI_SHORTTERM); if (err) goto out; @@ -1222,7 +1222,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) if (c->big_lpt) nnode->num = calc_nnode_num_from_parent(c, parent, iip); } else { - err = ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz); + err = ubifs_leb_read(c, lnum, buf, offs, c->nnode_sz, 1); if (err) goto out; err = ubifs_unpack_nnode(c, buf, nnode); @@ -1247,6 +1247,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) out: ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs); + dbg_dump_stack(); kfree(nnode); return err; } @@ -1290,7 +1291,7 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) lprops->flags = ubifs_categorize_lprops(c, lprops); } } else { - err = ubi_read(c->ubi, lnum, buf, offs, c->pnode_sz); + err = ubifs_leb_read(c, lnum, buf, offs, c->pnode_sz, 1); if (err) goto out; err = unpack_pnode(c, buf, pnode); @@ -1312,6 +1313,7 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) out: ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs); dbg_dump_pnode(c, pnode, parent, iip); + dbg_dump_stack(); dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip)); kfree(pnode); return err; @@ -1331,7 +1333,7 @@ static int read_ltab(struct ubifs_info *c) buf = vmalloc(c->ltab_sz); if (!buf) return -ENOMEM; - err = ubi_read(c->ubi, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz); + err = ubifs_leb_read(c, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz, 1); if (err) goto out; err = unpack_ltab(c, buf); @@ -1354,7 +1356,8 @@ static int read_lsave(struct ubifs_info *c) buf = vmalloc(c->lsave_sz); if (!buf) return -ENOMEM; - err = ubi_read(c->ubi, c->lsave_lnum, buf, c->lsave_offs, c->lsave_sz); + err = ubifs_leb_read(c, c->lsave_lnum, buf, c->lsave_offs, + c->lsave_sz, 1); if (err) goto out; err = unpack_lsave(c, buf); @@ -1814,8 +1817,8 @@ static struct ubifs_nnode *scan_get_nnode(struct ubifs_info *c, if (c->big_lpt) nnode->num = calc_nnode_num_from_parent(c, parent, iip); } else { - err = ubi_read(c->ubi, branch->lnum, buf, branch->offs, - c->nnode_sz); + err = ubifs_leb_read(c, branch->lnum, buf, branch->offs, + c->nnode_sz, 1); if (err) return ERR_PTR(err); err = ubifs_unpack_nnode(c, buf, nnode); @@ -1883,8 +1886,8 @@ static struct ubifs_pnode *scan_get_pnode(struct ubifs_info *c, ubifs_assert(branch->lnum >= c->lpt_first && branch->lnum <= c->lpt_last); ubifs_assert(branch->offs >= 0 && branch->offs < c->leb_size); - err = ubi_read(c->ubi, branch->lnum, buf, branch->offs, - c->pnode_sz); + err = ubifs_leb_read(c, branch->lnum, buf, branch->offs, + c->pnode_sz, 1); if (err) return ERR_PTR(err); err = unpack_pnode(c, buf, pnode); @@ -2224,7 +2227,7 @@ int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, struct ubifs_cnode *cn; int num, iip = 0, err; - if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + if (!dbg_is_chk_lprops(c)) return 0; while (cnode) { diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index dfcb574..cddd6bd 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -27,6 +27,7 @@ #include #include +#include #include "ubifs.h" #ifdef CONFIG_UBIFS_FS_DEBUG @@ -116,8 +117,8 @@ static int get_cnodes_to_commit(struct ubifs_info *c) return 0; cnt += 1; while (1) { - ubifs_assert(!test_bit(COW_ZNODE, &cnode->flags)); - __set_bit(COW_ZNODE, &cnode->flags); + ubifs_assert(!test_bit(COW_CNODE, &cnode->flags)); + __set_bit(COW_CNODE, &cnode->flags); cnext = next_dirty_cnode(cnode); if (!cnext) { cnode->cnext = c->lpt_cnext; @@ -465,7 +466,7 @@ static int write_cnodes(struct ubifs_info *c) */ clear_bit(DIRTY_CNODE, &cnode->flags); smp_mb__before_clear_bit(); - clear_bit(COW_ZNODE, &cnode->flags); + clear_bit(COW_CNODE, &cnode->flags); smp_mb__after_clear_bit(); offs += len; dbg_chk_lpt_sz(c, 1, len); @@ -1160,11 +1161,11 @@ static int lpt_gc_lnum(struct ubifs_info *c, int lnum) void *buf = c->lpt_buf; dbg_lp("LEB %d", lnum); - err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); - if (err) { - ubifs_err("cannot read LEB %d, error %d", lnum, err); + + err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1); + if (err) return err; - } + while (1) { if (!is_a_node(c, buf, len)) { int pad_len; @@ -1640,7 +1641,7 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) int ret; void *buf, *p; - if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + if (!dbg_is_chk_lprops(c)) return 0; buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); @@ -1650,11 +1651,11 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) } dbg_lp("LEB %d", lnum); - err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); - if (err) { - dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err); + + err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1); + if (err) goto out; - } + while (1) { if (!is_a_node(c, p, len)) { int i, pad_len; @@ -1711,7 +1712,7 @@ int dbg_check_ltab(struct ubifs_info *c) { int lnum, err, i, cnt; - if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + if (!dbg_is_chk_lprops(c)) return 0; /* Bring the entire tree into memory */ @@ -1754,7 +1755,7 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c) long long free = 0; int i; - if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + if (!dbg_is_chk_lprops(c)) return 0; for (i = 0; i < c->lpt_lebs; i++) { @@ -1796,7 +1797,7 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) long long chk_lpt_sz, lpt_sz; int err = 0; - if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + if (!dbg_is_chk_lprops(c)) return 0; switch (action) { @@ -1901,11 +1902,10 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) return; } - err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); - if (err) { - ubifs_err("cannot read LEB %d, error %d", lnum, err); + err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1); + if (err) goto out; - } + while (1) { offs = c->leb_size - len; if (!is_a_node(c, p, len)) { @@ -2019,7 +2019,7 @@ static int dbg_populate_lsave(struct ubifs_info *c) struct ubifs_lpt_heap *heap; int i; - if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + if (!dbg_is_chk_gen(c)) return 0; if (random32() & 3) return 0; diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c index 278c238..bb9f481 100644 --- a/fs/ubifs/master.c +++ b/fs/ubifs/master.c @@ -352,10 +352,9 @@ int ubifs_read_master(struct ubifs_info *c) * ubifs_write_master - write master node. * @c: UBIFS file-system description object * - * This function writes the master node. The caller has to take the - * @c->mst_mutex lock before calling this function. Returns zero in case of - * success and a negative error code in case of failure. The master node is - * written twice to enable recovery. + * This function writes the master node. Returns zero in case of success and a + * negative error code in case of failure. The master node is written twice to + * enable recovery. */ int ubifs_write_master(struct ubifs_info *c) { diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index 0b5296a..ee7cb5e 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h @@ -39,6 +39,29 @@ static inline int ubifs_zn_dirty(const struct ubifs_znode *znode) } /** + * ubifs_zn_obsolete - check if znode is obsolete. + * @znode: znode to check + * + * This helper function returns %1 if @znode is obsolete and %0 otherwise. + */ +static inline int ubifs_zn_obsolete(const struct ubifs_znode *znode) +{ + return !!test_bit(OBSOLETE_ZNODE, &znode->flags); +} + +/** + * ubifs_zn_cow - check if znode has to be copied on write. + * @znode: znode to check + * + * This helper function returns %1 if @znode is has COW flag set and %0 + * otherwise. + */ +static inline int ubifs_zn_cow(const struct ubifs_znode *znode) +{ + return !!test_bit(COW_ZNODE, &znode->flags); +} + +/** * ubifs_wake_up_bgt - wake up background thread. * @c: UBIFS file-system description object */ @@ -122,86 +145,6 @@ static inline int ubifs_wbuf_sync(struct ubifs_wbuf *wbuf) } /** - * ubifs_leb_unmap - unmap an LEB. - * @c: UBIFS file-system description object - * @lnum: LEB number to unmap - * - * This function returns %0 on success and a negative error code on failure. - */ -static inline int ubifs_leb_unmap(const struct ubifs_info *c, int lnum) -{ - int err; - - ubifs_assert(!c->ro_media && !c->ro_mount); - if (c->ro_error) - return -EROFS; - err = ubi_leb_unmap(c->ubi, lnum); - if (err) { - ubifs_err("unmap LEB %d failed, error %d", lnum, err); - return err; - } - - return 0; -} - -/** - * ubifs_leb_write - write to a LEB. - * @c: UBIFS file-system description object - * @lnum: LEB number to write - * @buf: buffer to write from - * @offs: offset within LEB to write to - * @len: length to write - * @dtype: data type - * - * This function returns %0 on success and a negative error code on failure. - */ -static inline int ubifs_leb_write(const struct ubifs_info *c, int lnum, - const void *buf, int offs, int len, int dtype) -{ - int err; - - ubifs_assert(!c->ro_media && !c->ro_mount); - if (c->ro_error) - return -EROFS; - err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); - if (err) { - ubifs_err("writing %d bytes at %d:%d, error %d", - len, lnum, offs, err); - return err; - } - - return 0; -} - -/** - * ubifs_leb_change - atomic LEB change. - * @c: UBIFS file-system description object - * @lnum: LEB number to write - * @buf: buffer to write from - * @len: length to write - * @dtype: data type - * - * This function returns %0 on success and a negative error code on failure. - */ -static inline int ubifs_leb_change(const struct ubifs_info *c, int lnum, - const void *buf, int len, int dtype) -{ - int err; - - ubifs_assert(!c->ro_media && !c->ro_mount); - if (c->ro_error) - return -EROFS; - err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); - if (err) { - ubifs_err("changing %d bytes in LEB %d, error %d", - len, lnum, err); - return err; - } - - return 0; -} - -/** * ubifs_encode_dev - encode device node IDs. * @dev: UBIFS device node information * @rdev: device IDs to encode diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index a5422ff..f9c90b5 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -130,13 +130,14 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum) else if (inum > o->inum) p = p->rb_right; else { - if (o->dnext) { + if (o->del) { spin_unlock(&c->orphan_lock); dbg_gen("deleted twice ino %lu", (unsigned long)inum); return; } if (o->cnext) { + o->del = 1; o->dnext = c->orph_dnext; c->orph_dnext = o; spin_unlock(&c->orphan_lock); @@ -447,6 +448,7 @@ static void erase_deleted(struct ubifs_info *c) orphan = dnext; dnext = orphan->dnext; ubifs_assert(!orphan->new); + ubifs_assert(orphan->del); rb_erase(&orphan->rb, &c->orph_tree); list_del(&orphan->list); c->tot_orphans -= 1; @@ -536,6 +538,7 @@ static int insert_dead_orphan(struct ubifs_info *c, ino_t inum) rb_link_node(&orphan->rb, parent, p); rb_insert_color(&orphan->rb, &c->orph_tree); list_add_tail(&orphan->list, &c->orph_list); + orphan->del = 1; orphan->dnext = c->orph_dnext; c->orph_dnext = orphan; dbg_mnt("ino %lu, new %d, tot %d", (unsigned long)inum, @@ -929,7 +932,7 @@ static int dbg_check_orphans(struct ubifs_info *c) struct check_info ci; int err; - if (!(ubifs_chk_flags & UBIFS_CHK_ORPH)) + if (!dbg_is_chk_orph(c)) return 0; ci.last_ino = 0; diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 783d8e0..ee4f43f 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -117,7 +117,7 @@ static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf, if (!sbuf) return -ENOMEM; - err = ubi_read(c->ubi, lnum, sbuf, 0, c->leb_size); + err = ubifs_leb_read(c, lnum, sbuf, 0, c->leb_size, 0); if (err && err != -EBADMSG) goto out_free; @@ -213,10 +213,10 @@ static int write_rcvrd_mst_node(struct ubifs_info *c, mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY); ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1); - err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM); + err = ubifs_leb_change(c, lnum, mst, sz, UBI_SHORTTERM); if (err) goto out; - err = ubi_leb_change(c->ubi, lnum + 1, mst, sz, UBI_SHORTTERM); + err = ubifs_leb_change(c, lnum + 1, mst, sz, UBI_SHORTTERM); if (err) goto out; out: @@ -274,7 +274,8 @@ int ubifs_recover_master_node(struct ubifs_info *c) if (cor1) goto out_err; mst = mst1; - } else if (offs1 == 0 && offs2 + sz >= c->leb_size) { + } else if (offs1 == 0 && + c->leb_size - offs2 - sz < sz) { /* 1st LEB was unmapped and written, 2nd not */ if (cor1) goto out_err; @@ -539,8 +540,8 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, int len = ALIGN(endpt, c->min_io_size); if (start) { - err = ubi_read(c->ubi, lnum, sleb->buf, 0, - start); + err = ubifs_leb_read(c, lnum, sleb->buf, 0, + start, 1); if (err) return err; } @@ -554,8 +555,8 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, ubifs_pad(c, buf, pad_len); } } - err = ubi_leb_change(c->ubi, lnum, sleb->buf, len, - UBI_UNKNOWN); + err = ubifs_leb_change(c, lnum, sleb->buf, len, + UBI_UNKNOWN); if (err) return err; } @@ -819,7 +820,8 @@ static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs, return -ENOMEM; if (c->leb_size - offs < UBIFS_CS_NODE_SZ) goto out_err; - err = ubi_read(c->ubi, lnum, (void *)cs_node, offs, UBIFS_CS_NODE_SZ); + err = ubifs_leb_read(c, lnum, (void *)cs_node, offs, + UBIFS_CS_NODE_SZ, 0); if (err && err != -EBADMSG) goto out_free; ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0); @@ -919,8 +921,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, * * This function returns %0 on success and a negative error code on failure. */ -static int recover_head(const struct ubifs_info *c, int lnum, int offs, - void *sbuf) +static int recover_head(struct ubifs_info *c, int lnum, int offs, void *sbuf) { int len = c->max_write_size, err; @@ -931,15 +932,15 @@ static int recover_head(const struct ubifs_info *c, int lnum, int offs, return 0; /* Read at the head location and check it is empty flash */ - err = ubi_read(c->ubi, lnum, sbuf, offs, len); + err = ubifs_leb_read(c, lnum, sbuf, offs, len, 1); if (err || !is_empty(sbuf, len)) { dbg_rcvry("cleaning head at %d:%d", lnum, offs); if (offs == 0) return ubifs_leb_unmap(c, lnum); - err = ubi_read(c->ubi, lnum, sbuf, 0, offs); + err = ubifs_leb_read(c, lnum, sbuf, 0, offs, 1); if (err) return err; - return ubi_leb_change(c->ubi, lnum, sbuf, offs, UBI_UNKNOWN); + return ubifs_leb_change(c, lnum, sbuf, offs, UBI_UNKNOWN); } return 0; @@ -962,7 +963,7 @@ static int recover_head(const struct ubifs_info *c, int lnum, int offs, * * This function returns %0 on success and a negative error code on failure. */ -int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) +int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf) { int err; @@ -982,7 +983,7 @@ int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) } /** - * clean_an_unclean_leb - read and write a LEB to remove corruption. + * clean_an_unclean_leb - read and write a LEB to remove corruption. * @c: UBIFS file-system description object * @ucleb: unclean LEB information * @sbuf: LEB-sized buffer to use @@ -993,7 +994,7 @@ int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) * * This function returns %0 on success and a negative error code on failure. */ -static int clean_an_unclean_leb(const struct ubifs_info *c, +static int clean_an_unclean_leb(struct ubifs_info *c, struct ubifs_unclean_leb *ucleb, void *sbuf) { int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1; @@ -1009,7 +1010,7 @@ static int clean_an_unclean_leb(const struct ubifs_info *c, return 0; } - err = ubi_read(c->ubi, lnum, buf, offs, len); + err = ubifs_leb_read(c, lnum, buf, offs, len, 0); if (err && err != -EBADMSG) return err; @@ -1069,7 +1070,7 @@ static int clean_an_unclean_leb(const struct ubifs_info *c, } /* Write back the LEB atomically */ - err = ubi_leb_change(c->ubi, lnum, sbuf, len, UBI_UNKNOWN); + err = ubifs_leb_change(c, lnum, sbuf, len, UBI_UNKNOWN); if (err) return err; @@ -1089,7 +1090,7 @@ static int clean_an_unclean_leb(const struct ubifs_info *c, * * This function returns %0 on success and a negative error code on failure. */ -int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf) +int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf) { dbg_rcvry("recovery"); while (!list_empty(&c->unclean_leb_list)) { @@ -1454,7 +1455,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) if (i_size >= e->d_size) return 0; /* Read the LEB */ - err = ubi_read(c->ubi, lnum, c->sbuf, 0, c->leb_size); + err = ubifs_leb_read(c, lnum, c->sbuf, 0, c->leb_size, 1); if (err) goto out; /* Change the size field and recalculate the CRC */ @@ -1470,7 +1471,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) len -= 1; len = ALIGN(len + 1, c->min_io_size); /* Atomically write the fixed LEB back again */ - err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); + err = ubifs_leb_change(c, lnum, c->sbuf, len, UBI_UNKNOWN); if (err) goto out; dbg_rcvry("inode %lu at %d:%d size %lld -> %lld", diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 5e97161..ccabaf1 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -523,8 +523,7 @@ static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud) if (!list_is_last(&next->list, &jh->buds_list)) return 0; - err = ubi_read(c->ubi, next->lnum, (char *)&data, - next->start, 4); + err = ubifs_leb_read(c, next->lnum, (char *)&data, next->start, 4, 1); if (err) return 0; diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index 1250016..b73ecd8 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -247,7 +247,7 @@ static int create_default_filesystem(struct ubifs_info *c) mst->total_dirty = cpu_to_le64(tmp64); /* The indexing LEB does not contribute to dark space */ - tmp64 = (c->main_lebs - 1) * c->dark_wm; + tmp64 = ((long long)(c->main_lebs - 1) * c->dark_wm); mst->total_dark = cpu_to_le64(tmp64); mst->total_used = cpu_to_le64(UBIFS_INO_NODE_SZ); @@ -674,15 +674,15 @@ static int fixup_leb(struct ubifs_info *c, int lnum, int len) if (len == 0) { dbg_mnt("unmap empty LEB %d", lnum); - return ubi_leb_unmap(c->ubi, lnum); + return ubifs_leb_unmap(c, lnum); } dbg_mnt("fixup LEB %d, data len %d", lnum, len); - err = ubi_read(c->ubi, lnum, c->sbuf, 0, len); + err = ubifs_leb_read(c, lnum, c->sbuf, 0, len, 1); if (err) return err; - return ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); + return ubifs_leb_change(c, lnum, c->sbuf, len, UBI_UNKNOWN); } /** diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c index 36216b4..37383e8 100644 --- a/fs/ubifs/scan.c +++ b/fs/ubifs/scan.c @@ -148,7 +148,7 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum, INIT_LIST_HEAD(&sleb->nodes); sleb->buf = sbuf; - err = ubi_read(c->ubi, lnum, sbuf + offs, offs, c->leb_size - offs); + err = ubifs_leb_read(c, lnum, sbuf + offs, offs, c->leb_size - offs, 0); if (err && err != -EBADMSG) { ubifs_err("cannot read %d bytes from LEB %d:%d," " error %d", c->leb_size - offs, lnum, offs, err); @@ -240,7 +240,7 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, int len; ubifs_err("corruption at LEB %d:%d", lnum, offs); - if (dbg_failure_mode) + if (dbg_is_tst_rcvry(c)) return; len = c->leb_size - offs; if (len > 8192) diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index 9e1d056..e0a7a76 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c @@ -128,7 +128,6 @@ static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention) freed = ubifs_destroy_tnc_subtree(znode); atomic_long_sub(freed, &ubifs_clean_zn_cnt); atomic_long_sub(freed, &c->clean_zn_cnt); - ubifs_assert(atomic_long_read(&c->clean_zn_cnt) >= 0); total_freed += freed; znode = zprev; } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index db04976..201bcfc 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -85,7 +85,7 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode) if (ui->data_len < 0 || ui->data_len > UBIFS_MAX_INO_DATA) return 4; - if (ui->xattr && (inode->i_mode & S_IFMT) != S_IFREG) + if (ui->xattr && !S_ISREG(inode->i_mode)) return 5; if (!ubifs_compr_present(ui->compr_type)) { @@ -94,7 +94,7 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode) ubifs_compr_name(ui->compr_type)); } - err = dbg_check_dir_size(c, inode); + err = dbg_check_dir(c, inode); return err; } @@ -129,7 +129,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) goto out_ino; inode->i_flags |= (S_NOCMTIME | S_NOATIME); - inode->i_nlink = le32_to_cpu(ino->nlink); + set_nlink(inode, le32_to_cpu(ino->nlink)); inode->i_uid = le32_to_cpu(ino->uid); inode->i_gid = le32_to_cpu(ino->gid); inode->i_atime.tv_sec = (int64_t)le64_to_cpu(ino->atime_sec); @@ -914,7 +914,7 @@ static int check_volume_empty(struct ubifs_info *c) c->empty = 1; for (lnum = 0; lnum < c->leb_cnt; lnum++) { - err = ubi_is_mapped(c->ubi, lnum); + err = ubifs_is_mapped(c, lnum); if (unlikely(err < 0)) return err; if (err == 1) { @@ -1985,7 +1985,6 @@ static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi) mutex_init(&c->lp_mutex); mutex_init(&c->tnc_mutex); mutex_init(&c->log_mutex); - mutex_init(&c->mst_mutex); mutex_init(&c->umount_mutex); mutex_init(&c->bu_mutex); mutex_init(&c->write_reserve_mutex); @@ -2264,19 +2263,12 @@ static int __init ubifs_init(void) return -EINVAL; } - err = register_filesystem(&ubifs_fs_type); - if (err) { - ubifs_err("cannot register file system, error %d", err); - return err; - } - - err = -ENOMEM; ubifs_inode_slab = kmem_cache_create("ubifs_inode_slab", sizeof(struct ubifs_inode), 0, SLAB_MEM_SPREAD | SLAB_RECLAIM_ACCOUNT, &inode_slab_ctor); if (!ubifs_inode_slab) - goto out_reg; + return -ENOMEM; register_shrinker(&ubifs_shrinker_info); @@ -2288,15 +2280,20 @@ static int __init ubifs_init(void) if (err) goto out_compr; + err = register_filesystem(&ubifs_fs_type); + if (err) { + ubifs_err("cannot register file system, error %d", err); + goto out_dbg; + } return 0; +out_dbg: + dbg_debugfs_exit(); out_compr: ubifs_compressors_exit(); out_shrinker: unregister_shrinker(&ubifs_shrinker_info); kmem_cache_destroy(ubifs_inode_slab); -out_reg: - unregister_filesystem(&ubifs_fs_type); return err; } /* late_initcall to let compressors initialize first */ diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 91b4213..0667386 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -223,7 +223,7 @@ static struct ubifs_znode *copy_znode(struct ubifs_info *c, __set_bit(DIRTY_ZNODE, &zn->flags); __clear_bit(COW_ZNODE, &zn->flags); - ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); + ubifs_assert(!ubifs_zn_obsolete(znode)); __set_bit(OBSOLETE_ZNODE, &znode->flags); if (znode->level != 0) { @@ -271,7 +271,7 @@ static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c, struct ubifs_znode *zn; int err; - if (!test_bit(COW_ZNODE, &znode->flags)) { + if (!ubifs_zn_cow(znode)) { /* znode is not being committed */ if (!test_and_set_bit(DIRTY_ZNODE, &znode->flags)) { atomic_long_inc(&c->dirty_zn_cnt); @@ -462,7 +462,7 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type, dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); - err = ubi_read(c->ubi, lnum, buf, offs, len); + err = ubifs_leb_read(c, lnum, buf, offs, len, 1); if (err) { ubifs_err("cannot read node type %d from LEB %d:%d, error %d", type, lnum, offs, err); @@ -1666,7 +1666,7 @@ static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum, if (!overlap) { /* We may safely unlock the write-buffer and read the data */ spin_unlock(&wbuf->lock); - return ubi_read(c->ubi, lnum, buf, offs, len); + return ubifs_leb_read(c, lnum, buf, offs, len, 0); } /* Don't read under wbuf */ @@ -1680,7 +1680,7 @@ static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum, if (rlen > 0) /* Read everything that goes before write-buffer */ - return ubi_read(c->ubi, lnum, buf, offs, rlen); + return ubifs_leb_read(c, lnum, buf, offs, rlen, 0); return 0; } @@ -1767,7 +1767,7 @@ int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu) if (wbuf) err = read_wbuf(wbuf, bu->buf, len, lnum, offs); else - err = ubi_read(c->ubi, lnum, bu->buf, offs, len); + err = ubifs_leb_read(c, lnum, bu->buf, offs, len, 0); /* Check for a race with GC */ if (maybe_leb_gced(c, lnum, bu->gc_seq)) @@ -2423,7 +2423,7 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n) */ do { - ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); + ubifs_assert(!ubifs_zn_obsolete(znode)); ubifs_assert(ubifs_zn_dirty(znode)); zp = znode->parent; @@ -2479,9 +2479,8 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n) c->zroot.offs = zbr->offs; c->zroot.len = zbr->len; c->zroot.znode = znode; - ubifs_assert(!test_bit(OBSOLETE_ZNODE, - &zp->flags)); - ubifs_assert(test_bit(DIRTY_ZNODE, &zp->flags)); + ubifs_assert(!ubifs_zn_obsolete(zp)); + ubifs_assert(ubifs_zn_dirty(zp)); atomic_long_dec(&c->dirty_zn_cnt); if (zp->cnext) { @@ -2865,7 +2864,7 @@ static void tnc_destroy_cnext(struct ubifs_info *c) struct ubifs_znode *znode = cnext; cnext = cnext->cnext; - if (test_bit(OBSOLETE_ZNODE, &znode->flags)) + if (ubifs_zn_obsolete(znode)) kfree(znode); } while (cnext && cnext != c->cnext); } @@ -3301,7 +3300,7 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, if (!S_ISREG(inode->i_mode)) return 0; - if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + if (!dbg_is_chk_gen(c)) return 0; block = (size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT; @@ -3337,9 +3336,10 @@ out_dump: ubifs_err("inode %lu has size %lld, but there are data at offset %lld " "(data key %s)", (unsigned long)inode->i_ino, size, ((loff_t)block) << UBIFS_BLOCK_SHIFT, DBGKEY(key)); + mutex_unlock(&c->tnc_mutex); dbg_dump_inode(c, inode); dbg_dump_stack(); - err = -EINVAL; + return -EINVAL; out_unlock: mutex_unlock(&c->tnc_mutex); diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index 41920f3..4c15f07 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c @@ -22,6 +22,7 @@ /* This file implements TNC functions for committing */ +#include #include "ubifs.h" /** @@ -87,8 +88,12 @@ static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx, atomic_long_dec(&c->dirty_zn_cnt); ubifs_assert(ubifs_zn_dirty(znode)); - ubifs_assert(test_bit(COW_ZNODE, &znode->flags)); + ubifs_assert(ubifs_zn_cow(znode)); + /* + * Note, unlike 'write_index()' we do not add memory barriers here + * because this function is called with @c->tnc_mutex locked. + */ __clear_bit(DIRTY_ZNODE, &znode->flags); __clear_bit(COW_ZNODE, &znode->flags); @@ -377,7 +382,7 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt) c->gap_lebs = NULL; return err; } - if (dbg_force_in_the_gaps_enabled()) { + if (!dbg_is_chk_index(c)) { /* * Do not print scary warnings if the debugging * option which forces in-the-gaps is enabled. @@ -491,25 +496,6 @@ static int layout_in_empty_space(struct ubifs_info *c) else next_len = ubifs_idx_node_sz(c, cnext->child_cnt); - if (c->min_io_size == 1) { - buf_offs += ALIGN(len, 8); - if (next_len) { - if (buf_offs + next_len <= c->leb_size) - continue; - err = ubifs_update_one_lp(c, lnum, 0, - c->leb_size - buf_offs, 0, 0); - if (err) - return err; - lnum = -1; - continue; - } - err = ubifs_update_one_lp(c, lnum, - c->leb_size - buf_offs, 0, 0, 0); - if (err) - return err; - break; - } - /* Update buffer positions */ wlen = used + len; used += ALIGN(len, 8); @@ -658,7 +644,7 @@ static int get_znodes_to_commit(struct ubifs_info *c) } cnt += 1; while (1) { - ubifs_assert(!test_bit(COW_ZNODE, &znode->flags)); + ubifs_assert(!ubifs_zn_cow(znode)); __set_bit(COW_ZNODE, &znode->flags); znode->alt = 0; cnext = find_next_dirty(znode); @@ -704,7 +690,7 @@ static int alloc_idx_lebs(struct ubifs_info *c, int cnt) c->ilebs[c->ileb_cnt++] = lnum; dbg_cmt("LEB %d", lnum); } - if (dbg_force_in_the_gaps()) + if (dbg_is_chk_index(c) && !(random32() & 7)) return -ENOSPC; return 0; } @@ -830,7 +816,7 @@ static int write_index(struct ubifs_info *c) struct ubifs_idx_node *idx; struct ubifs_znode *znode, *cnext; int i, lnum, offs, len, next_len, buf_len, buf_offs, used; - int avail, wlen, err, lnum_pos = 0; + int avail, wlen, err, lnum_pos = 0, blen, nxt_offs; cnext = c->enext; if (!cnext) @@ -907,7 +893,7 @@ static int write_index(struct ubifs_info *c) cnext = znode->cnext; ubifs_assert(ubifs_zn_dirty(znode)); - ubifs_assert(test_bit(COW_ZNODE, &znode->flags)); + ubifs_assert(ubifs_zn_cow(znode)); /* * It is important that other threads should see %DIRTY_ZNODE @@ -922,6 +908,28 @@ static int write_index(struct ubifs_info *c) clear_bit(COW_ZNODE, &znode->flags); smp_mb__after_clear_bit(); + /* + * We have marked the znode as clean but have not updated the + * @c->clean_zn_cnt counter. If this znode becomes dirty again + * before 'free_obsolete_znodes()' is called, then + * @c->clean_zn_cnt will be decremented before it gets + * incremented (resulting in 2 decrements for the same znode). + * This means that @c->clean_zn_cnt may become negative for a + * while. + * + * Q: why we cannot increment @c->clean_zn_cnt? + * A: because we do not have the @c->tnc_mutex locked, and the + * following code would be racy and buggy: + * + * if (!ubifs_zn_obsolete(znode)) { + * atomic_long_inc(&c->clean_zn_cnt); + * atomic_long_inc(&ubifs_clean_zn_cnt); + * } + * + * Thus, we just delay the @c->clean_zn_cnt update until we + * have the mutex locked. + */ + /* Do not access znode from this point on */ /* Update buffer positions */ @@ -938,65 +946,38 @@ static int write_index(struct ubifs_info *c) else next_len = ubifs_idx_node_sz(c, cnext->child_cnt); - if (c->min_io_size == 1) { - /* - * Write the prepared index node immediately if there is - * no minimum IO size - */ - err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, - wlen, UBI_SHORTTERM); - if (err) - return err; - buf_offs += ALIGN(wlen, 8); - if (next_len) { - used = 0; - avail = buf_len; - if (buf_offs + next_len > c->leb_size) { - err = ubifs_update_one_lp(c, lnum, - LPROPS_NC, 0, 0, LPROPS_TAKEN); - if (err) - return err; - lnum = -1; - } + nxt_offs = buf_offs + used + next_len; + if (next_len && nxt_offs <= c->leb_size) { + if (avail > 0) continue; - } + else + blen = buf_len; } else { - int blen, nxt_offs = buf_offs + used + next_len; - - if (next_len && nxt_offs <= c->leb_size) { - if (avail > 0) - continue; - else - blen = buf_len; - } else { - wlen = ALIGN(wlen, 8); - blen = ALIGN(wlen, c->min_io_size); - ubifs_pad(c, c->cbuf + wlen, blen - wlen); - } - /* - * The buffer is full or there are no more znodes - * to do - */ - err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, - blen, UBI_SHORTTERM); - if (err) - return err; - buf_offs += blen; - if (next_len) { - if (nxt_offs > c->leb_size) { - err = ubifs_update_one_lp(c, lnum, - LPROPS_NC, 0, 0, LPROPS_TAKEN); - if (err) - return err; - lnum = -1; - } - used -= blen; - if (used < 0) - used = 0; - avail = buf_len - used; - memmove(c->cbuf, c->cbuf + blen, used); - continue; + wlen = ALIGN(wlen, 8); + blen = ALIGN(wlen, c->min_io_size); + ubifs_pad(c, c->cbuf + wlen, blen - wlen); + } + + /* The buffer is full or there are no more znodes to do */ + err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, blen, + UBI_SHORTTERM); + if (err) + return err; + buf_offs += blen; + if (next_len) { + if (nxt_offs > c->leb_size) { + err = ubifs_update_one_lp(c, lnum, LPROPS_NC, 0, + 0, LPROPS_TAKEN); + if (err) + return err; + lnum = -1; } + used -= blen; + if (used < 0) + used = 0; + avail = buf_len - used; + memmove(c->cbuf, c->cbuf + blen, used); + continue; } break; } @@ -1029,7 +1010,7 @@ static void free_obsolete_znodes(struct ubifs_info *c) do { znode = cnext; cnext = znode->cnext; - if (test_bit(OBSOLETE_ZNODE, &znode->flags)) + if (ubifs_zn_obsolete(znode)) kfree(znode); else { znode->cnext = NULL; diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 62d50a9..223dd42 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -230,14 +230,14 @@ enum { * LPT cnode flag bits. * * DIRTY_CNODE: cnode is dirty - * COW_CNODE: cnode is being committed and must be copied before writing * OBSOLETE_CNODE: cnode is being committed and has been copied (or deleted), - * so it can (and must) be freed when the commit is finished + * so it can (and must) be freed when the commit is finished + * COW_CNODE: cnode is being committed and must be copied before writing */ enum { DIRTY_CNODE = 0, - COW_CNODE = 1, - OBSOLETE_CNODE = 2, + OBSOLETE_CNODE = 1, + COW_CNODE = 2, }; /* @@ -908,6 +908,7 @@ struct ubifs_budget_req { * @dnext: next orphan to delete * @inum: inode number * @new: %1 => added since the last commit, otherwise %0 + * @del: %1 => delete pending, otherwise %0 */ struct ubifs_orphan { struct rb_node rb; @@ -917,6 +918,7 @@ struct ubifs_orphan { struct ubifs_orphan *dnext; ino_t inum; int new; + unsigned del:1; }; /** @@ -1042,7 +1044,6 @@ struct ubifs_debug_info; * * @mst_node: master node * @mst_offs: offset of valid master node - * @mst_mutex: protects the master node area, @mst_node, and @mst_offs * * @max_bu_buf_len: maximum bulk-read buffer length * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu @@ -1282,7 +1283,6 @@ struct ubifs_info { struct ubifs_mst_node *mst_node; int mst_offs; - struct mutex mst_mutex; int max_bu_buf_len; struct mutex bu_mutex; @@ -1471,6 +1471,15 @@ extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; /* io.c */ void ubifs_ro_mode(struct ubifs_info *c, int err); +int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs, + int len, int even_ebadmsg); +int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs, + int len, int dtype); +int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len, + int dtype); +int ubifs_leb_unmap(struct ubifs_info *c, int lnum); +int ubifs_leb_map(struct ubifs_info *c, int lnum, int dtype); +int ubifs_is_mapped(const struct ubifs_info *c, int lnum); int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len); int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, int dtype); @@ -1723,7 +1732,7 @@ const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c); int ubifs_calc_dark(const struct ubifs_info *c, int spc); /* file.c */ -int ubifs_fsync(struct file *file, int datasync); +int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync); int ubifs_setattr(struct dentry *dentry, struct iattr *attr); /* dir.c */ @@ -1750,8 +1759,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf, int jhead); struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf); -int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); -int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf); +int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf); +int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf); int ubifs_rcvry_gc_commit(struct ubifs_info *c); int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key, int deletion, loff_t new_size); diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 16f19f5..bf18f7a 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -558,10 +558,10 @@ int ubifs_removexattr(struct dentry *dentry, const char *name) } ubifs_assert(inode->i_nlink == 1); - inode->i_nlink = 0; + clear_nlink(inode); err = remove_xattr(c, host, inode, &nm); if (err) - inode->i_nlink = 1; + set_nlink(inode, 1); /* If @i_nlink is 0, 'iput()' will delete the inode */ iput(inode); diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index 95518a9..987585b 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c @@ -59,8 +59,8 @@ static int __load_block_bitmap(struct super_block *sb, int nr_groups = bitmap->s_nr_groups; if (block_group >= nr_groups) { - udf_debug("block_group (%d) > nr_groups (%d)\n", block_group, - nr_groups); + udf_debug("block_group (%d) > nr_groups (%d)\n", + block_group, nr_groups); } if (bitmap->s_block_bitmap[block_group]) { @@ -126,8 +126,9 @@ static void udf_bitmap_free_blocks(struct super_block *sb, if (bloc->logicalBlockNum + count < count || (bloc->logicalBlockNum + count) > partmap->s_partition_len) { udf_debug("%d < %d || %d + %d > %d\n", - bloc->logicalBlockNum, 0, bloc->logicalBlockNum, - count, partmap->s_partition_len); + bloc->logicalBlockNum, 0, + bloc->logicalBlockNum, count, + partmap->s_partition_len); goto error_return; } @@ -155,7 +156,7 @@ static void udf_bitmap_free_blocks(struct super_block *sb, if (udf_set_bit(bit + i, bh->b_data)) { udf_debug("bit %ld already set\n", bit + i); udf_debug("byte=%2x\n", - ((char *)bh->b_data)[(bit + i) >> 3]); + ((char *)bh->b_data)[(bit + i) >> 3]); } } udf_add_free_space(sb, sbi->s_partition, count); @@ -369,7 +370,8 @@ static void udf_table_free_blocks(struct super_block *sb, if (bloc->logicalBlockNum + count < count || (bloc->logicalBlockNum + count) > partmap->s_partition_len) { udf_debug("%d < %d || %d + %d > %d\n", - bloc->logicalBlockNum, 0, bloc->logicalBlockNum, count, + bloc->logicalBlockNum, 0, + bloc->logicalBlockNum, count, partmap->s_partition_len); goto error_return; } diff --git a/fs/udf/dir.c b/fs/udf/dir.c index eb8bfe2..56341af 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -163,7 +163,8 @@ static int do_udf_readdir(struct inode *dir, struct file *filp, struct kernel_lb_addr tloc = lelb_to_cpu(cfi.icb.extLocation); iblock = udf_get_lb_pblock(dir->i_sb, &tloc, 0); - flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi); + flen = udf_get_filename(dir->i_sb, nameptr, lfi, fname, + UDF_NAME_LEN); dt_type = DT_UNKNOWN; } diff --git a/fs/udf/directory.c b/fs/udf/directory.c index 2ffdb67..3e44f57 100644 --- a/fs/udf/directory.c +++ b/fs/udf/directory.c @@ -162,8 +162,8 @@ struct fileIdentDesc *udf_get_fileident(void *buffer, int bufsize, int *offset) int padlen; if ((!buffer) || (!offset)) { - udf_debug("invalidparms\n, buffer=%p, offset=%p\n", buffer, - offset); + udf_debug("invalidparms, buffer=%p, offset=%p\n", + buffer, offset); return NULL; } @@ -201,7 +201,7 @@ struct short_ad *udf_get_fileshortad(uint8_t *ptr, int maxoffset, uint32_t *offs struct short_ad *sa; if ((!ptr) || (!offset)) { - printk(KERN_ERR "udf: udf_get_fileshortad() invalidparms\n"); + pr_err("%s: invalidparms\n", __func__); return NULL; } @@ -223,7 +223,7 @@ struct long_ad *udf_get_filelongad(uint8_t *ptr, int maxoffset, uint32_t *offset struct long_ad *la; if ((!ptr) || (!offset)) { - printk(KERN_ERR "udf: udf_get_filelongad() invalidparms\n"); + pr_err("%s: invalidparms\n", __func__); return NULL; } diff --git a/fs/udf/file.c b/fs/udf/file.c index 8eb9628..874c9e3 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -173,7 +173,7 @@ long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) long old_block, new_block; int result = -EINVAL; - if (file_permission(filp, MAY_READ) != 0) { + if (inode_permission(inode, MAY_READ) != 0) { udf_debug("no permission to access inode %lu\n", inode->i_ino); result = -EPERM; goto out; diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 957c974..e081440 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "udf_i.h" #include "udf_sb.h" @@ -83,12 +84,10 @@ void udf_evict_inode(struct inode *inode) end_writeback(inode); if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && inode->i_size != iinfo->i_lenExtents) { - printk(KERN_WARNING "UDF-fs (%s): Inode %lu (mode %o) has " - "inode size %llu different from extent length %llu. " - "Filesystem need not be standards compliant.\n", - inode->i_sb->s_id, inode->i_ino, inode->i_mode, - (unsigned long long)inode->i_size, - (unsigned long long)iinfo->i_lenExtents); + udf_warn(inode->i_sb, "Inode %lu (mode %o) has inode size %llu different from extent length %llu. Filesystem need not be standards compliant.\n", + inode->i_ino, inode->i_mode, + (unsigned long long)inode->i_size, + (unsigned long long)iinfo->i_lenExtents); } kfree(iinfo->i_ext.i_data); iinfo->i_ext.i_data = NULL; @@ -104,7 +103,13 @@ static int udf_writepage(struct page *page, struct writeback_control *wbc) static int udf_readpage(struct file *file, struct page *page) { - return block_read_full_page(page, udf_get_block); + return mpage_readpage(page, udf_get_block); +} + +static int udf_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, udf_get_block); } static int udf_write_begin(struct file *file, struct address_space *mapping, @@ -139,6 +144,7 @@ static sector_t udf_bmap(struct address_space *mapping, sector_t block) const struct address_space_operations udf_aops = { .readpage = udf_readpage, + .readpages = udf_readpages, .writepage = udf_writepage, .write_begin = udf_write_begin, .write_end = generic_write_end, @@ -1170,13 +1176,22 @@ update_time: return 0; } +/* + * Maximum length of linked list formed by ICB hierarchy. The chosen number is + * arbitrary - just that we hopefully don't limit any real use of rewritten + * inode on write-once media but avoid looping for too long on corrupted media. + */ +#define UDF_MAX_ICB_NESTING 1024 + static void __udf_read_inode(struct inode *inode) { struct buffer_head *bh = NULL; struct fileEntry *fe; uint16_t ident; struct udf_inode_info *iinfo = UDF_I(inode); + unsigned int indirections = 0; +reread: /* * Set defaults, but the inode is still incomplete! * Note: get_new_inode() sets the following on a new inode: @@ -1191,16 +1206,15 @@ static void __udf_read_inode(struct inode *inode) */ bh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 0, &ident); if (!bh) { - printk(KERN_ERR "udf: udf_read_inode(ino %ld) failed !bh\n", - inode->i_ino); + udf_err(inode->i_sb, "(ino %ld) failed !bh\n", inode->i_ino); make_bad_inode(inode); return; } if (ident != TAG_IDENT_FE && ident != TAG_IDENT_EFE && ident != TAG_IDENT_USE) { - printk(KERN_ERR "udf: udf_read_inode(ino %ld) " - "failed ident=%d\n", inode->i_ino, ident); + udf_err(inode->i_sb, "(ino %ld) failed ident=%d\n", + inode->i_ino, ident); brelse(bh); make_bad_inode(inode); return; @@ -1214,34 +1228,32 @@ static void __udf_read_inode(struct inode *inode) ibh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 1, &ident); if (ident == TAG_IDENT_IE && ibh) { - struct buffer_head *nbh = NULL; struct kernel_lb_addr loc; struct indirectEntry *ie; ie = (struct indirectEntry *)ibh->b_data; loc = lelb_to_cpu(ie->indirectICB.extLocation); - if (ie->indirectICB.extLength && - (nbh = udf_read_ptagged(inode->i_sb, &loc, 0, - &ident))) { - if (ident == TAG_IDENT_FE || - ident == TAG_IDENT_EFE) { - memcpy(&iinfo->i_location, - &loc, - sizeof(struct kernel_lb_addr)); - brelse(bh); - brelse(ibh); - brelse(nbh); - __udf_read_inode(inode); + if (ie->indirectICB.extLength) { + brelse(bh); + brelse(ibh); + memcpy(&iinfo->i_location, &loc, + sizeof(struct kernel_lb_addr)); + if (++indirections > UDF_MAX_ICB_NESTING) { + udf_err(inode->i_sb, + "too many ICBs in ICB hierarchy" + " (max %d supported)\n", + UDF_MAX_ICB_NESTING); + make_bad_inode(inode); return; } - brelse(nbh); + goto reread; } } brelse(ibh); } else if (fe->icbTag.strategyType != cpu_to_le16(4)) { - printk(KERN_ERR "udf: unsupported strategy type: %d\n", - le16_to_cpu(fe->icbTag.strategyType)); + udf_err(inode->i_sb, "unsupported strategy type: %d\n", + le16_to_cpu(fe->icbTag.strategyType)); brelse(bh); make_bad_inode(inode); return; @@ -1258,6 +1270,8 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) int offset; struct udf_sb_info *sbi = UDF_SB(inode->i_sb); struct udf_inode_info *iinfo = UDF_I(inode); + unsigned int link_count; + int bs = inode->i_sb->s_blocksize; fe = (struct fileEntry *)bh->b_data; efe = (struct extendedFileEntry *)bh->b_data; @@ -1278,41 +1292,38 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_EFE)) { iinfo->i_efe = 1; iinfo->i_use = 0; - if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - + if (udf_alloc_i_data(inode, bs - sizeof(struct extendedFileEntry))) { make_bad_inode(inode); return; } memcpy(iinfo->i_ext.i_data, bh->b_data + sizeof(struct extendedFileEntry), - inode->i_sb->s_blocksize - - sizeof(struct extendedFileEntry)); + bs - sizeof(struct extendedFileEntry)); } else if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_FE)) { iinfo->i_efe = 0; iinfo->i_use = 0; - if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - - sizeof(struct fileEntry))) { + if (udf_alloc_i_data(inode, bs - sizeof(struct fileEntry))) { make_bad_inode(inode); return; } memcpy(iinfo->i_ext.i_data, bh->b_data + sizeof(struct fileEntry), - inode->i_sb->s_blocksize - sizeof(struct fileEntry)); + bs - sizeof(struct fileEntry)); } else if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_USE)) { iinfo->i_efe = 0; iinfo->i_use = 1; iinfo->i_lenAlloc = le32_to_cpu( ((struct unallocSpaceEntry *)bh->b_data)-> lengthAllocDescs); - if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - + if (udf_alloc_i_data(inode, bs - sizeof(struct unallocSpaceEntry))) { make_bad_inode(inode); return; } memcpy(iinfo->i_ext.i_data, bh->b_data + sizeof(struct unallocSpaceEntry), - inode->i_sb->s_blocksize - - sizeof(struct unallocSpaceEntry)); + bs - sizeof(struct unallocSpaceEntry)); return; } @@ -1340,9 +1351,10 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) inode->i_mode &= ~sbi->s_umask; read_unlock(&sbi->s_cred_lock); - inode->i_nlink = le16_to_cpu(fe->fileLinkCount); - if (!inode->i_nlink) - inode->i_nlink = 1; + link_count = le16_to_cpu(fe->fileLinkCount); + if (!link_count) + link_count = 1; + set_nlink(inode, link_count); inode->i_size = le64_to_cpu(fe->informationLength); iinfo->i_lenExtents = inode->i_size; @@ -1389,6 +1401,36 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) iinfo->i_lenEAttr; } + /* + * Sanity check length of allocation descriptors and extended attrs to + * avoid integer overflows + */ + if (iinfo->i_lenEAttr > bs || iinfo->i_lenAlloc > bs) { + make_bad_inode(inode); + return; + } + /* Now do exact checks */ + if (udf_file_entry_alloc_offset(inode) + iinfo->i_lenAlloc > bs) { + make_bad_inode(inode); + return; + } + /* Sanity checks for files in ICB so that we don't get confused later */ + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { + /* + * For file in ICB data is stored in allocation descriptor + * so sizes should match + */ + if (iinfo->i_lenAlloc != inode->i_size) { + make_bad_inode(inode); + return; + } + /* File in ICB has to fit in there... */ + if (inode->i_size > bs - udf_file_entry_alloc_offset(inode)) { + make_bad_inode(inode); + return; + } + } + switch (fe->icbTag.fileType) { case ICBTAG_FILE_TYPE_DIRECTORY: inode->i_op = &udf_dir_inode_operations; @@ -1435,9 +1477,8 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) udf_debug("METADATA BITMAP FILE-----\n"); break; default: - printk(KERN_ERR "udf: udf_fill_inode(ino %ld) failed unknown " - "file type=%d\n", inode->i_ino, - fe->icbTag.fileType); + udf_err(inode->i_sb, "(ino %ld) failed unknown file type=%d\n", + inode->i_ino, fe->icbTag.fileType); make_bad_inode(inode); return; } @@ -1460,8 +1501,8 @@ static int udf_alloc_i_data(struct inode *inode, size_t size) iinfo->i_ext.i_data = kmalloc(size, GFP_KERNEL); if (!iinfo->i_ext.i_data) { - printk(KERN_ERR "udf:udf_alloc_i_data (ino %ld) " - "no free memory\n", inode->i_ino); + udf_err(inode->i_sb, "(ino %ld) no free memory\n", + inode->i_ino); return -ENOMEM; } @@ -1711,9 +1752,8 @@ out: if (do_sync) { sync_dirty_buffer(bh); if (buffer_write_io_error(bh)) { - printk(KERN_WARNING "IO error syncing udf inode " - "[%s:%08lx]\n", inode->i_sb->s_id, - inode->i_ino); + udf_warn(inode->i_sb, "IO error syncing udf inode [%08lx]\n", + inode->i_ino); err = -EIO; } } @@ -2004,8 +2044,7 @@ int8_t udf_current_aext(struct inode *inode, struct extent_position *epos, *elen = le32_to_cpu(lad->extLength) & UDF_EXTENT_LENGTH_MASK; break; default: - udf_debug("alloc_type = %d unsupported\n", - iinfo->i_alloc_type); + udf_debug("alloc_type = %d unsupported\n", iinfo->i_alloc_type); return -1; } diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c index 43e24a3..6583fe9 100644 --- a/fs/udf/lowlevel.c +++ b/fs/udf/lowlevel.c @@ -38,7 +38,7 @@ unsigned int udf_get_last_session(struct super_block *sb) if (i == 0) { udf_debug("XA disk: %s, vol_desc_start=%d\n", - (ms_info.xa_flag ? "yes" : "no"), ms_info.addr.lba); + ms_info.xa_flag ? "yes" : "no", ms_info.addr.lba); if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */ vol_desc_start = ms_info.addr.lba; } else { diff --git a/fs/udf/misc.c b/fs/udf/misc.c index 9215700..c175b4d 100644 --- a/fs/udf/misc.c +++ b/fs/udf/misc.c @@ -204,6 +204,7 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block, { struct tag *tag_p; struct buffer_head *bh = NULL; + u8 checksum; /* Read the block */ if (block == 0xFFFFFFFF) @@ -211,8 +212,8 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block, bh = udf_tread(sb, block); if (!bh) { - udf_debug("block=%d, location=%d: read failed\n", - block, location); + udf_err(sb, "read failed, block=%u, location=%d\n", + block, location); return NULL; } @@ -227,16 +228,18 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block, } /* Verify the tag checksum */ - if (udf_tag_checksum(tag_p) != tag_p->tagChecksum) { - printk(KERN_ERR "udf: tag checksum failed block %d\n", block); + checksum = udf_tag_checksum(tag_p); + if (checksum != tag_p->tagChecksum) { + udf_err(sb, "tag checksum failed, block %u: 0x%02x != 0x%02x\n", + block, checksum, tag_p->tagChecksum); goto error_out; } /* Verify the tag version */ if (tag_p->descVersion != cpu_to_le16(0x0002U) && tag_p->descVersion != cpu_to_le16(0x0003U)) { - udf_debug("tag version 0x%04x != 0x0002 || 0x0003 block %d\n", - le16_to_cpu(tag_p->descVersion), block); + udf_err(sb, "tag version 0x%04x != 0x0002 || 0x0003, block %u\n", + le16_to_cpu(tag_p->descVersion), block); goto error_out; } @@ -248,8 +251,8 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block, return bh; udf_debug("Crc failure block %d: crc = %d, crclen = %d\n", block, - le16_to_cpu(tag_p->descCRC), le16_to_cpu(tag_p->descCRCLength)); - + le16_to_cpu(tag_p->descCRC), + le16_to_cpu(tag_p->descCRCLength)); error_out: brelse(bh); return NULL; diff --git a/fs/udf/namei.c b/fs/udf/namei.c index d8c1bb5..483d662 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -235,7 +235,8 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, if (!lfi) continue; - flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi); + flen = udf_get_filename(dir->i_sb, nameptr, lfi, fname, + UDF_NAME_LEN); if (flen && udf_match(flen, fname, child->len, child->name)) goto out_ok; } @@ -577,8 +578,7 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode, fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); if (!fi) { - inode->i_nlink--; - mark_inode_dirty(inode); + inode_dec_link_count(inode); iput(inode); return err; } @@ -618,8 +618,7 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode, init_special_inode(inode, mode, rdev); fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); if (!fi) { - inode->i_nlink--; - mark_inode_dirty(inode); + inode_dec_link_count(inode); iput(inode); return err; } @@ -665,12 +664,11 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode) inode->i_fop = &udf_dir_operations; fi = udf_add_entry(inode, NULL, &fibh, &cfi, &err); if (!fi) { - inode->i_nlink--; - mark_inode_dirty(inode); + inode_dec_link_count(inode); iput(inode); goto out; } - inode->i_nlink = 2; + set_nlink(inode, 2); cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); cfi.icb.extLocation = cpu_to_lelb(dinfo->i_location); *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = @@ -683,7 +681,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode) fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); if (!fi) { - inode->i_nlink = 0; + clear_nlink(inode); mark_inode_dirty(inode); iput(inode); goto out; @@ -799,9 +797,8 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry) if (retval) goto end_rmdir; if (inode->i_nlink != 2) - udf_warning(inode->i_sb, "udf_rmdir", - "empty directory has nlink != 2 (%d)", - inode->i_nlink); + udf_warn(inode->i_sb, "empty directory has nlink != 2 (%d)\n", + inode->i_nlink); clear_nlink(inode); inode->i_size = 0; inode_dec_link_count(dir); @@ -840,7 +837,7 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry) if (!inode->i_nlink) { udf_debug("Deleting nonexistent file (%lu), %d\n", inode->i_ino, inode->i_nlink); - inode->i_nlink = 1; + set_nlink(inode, 1); } retval = udf_delete_entry(dir, fi, &fibh, &cfi); if (retval) diff --git a/fs/udf/partition.c b/fs/udf/partition.c index a71090e..d6caf01 100644 --- a/fs/udf/partition.c +++ b/fs/udf/partition.c @@ -33,8 +33,8 @@ uint32_t udf_get_pblock(struct super_block *sb, uint32_t block, struct udf_sb_info *sbi = UDF_SB(sb); struct udf_part_map *map; if (partition >= sbi->s_partitions) { - udf_debug("block=%d, partition=%d, offset=%d: " - "invalid partition\n", block, partition, offset); + udf_debug("block=%d, partition=%d, offset=%d: invalid partition\n", + block, partition, offset); return 0xFFFFFFFF; } map = &sbi->s_partmaps[partition]; @@ -60,8 +60,8 @@ uint32_t udf_get_pblock_virt15(struct super_block *sb, uint32_t block, vdata = &map->s_type_specific.s_virtual; if (block > vdata->s_num_entries) { - udf_debug("Trying to access block beyond end of VAT " - "(%d max %d)\n", block, vdata->s_num_entries); + udf_debug("Trying to access block beyond end of VAT (%d max %d)\n", + block, vdata->s_num_entries); return 0xFFFFFFFF; } @@ -321,9 +321,14 @@ uint32_t udf_get_pblock_meta25(struct super_block *sb, uint32_t block, /* We shouldn't mount such media... */ BUG_ON(!inode); retblk = udf_try_read_meta(inode, block, partition, offset); - if (retblk == 0xFFFFFFFF) { - udf_warning(sb, __func__, "error reading from METADATA, " - "trying to read from MIRROR"); + if (retblk == 0xFFFFFFFF && mdata->s_metadata_fe) { + udf_warn(sb, "error reading from METADATA, trying to read from MIRROR\n"); + if (!(mdata->s_flags & MF_MIRROR_FE_LOADED)) { + mdata->s_mirror_fe = udf_find_metadata_inode_efe(sb, + mdata->s_mirror_file_loc, map->s_partition_num); + mdata->s_flags |= MF_MIRROR_FE_LOADED; + } + inode = mdata->s_mirror_fe; if (!inode) return 0xFFFFFFFF; diff --git a/fs/udf/super.c b/fs/udf/super.c index b0c7b53..f66439e 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -76,8 +76,6 @@ #define UDF_DEFAULT_BLOCKSIZE 2048 -static char error_buf[1024]; - /* These are the "meat" - everything else is stuffing */ static int udf_fill_super(struct super_block *, void *, int); static void udf_put_super(struct super_block *); @@ -93,8 +91,6 @@ static void udf_close_lvid(struct super_block *); static unsigned int udf_count_free(struct super_block *); static int udf_statfs(struct dentry *, struct kstatfs *); static int udf_show_options(struct seq_file *, struct vfsmount *); -static void udf_error(struct super_block *sb, const char *function, - const char *fmt, ...); struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi) { @@ -245,9 +241,8 @@ static int udf_sb_alloc_partition_maps(struct super_block *sb, u32 count) sbi->s_partmaps = kcalloc(count, sizeof(struct udf_part_map), GFP_KERNEL); if (!sbi->s_partmaps) { - udf_error(sb, __func__, - "Unable to allocate space for %d partition maps", - count); + udf_err(sb, "Unable to allocate space for %d partition maps\n", + count); sbi->s_partitions = 0; return -ENOMEM; } @@ -551,8 +546,7 @@ static int udf_parse_options(char *options, struct udf_options *uopt, uopt->dmode = option & 0777; break; default: - printk(KERN_ERR "udf: bad mount option \"%s\" " - "or missing value\n", p); + pr_err("bad mount option \"%s\" or missing value\n", p); return 0; } } @@ -646,20 +640,16 @@ static loff_t udf_check_vsd(struct super_block *sb) udf_debug("ISO9660 Boot Record found\n"); break; case 1: - udf_debug("ISO9660 Primary Volume Descriptor " - "found\n"); + udf_debug("ISO9660 Primary Volume Descriptor found\n"); break; case 2: - udf_debug("ISO9660 Supplementary Volume " - "Descriptor found\n"); + udf_debug("ISO9660 Supplementary Volume Descriptor found\n"); break; case 3: - udf_debug("ISO9660 Volume Partition Descriptor " - "found\n"); + udf_debug("ISO9660 Volume Partition Descriptor found\n"); break; case 255: - udf_debug("ISO9660 Volume Descriptor Set " - "Terminator found\n"); + udf_debug("ISO9660 Volume Descriptor Set Terminator found\n"); break; default: udf_debug("ISO9660 VRS (%u) found\n", @@ -810,8 +800,7 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block) pvoldesc->recordingDateAndTime)) { #ifdef UDFFS_DEBUG struct timestamp *ts = &pvoldesc->recordingDateAndTime; - udf_debug("recording time %04u/%02u/%02u" - " %02u:%02u (%x)\n", + udf_debug("recording time %04u/%02u/%02u %02u:%02u (%x)\n", le16_to_cpu(ts->year), ts->month, ts->day, ts->hour, ts->minute, le16_to_cpu(ts->typeAndTimezone)); #endif @@ -822,7 +811,7 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block) strncpy(UDF_SB(sb)->s_volume_ident, outstr->u_name, outstr->u_len > 31 ? 31 : outstr->u_len); udf_debug("volIdent[] = '%s'\n", - UDF_SB(sb)->s_volume_ident); + UDF_SB(sb)->s_volume_ident); } if (!udf_build_ustr(instr, pvoldesc->volSetIdent, 128)) @@ -838,64 +827,57 @@ out1: return ret; } +struct inode *udf_find_metadata_inode_efe(struct super_block *sb, + u32 meta_file_loc, u32 partition_num) +{ + struct kernel_lb_addr addr; + struct inode *metadata_fe; + + addr.logicalBlockNum = meta_file_loc; + addr.partitionReferenceNum = partition_num; + + metadata_fe = udf_iget(sb, &addr); + + if (metadata_fe == NULL) + udf_warn(sb, "metadata inode efe not found\n"); + else if (UDF_I(metadata_fe)->i_alloc_type != ICBTAG_FLAG_AD_SHORT) { + udf_warn(sb, "metadata inode efe does not have short allocation descriptors!\n"); + iput(metadata_fe); + metadata_fe = NULL; + } + + return metadata_fe; +} + static int udf_load_metadata_files(struct super_block *sb, int partition) { struct udf_sb_info *sbi = UDF_SB(sb); struct udf_part_map *map; struct udf_meta_data *mdata; struct kernel_lb_addr addr; - int fe_error = 0; map = &sbi->s_partmaps[partition]; mdata = &map->s_type_specific.s_metadata; /* metadata address */ - addr.logicalBlockNum = mdata->s_meta_file_loc; - addr.partitionReferenceNum = map->s_partition_num; - udf_debug("Metadata file location: block = %d part = %d\n", - addr.logicalBlockNum, addr.partitionReferenceNum); + mdata->s_meta_file_loc, map->s_partition_num); - mdata->s_metadata_fe = udf_iget(sb, &addr); + mdata->s_metadata_fe = udf_find_metadata_inode_efe(sb, + mdata->s_meta_file_loc, map->s_partition_num); if (mdata->s_metadata_fe == NULL) { - udf_warning(sb, __func__, "metadata inode efe not found, " - "will try mirror inode."); - fe_error = 1; - } else if (UDF_I(mdata->s_metadata_fe)->i_alloc_type != - ICBTAG_FLAG_AD_SHORT) { - udf_warning(sb, __func__, "metadata inode efe does not have " - "short allocation descriptors!"); - fe_error = 1; - iput(mdata->s_metadata_fe); - mdata->s_metadata_fe = NULL; - } + /* mirror file entry */ + udf_debug("Mirror metadata file location: block = %d part = %d\n", + mdata->s_mirror_file_loc, map->s_partition_num); - /* mirror file entry */ - addr.logicalBlockNum = mdata->s_mirror_file_loc; - addr.partitionReferenceNum = map->s_partition_num; - - udf_debug("Mirror metadata file location: block = %d part = %d\n", - addr.logicalBlockNum, addr.partitionReferenceNum); + mdata->s_mirror_fe = udf_find_metadata_inode_efe(sb, + mdata->s_mirror_file_loc, map->s_partition_num); - mdata->s_mirror_fe = udf_iget(sb, &addr); - - if (mdata->s_mirror_fe == NULL) { - if (fe_error) { - udf_error(sb, __func__, "mirror inode efe not found " - "and metadata inode is missing too, exiting..."); - goto error_exit; - } else - udf_warning(sb, __func__, "mirror inode efe not found," - " but metadata inode is OK"); - } else if (UDF_I(mdata->s_mirror_fe)->i_alloc_type != - ICBTAG_FLAG_AD_SHORT) { - udf_warning(sb, __func__, "mirror inode efe does not have " - "short allocation descriptors!"); - iput(mdata->s_mirror_fe); - mdata->s_mirror_fe = NULL; - if (fe_error) + if (mdata->s_mirror_fe == NULL) { + udf_err(sb, "Both metadata and mirror metadata inode efe can not found\n"); goto error_exit; + } } /* @@ -908,18 +890,15 @@ static int udf_load_metadata_files(struct super_block *sb, int partition) addr.partitionReferenceNum = map->s_partition_num; udf_debug("Bitmap file location: block = %d part = %d\n", - addr.logicalBlockNum, addr.partitionReferenceNum); + addr.logicalBlockNum, addr.partitionReferenceNum); mdata->s_bitmap_fe = udf_iget(sb, &addr); if (mdata->s_bitmap_fe == NULL) { if (sb->s_flags & MS_RDONLY) - udf_warning(sb, __func__, "bitmap inode efe " - "not found but it's ok since the disc" - " is mounted read-only"); + udf_warn(sb, "bitmap inode efe not found but it's ok since the disc is mounted read-only\n"); else { - udf_error(sb, __func__, "bitmap inode efe not " - "found and attempted read-write mount"); + udf_err(sb, "bitmap inode efe not found and attempted read-write mount\n"); goto error_exit; } } @@ -972,9 +951,8 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index) bitmap = vzalloc(size); /* TODO: get rid of vzalloc */ if (bitmap == NULL) { - udf_error(sb, __func__, - "Unable to allocate space for bitmap " - "and %d buffer_head pointers", nr_groups); + udf_err(sb, "Unable to allocate space for bitmap and %d buffer_head pointers\n", + nr_groups); return NULL; } @@ -1004,10 +982,9 @@ static int udf_fill_partdesc_info(struct super_block *sb, if (p->accessType == cpu_to_le32(PD_ACCESS_TYPE_OVERWRITABLE)) map->s_partition_flags |= UDF_PART_FLAG_OVERWRITABLE; - udf_debug("Partition (%d type %x) starts at physical %d, " - "block length %d\n", p_index, - map->s_partition_type, map->s_partition_root, - map->s_partition_len); + udf_debug("Partition (%d type %x) starts at physical %d, block length %d\n", + p_index, map->s_partition_type, + map->s_partition_root, map->s_partition_len); if (strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR02) && strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR03)) @@ -1024,12 +1001,12 @@ static int udf_fill_partdesc_info(struct super_block *sb, map->s_uspace.s_table = udf_iget(sb, &loc); if (!map->s_uspace.s_table) { udf_debug("cannot load unallocSpaceTable (part %d)\n", - p_index); + p_index); return 1; } map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_TABLE; udf_debug("unallocSpaceTable (part %d) @ %ld\n", - p_index, map->s_uspace.s_table->i_ino); + p_index, map->s_uspace.s_table->i_ino); } if (phd->unallocSpaceBitmap.extLength) { @@ -1042,8 +1019,8 @@ static int udf_fill_partdesc_info(struct super_block *sb, bitmap->s_extPosition = le32_to_cpu( phd->unallocSpaceBitmap.extPosition); map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_BITMAP; - udf_debug("unallocSpaceBitmap (part %d) @ %d\n", p_index, - bitmap->s_extPosition); + udf_debug("unallocSpaceBitmap (part %d) @ %d\n", + p_index, bitmap->s_extPosition); } if (phd->partitionIntegrityTable.extLength) @@ -1059,13 +1036,13 @@ static int udf_fill_partdesc_info(struct super_block *sb, map->s_fspace.s_table = udf_iget(sb, &loc); if (!map->s_fspace.s_table) { udf_debug("cannot load freedSpaceTable (part %d)\n", - p_index); + p_index); return 1; } map->s_partition_flags |= UDF_PART_FLAG_FREED_TABLE; udf_debug("freedSpaceTable (part %d) @ %ld\n", - p_index, map->s_fspace.s_table->i_ino); + p_index, map->s_fspace.s_table->i_ino); } if (phd->freedSpaceBitmap.extLength) { @@ -1078,8 +1055,8 @@ static int udf_fill_partdesc_info(struct super_block *sb, bitmap->s_extPosition = le32_to_cpu( phd->freedSpaceBitmap.extPosition); map->s_partition_flags |= UDF_PART_FLAG_FREED_BITMAP; - udf_debug("freedSpaceBitmap (part %d) @ %d\n", p_index, - bitmap->s_extPosition); + udf_debug("freedSpaceBitmap (part %d) @ %d\n", + p_index, bitmap->s_extPosition); } return 0; } @@ -1119,11 +1096,9 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index) udf_find_vat_block(sb, p_index, type1_index, sbi->s_last_block); if (!sbi->s_vat_inode && sbi->s_last_block != blocks - 1) { - printk(KERN_NOTICE "UDF-fs: Failed to read VAT inode from the" - " last recorded block (%lu), retrying with the last " - "block of the device (%lu).\n", - (unsigned long)sbi->s_last_block, - (unsigned long)blocks - 1); + pr_notice("Failed to read VAT inode from the last recorded block (%lu), retrying with the last block of the device (%lu).\n", + (unsigned long)sbi->s_last_block, + (unsigned long)blocks - 1); udf_find_vat_block(sb, p_index, type1_index, blocks - 1); } if (!sbi->s_vat_inode) @@ -1221,8 +1196,8 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block) if (map->s_partition_type == UDF_METADATA_MAP25) { ret = udf_load_metadata_files(sb, i); if (ret) { - printk(KERN_ERR "UDF-fs: error loading MetaData " - "partition map %d\n", i); + udf_err(sb, "error loading MetaData partition map %d\n", + i); goto out_bh; } } else { @@ -1235,9 +1210,7 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block) * overwrite blocks instead of relocating them). */ sb->s_flags |= MS_RDONLY; - printk(KERN_NOTICE "UDF-fs: Filesystem marked read-only " - "because writing to pseudooverwrite partition is " - "not implemented.\n"); + pr_notice("Filesystem marked read-only because writing to pseudooverwrite partition is not implemented\n"); } out_bh: /* In case loading failed, we handle cleanup in udf_fill_super */ @@ -1259,13 +1232,13 @@ static int udf_load_sparable_map(struct super_block *sb, map->s_partition_type = UDF_SPARABLE_MAP15; sdata->s_packet_len = le16_to_cpu(spm->packetLength); if (!is_power_of_2(sdata->s_packet_len)) { - udf_error(sb, __func__, "error loading logical volume descriptor: " + udf_err(sb, "error loading logical volume descriptor: " "Invalid packet length %u\n", (unsigned)sdata->s_packet_len); return -EIO; } if (spm->numSparingTables > 4) { - udf_error(sb, __func__, "error loading logical volume descriptor: " + udf_err(sb, "error loading logical volume descriptor: " "Too many sparing tables (%d)\n", (int)spm->numSparingTables); return -EIO; @@ -1312,8 +1285,8 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, BUG_ON(ident != TAG_IDENT_LVD); lvd = (struct logicalVolDesc *)bh->b_data; table_len = le32_to_cpu(lvd->mapTableLength); - if (sizeof(*lvd) + table_len > sb->s_blocksize) { - udf_error(sb, __func__, "error loading logical volume descriptor: " + if (table_len > sb->s_blocksize - sizeof(*lvd)) { + udf_err(sb, "error loading logical volume descriptor: " "Partition table too long (%u > %lu)\n", table_len, sb->s_blocksize - sizeof(*lvd)); ret = 1; @@ -1373,9 +1346,8 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, struct metadataPartitionMap *mdm = (struct metadataPartitionMap *) &(lvd->partitionMaps[offset]); - udf_debug("Parsing Logical vol part %d " - "type %d id=%s\n", i, type, - UDF_ID_METADATA); + udf_debug("Parsing Logical vol part %d type %d id=%s\n", + i, type, UDF_ID_METADATA); map->s_partition_type = UDF_METADATA_MAP25; map->s_partition_func = udf_get_pblock_meta25; @@ -1390,25 +1362,24 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, le32_to_cpu(mdm->allocUnitSize); mdata->s_align_unit_size = le16_to_cpu(mdm->alignUnitSize); - mdata->s_dup_md_flag = - mdm->flags & 0x01; + if (mdm->flags & 0x01) + mdata->s_flags |= MF_DUPLICATE_MD; udf_debug("Metadata Ident suffix=0x%x\n", - (le16_to_cpu( - ((__le16 *) - mdm->partIdent.identSuffix)[0]))); + le16_to_cpu(*(__le16 *) + mdm->partIdent.identSuffix)); udf_debug("Metadata part num=%d\n", - le16_to_cpu(mdm->partitionNum)); + le16_to_cpu(mdm->partitionNum)); udf_debug("Metadata part alloc unit size=%d\n", - le32_to_cpu(mdm->allocUnitSize)); + le32_to_cpu(mdm->allocUnitSize)); udf_debug("Metadata file loc=%d\n", - le32_to_cpu(mdm->metadataFileLoc)); + le32_to_cpu(mdm->metadataFileLoc)); udf_debug("Mirror file loc=%d\n", - le32_to_cpu(mdm->metadataMirrorFileLoc)); + le32_to_cpu(mdm->metadataMirrorFileLoc)); udf_debug("Bitmap file loc=%d\n", - le32_to_cpu(mdm->metadataBitmapFileLoc)); - udf_debug("Duplicate Flag: %d %d\n", - mdata->s_dup_md_flag, mdm->flags); + le32_to_cpu(mdm->metadataBitmapFileLoc)); + udf_debug("Flags: %d %d\n", + mdata->s_flags, mdm->flags); } else { udf_debug("Unknown ident: %s\n", upm2->partIdent.ident); @@ -1418,16 +1389,15 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, map->s_partition_num = le16_to_cpu(upm2->partitionNum); } udf_debug("Partition (%d:%d) type %d on volume %d\n", - i, map->s_partition_num, type, - map->s_volumeseqnum); + i, map->s_partition_num, type, map->s_volumeseqnum); } if (fileset) { struct long_ad *la = (struct long_ad *)&(lvd->logicalVolContentsUse[0]); *fileset = lelb_to_cpu(la->extLocation); - udf_debug("FileSet found in LogicalVolDesc at block=%d, " - "partition=%d\n", fileset->logicalBlockNum, + udf_debug("FileSet found in LogicalVolDesc at block=%d, partition=%d\n", + fileset->logicalBlockNum, fileset->partitionReferenceNum); } if (lvd->integritySeqExt.extLength) @@ -1507,9 +1477,9 @@ static noinline int udf_process_sequence(struct super_block *sb, long block, bh = udf_read_tagged(sb, block, block, &ident); if (!bh) { - printk(KERN_ERR "udf: Block %Lu of volume descriptor " - "sequence is corrupted or we could not read " - "it.\n", (unsigned long long)block); + udf_err(sb, + "Block %llu of volume descriptor sequence is corrupted or we could not read it\n", + (unsigned long long)block); return 1; } @@ -1582,7 +1552,7 @@ static noinline int udf_process_sequence(struct super_block *sb, long block, * in a suitable order */ if (!vds[VDS_POS_PRIMARY_VOL_DESC].block) { - printk(KERN_ERR "udf: Primary Volume Descriptor not found!\n"); + udf_err(sb, "Primary Volume Descriptor not found!\n"); return 1; } if (udf_load_pvoldesc(sb, vds[VDS_POS_PRIMARY_VOL_DESC].block)) @@ -1769,7 +1739,7 @@ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt, if (!sb_set_blocksize(sb, uopt->blocksize)) { if (!silent) - printk(KERN_WARNING "UDF-fs: Bad block size\n"); + udf_warn(sb, "Bad block size\n"); return 0; } sbi->s_last_block = uopt->lastblock; @@ -1778,12 +1748,11 @@ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt, nsr_off = udf_check_vsd(sb); if (!nsr_off) { if (!silent) - printk(KERN_WARNING "UDF-fs: No VRS found\n"); + udf_warn(sb, "No VRS found\n"); return 0; } if (nsr_off == -1) - udf_debug("Failed to read byte 32768. Assuming open " - "disc. Skipping validity check\n"); + udf_debug("Failed to read byte 32768. Assuming open disc. Skipping validity check\n"); if (!sbi->s_last_block) sbi->s_last_block = udf_get_last_block(sb); } else { @@ -1794,7 +1763,7 @@ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt, sbi->s_anchor = uopt->anchor; if (!udf_find_anchor(sb, fileset)) { if (!silent) - printk(KERN_WARNING "UDF-fs: No anchor found\n"); + udf_warn(sb, "No anchor found\n"); return 0; } return 1; @@ -1972,8 +1941,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) if (uopt.flags & (1 << UDF_FLAG_UTF8) && uopt.flags & (1 << UDF_FLAG_NLS_MAP)) { - udf_error(sb, "udf_read_super", - "utf8 cannot be combined with iocharset\n"); + udf_err(sb, "utf8 cannot be combined with iocharset\n"); goto error_out; } #ifdef CONFIG_UDF_NLS @@ -2022,15 +1990,14 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) ret = udf_load_vrs(sb, &uopt, silent, &fileset); if (!ret && uopt.blocksize != UDF_DEFAULT_BLOCKSIZE) { if (!silent) - printk(KERN_NOTICE - "UDF-fs: Rescanning with blocksize " - "%d\n", UDF_DEFAULT_BLOCKSIZE); + pr_notice("Rescanning with blocksize %d\n", + UDF_DEFAULT_BLOCKSIZE); uopt.blocksize = UDF_DEFAULT_BLOCKSIZE; ret = udf_load_vrs(sb, &uopt, silent, &fileset); } } if (!ret) { - printk(KERN_WARNING "UDF-fs: No partition found (1)\n"); + udf_warn(sb, "No partition found (1)\n"); goto error_out; } @@ -2045,10 +2012,9 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) le16_to_cpu(lvidiu->maxUDFWriteRev); */ if (minUDFReadRev > UDF_MAX_READ_VERSION) { - printk(KERN_ERR "UDF-fs: minUDFReadRev=%x " - "(max is %x)\n", - le16_to_cpu(lvidiu->minUDFReadRev), - UDF_MAX_READ_VERSION); + udf_err(sb, "minUDFReadRev=%x (max is %x)\n", + le16_to_cpu(lvidiu->minUDFReadRev), + UDF_MAX_READ_VERSION); goto error_out; } else if (minUDFWriteRev > UDF_MAX_WRITE_VERSION) sb->s_flags |= MS_RDONLY; @@ -2062,28 +2028,27 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) } if (!sbi->s_partitions) { - printk(KERN_WARNING "UDF-fs: No partition found (2)\n"); + udf_warn(sb, "No partition found (2)\n"); goto error_out; } if (sbi->s_partmaps[sbi->s_partition].s_partition_flags & UDF_PART_FLAG_READ_ONLY) { - printk(KERN_NOTICE "UDF-fs: Partition marked readonly; " - "forcing readonly mount\n"); + pr_notice("Partition marked readonly; forcing readonly mount\n"); sb->s_flags |= MS_RDONLY; } if (udf_find_fileset(sb, &fileset, &rootdir)) { - printk(KERN_WARNING "UDF-fs: No fileset found\n"); + udf_warn(sb, "No fileset found\n"); goto error_out; } if (!silent) { struct timestamp ts; udf_time_to_disk_stamp(&ts, sbi->s_record_time); - udf_info("UDF: Mounting volume '%s', " - "timestamp %04u/%02u/%02u %02u:%02u (%x)\n", - sbi->s_volume_ident, le16_to_cpu(ts.year), ts.month, ts.day, + udf_info("Mounting volume '%s', timestamp %04u/%02u/%02u %02u:%02u (%x)\n", + sbi->s_volume_ident, + le16_to_cpu(ts.year), ts.month, ts.day, ts.hour, ts.minute, le16_to_cpu(ts.typeAndTimezone)); } if (!(sb->s_flags & MS_RDONLY)) @@ -2094,8 +2059,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) /* perhaps it's not extensible enough, but for now ... */ inode = udf_iget(sb, &rootdir); if (!inode) { - printk(KERN_ERR "UDF-fs: Error in udf_iget, block=%d, " - "partition=%d\n", + udf_err(sb, "Error in udf_iget, block=%d, partition=%d\n", rootdir.logicalBlockNum, rootdir.partitionReferenceNum); goto error_out; } @@ -2103,7 +2067,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) /* Allocate a dentry for the root inode */ sb->s_root = d_alloc_root(inode); if (!sb->s_root) { - printk(KERN_ERR "UDF-fs: Couldn't allocate root dentry\n"); + udf_err(sb, "Couldn't allocate root dentry\n"); iput(inode); goto error_out; } @@ -2131,32 +2095,40 @@ error_out: return -EINVAL; } -static void udf_error(struct super_block *sb, const char *function, - const char *fmt, ...) +void _udf_err(struct super_block *sb, const char *function, + const char *fmt, ...) { + struct va_format vaf; va_list args; - if (!(sb->s_flags & MS_RDONLY)) { - /* mark sb error */ + /* mark sb error */ + if (!(sb->s_flags & MS_RDONLY)) sb->s_dirt = 1; - } + va_start(args, fmt); - vsnprintf(error_buf, sizeof(error_buf), fmt, args); + + vaf.fmt = fmt; + vaf.va = &args; + + pr_err("error (device %s): %s: %pV", sb->s_id, function, &vaf); + va_end(args); - printk(KERN_CRIT "UDF-fs error (device %s): %s: %s\n", - sb->s_id, function, error_buf); } -void udf_warning(struct super_block *sb, const char *function, - const char *fmt, ...) +void _udf_warn(struct super_block *sb, const char *function, + const char *fmt, ...) { + struct va_format vaf; va_list args; va_start(args, fmt); - vsnprintf(error_buf, sizeof(error_buf), fmt, args); + + vaf.fmt = fmt; + vaf.va = &args; + + pr_warn("warning (device %s): %s: %pV", sb->s_id, function, &vaf); + va_end(args); - printk(KERN_WARNING "UDF-fs warning (device %s): %s: %s\n", - sb->s_id, function, error_buf); } static void udf_put_super(struct super_block *sb) @@ -2248,11 +2220,11 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb, bh = udf_read_ptagged(sb, &loc, 0, &ident); if (!bh) { - printk(KERN_ERR "udf: udf_count_free failed\n"); + udf_err(sb, "udf_count_free failed\n"); goto out; } else if (ident != TAG_IDENT_SBD) { brelse(bh); - printk(KERN_ERR "udf: udf_count_free failed\n"); + udf_err(sb, "udf_count_free failed\n"); goto out; } diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c index b1d4488..0422b7b 100644 --- a/fs/udf/symlink.c +++ b/fs/udf/symlink.c @@ -30,43 +30,73 @@ #include #include "udf_i.h" -static void udf_pc_to_char(struct super_block *sb, unsigned char *from, - int fromlen, unsigned char *to) +static int udf_pc_to_char(struct super_block *sb, unsigned char *from, + int fromlen, unsigned char *to, int tolen) { struct pathComponent *pc; int elen = 0; + int comp_len; unsigned char *p = to; + /* Reserve one byte for terminating \0 */ + tolen--; while (elen < fromlen) { pc = (struct pathComponent *)(from + elen); + elen += sizeof(struct pathComponent); switch (pc->componentType) { case 1: - if (pc->lengthComponentIdent == 0) { - p = to; - *p++ = '/'; + /* + * Symlink points to some place which should be agreed + * upon between originator and receiver of the media. Ignore. + */ + if (pc->lengthComponentIdent > 0) { + elen += pc->lengthComponentIdent; + break; } + /* Fall through */ + case 2: + if (tolen == 0) + return -ENAMETOOLONG; + p = to; + *p++ = '/'; + tolen--; break; case 3: + if (tolen < 3) + return -ENAMETOOLONG; memcpy(p, "../", 3); p += 3; + tolen -= 3; break; case 4: + if (tolen < 2) + return -ENAMETOOLONG; memcpy(p, "./", 2); p += 2; + tolen -= 2; /* that would be . - just ignore */ break; case 5: - p += udf_get_filename(sb, pc->componentIdent, p, - pc->lengthComponentIdent); + elen += pc->lengthComponentIdent; + if (elen > fromlen) + return -EIO; + comp_len = udf_get_filename(sb, pc->componentIdent, + pc->lengthComponentIdent, + p, tolen); + p += comp_len; + tolen -= comp_len; + if (tolen == 0) + return -ENAMETOOLONG; *p++ = '/'; + tolen--; break; } - elen += sizeof(struct pathComponent) + pc->lengthComponentIdent; } if (p > to + 1) p[-1] = '\0'; else p[0] = '\0'; + return 0; } static int udf_symlink_filler(struct file *file, struct page *page) @@ -74,11 +104,17 @@ static int udf_symlink_filler(struct file *file, struct page *page) struct inode *inode = page->mapping->host; struct buffer_head *bh = NULL; unsigned char *symlink; - int err = -EIO; + int err; unsigned char *p = kmap(page); struct udf_inode_info *iinfo; uint32_t pos; + /* We don't support symlinks longer than one block */ + if (inode->i_size > inode->i_sb->s_blocksize) { + err = -ENAMETOOLONG; + goto out_unmap; + } + iinfo = UDF_I(inode); pos = udf_block_map(inode, 0); @@ -88,14 +124,18 @@ static int udf_symlink_filler(struct file *file, struct page *page) } else { bh = sb_bread(inode->i_sb, pos); - if (!bh) - goto out; + if (!bh) { + err = -EIO; + goto out_unlock_inode; + } symlink = bh->b_data; } - udf_pc_to_char(inode->i_sb, symlink, inode->i_size, p); + err = udf_pc_to_char(inode->i_sb, symlink, inode->i_size, p, PAGE_SIZE); brelse(bh); + if (err) + goto out_unlock_inode; up_read(&iinfo->i_data_sem); SetPageUptodate(page); @@ -103,9 +143,10 @@ static int udf_symlink_filler(struct file *file, struct page *page) unlock_page(page); return 0; -out: +out_unlock_inode: up_read(&iinfo->i_data_sem); SetPageError(page); +out_unmap: kunmap(page); unlock_page(page); return err; diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index 8424308..4b98fee 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c @@ -95,23 +95,21 @@ void udf_truncate_tail_extent(struct inode *inode) lbcount += elen; if (lbcount > inode->i_size) { if (lbcount - inode->i_size >= inode->i_sb->s_blocksize) - printk(KERN_WARNING - "udf_truncate_tail_extent(): Too long " - "extent after EOF in inode %u: i_size: " - "%Ld lbcount: %Ld extent %u+%u\n", - (unsigned)inode->i_ino, - (long long)inode->i_size, - (long long)lbcount, - (unsigned)eloc.logicalBlockNum, - (unsigned)elen); + udf_warn(inode->i_sb, + "Too long extent after EOF in inode %u: i_size: %lld lbcount: %lld extent %u+%u\n", + (unsigned)inode->i_ino, + (long long)inode->i_size, + (long long)lbcount, + (unsigned)eloc.logicalBlockNum, + (unsigned)elen); nelen = elen - (lbcount - inode->i_size); epos.offset -= adsize; extent_trunc(inode, &epos, &eloc, etype, elen, nelen); epos.offset += adsize; if (udf_next_aext(inode, &epos, &eloc, &elen, 1) != -1) - printk(KERN_ERR "udf_truncate_tail_extent(): " - "Extent after EOF in inode %u.\n", - (unsigned)inode->i_ino); + udf_err(inode->i_sb, + "Extent after EOF in inode %u\n", + (unsigned)inode->i_ino); break; } } diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index 54706dc..604f5fc 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h @@ -54,13 +54,16 @@ #pragma pack(1) /* XXX(hch): Why? This file just defines in-core structures */ +#define MF_DUPLICATE_MD 0x01 +#define MF_MIRROR_FE_LOADED 0x02 + struct udf_meta_data { __u32 s_meta_file_loc; __u32 s_mirror_file_loc; __u32 s_bitmap_file_loc; __u32 s_alloc_unit_size; __u16 s_align_unit_size; - __u8 s_dup_md_flag; + int s_flags; struct inode *s_metadata_fe; struct inode *s_mirror_fe; struct inode *s_bitmap_fe; diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index dbd52d4b..8775ab2 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -1,6 +1,8 @@ #ifndef __UDF_DECL_H #define __UDF_DECL_H +#define pr_fmt(fmt) "UDF-fs: " fmt + #include "ecma_167.h" #include "osta_udf.h" @@ -16,23 +18,30 @@ #define UDF_PREALLOCATE #define UDF_DEFAULT_PREALLOC_BLOCKS 8 +extern __printf(3, 4) void _udf_err(struct super_block *sb, + const char *function, const char *fmt, ...); +#define udf_err(sb, fmt, ...) \ + _udf_err(sb, __func__, fmt, ##__VA_ARGS__) + +extern __printf(3, 4) void _udf_warn(struct super_block *sb, + const char *function, const char *fmt, ...); +#define udf_warn(sb, fmt, ...) \ + _udf_warn(sb, __func__, fmt, ##__VA_ARGS__) + +#define udf_info(fmt, ...) \ + pr_info("INFO " fmt, ##__VA_ARGS__) + #undef UDFFS_DEBUG #ifdef UDFFS_DEBUG -#define udf_debug(f, a...) \ -do { \ - printk(KERN_DEBUG "UDF-fs DEBUG %s:%d:%s: ", \ - __FILE__, __LINE__, __func__); \ - printk(f, ##a); \ -} while (0) +#define udf_debug(fmt, ...) \ + printk(KERN_DEBUG pr_fmt("%s:%d:%s: " fmt), \ + __FILE__, __LINE__, __func__, ##__VA_ARGS__) #else -#define udf_debug(f, a...) /**/ +#define udf_debug(fmt, ...) \ + no_printk(fmt, ##__VA_ARGS__) #endif -#define udf_info(f, a...) \ - printk(KERN_INFO "UDF-fs INFO " f, ##a); - - #define udf_fixed_to_variable(x) ( ( ( (x) >> 5 ) * 39 ) + ( (x) & 0x0000001F ) ) #define udf_variable_to_fixed(x) ( ( ( (x) / 39 ) << 5 ) + ( (x) % 39 ) ) @@ -112,8 +121,6 @@ struct extent_position { /* super.c */ -__attribute__((format(printf, 3, 4))) -extern void udf_warning(struct super_block *, const char *, const char *, ...); static inline void udf_updated_lvid(struct super_block *sb) { struct buffer_head *bh = UDF_SB(sb)->s_lvid_bh; @@ -126,6 +133,8 @@ static inline void udf_updated_lvid(struct super_block *sb) UDF_SB(sb)->s_lvid_dirty = 1; } extern u64 lvid_get_unique_id(struct super_block *sb); +struct inode *udf_find_metadata_inode_efe(struct super_block *sb, + u32 meta_file_loc, u32 partition_num); /* namei.c */ extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *, @@ -198,7 +207,8 @@ udf_get_lb_pblock(struct super_block *sb, struct kernel_lb_addr *loc, } /* unicode.c */ -extern int udf_get_filename(struct super_block *, uint8_t *, uint8_t *, int); +extern int udf_get_filename(struct super_block *, uint8_t *, int, uint8_t *, + int); extern int udf_put_filename(struct super_block *, const uint8_t *, uint8_t *, int); extern int udf_build_ustr(struct ustr *, dstring *, int); diff --git a/fs/udf/udftime.c b/fs/udf/udftime.c index b8c828c..1f11483 100644 --- a/fs/udf/udftime.c +++ b/fs/udf/udftime.c @@ -34,9 +34,10 @@ * http://www.boulder.nist.gov/timefreq/pubs/bulletin/leapsecond.htm */ +#include "udfdecl.h" + #include #include -#include "udfdecl.h" #define EPOCH_YEAR 1970 diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index d03a90b..d29c06f 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -28,7 +28,8 @@ #include "udf_sb.h" -static int udf_translate_to_linux(uint8_t *, uint8_t *, int, uint8_t *, int); +static int udf_translate_to_linux(uint8_t *, int, uint8_t *, int, uint8_t *, + int); static int udf_char_to_ustr(struct ustr *dest, const uint8_t *src, int strlen) { @@ -114,7 +115,7 @@ int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i) cmp_id = ocu_i->u_cmpID; if (cmp_id != 8 && cmp_id != 16) { memset(utf_o, 0, sizeof(struct ustr)); - printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", + pr_err("unknown compression code (%d) stri=%s\n", cmp_id, ocu_i->u_name); return 0; } @@ -242,7 +243,7 @@ try_again: if (utf_cnt) { error_out: ocu[++u_len] = '?'; - printk(KERN_DEBUG "udf: bad UTF-8 character\n"); + printk(KERN_DEBUG pr_fmt("bad UTF-8 character\n")); } ocu[length - 1] = (uint8_t)u_len + 1; @@ -267,7 +268,7 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, cmp_id = ocu_i->u_cmpID; if (cmp_id != 8 && cmp_id != 16) { memset(utf_o, 0, sizeof(struct ustr)); - printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", + pr_err("unknown compression code (%d) stri=%s\n", cmp_id, ocu_i->u_name); return 0; } @@ -333,8 +334,8 @@ try_again: return u_len + 1; } -int udf_get_filename(struct super_block *sb, uint8_t *sname, uint8_t *dname, - int flen) +int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen, + uint8_t *dname, int dlen) { struct ustr *filename, *unifilename; int len = 0; @@ -347,7 +348,7 @@ int udf_get_filename(struct super_block *sb, uint8_t *sname, uint8_t *dname, if (!unifilename) goto out1; - if (udf_build_ustr_exact(unifilename, sname, flen)) + if (udf_build_ustr_exact(unifilename, sname, slen)) goto out2; if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { @@ -366,7 +367,8 @@ int udf_get_filename(struct super_block *sb, uint8_t *sname, uint8_t *dname, } else goto out2; - len = udf_translate_to_linux(dname, filename->u_name, filename->u_len, + len = udf_translate_to_linux(dname, dlen, + filename->u_name, filename->u_len, unifilename->u_name, unifilename->u_len); out2: kfree(unifilename); @@ -403,10 +405,12 @@ int udf_put_filename(struct super_block *sb, const uint8_t *sname, #define EXT_MARK '.' #define CRC_MARK '#' #define EXT_SIZE 5 +/* Number of chars we need to store generated CRC to make filename unique */ +#define CRC_LEN 5 -static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName, - int udfLen, uint8_t *fidName, - int fidNameLen) +static int udf_translate_to_linux(uint8_t *newName, int newLen, + uint8_t *udfName, int udfLen, + uint8_t *fidName, int fidNameLen) { int index, newIndex = 0, needsCRC = 0; int extIndex = 0, newExtIndex = 0, hasExt = 0; @@ -440,7 +444,7 @@ static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName, newExtIndex = newIndex; } } - if (newIndex < 256) + if (newIndex < newLen) newName[newIndex++] = curr; else needsCRC = 1; @@ -468,13 +472,13 @@ static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName, } ext[localExtIndex++] = curr; } - maxFilenameLen = 250 - localExtIndex; + maxFilenameLen = newLen - CRC_LEN - localExtIndex; if (newIndex > maxFilenameLen) newIndex = maxFilenameLen; else newIndex = newExtIndex; - } else if (newIndex > 250) - newIndex = 250; + } else if (newIndex > newLen - CRC_LEN) + newIndex = newLen - CRC_LEN; newName[newIndex++] = CRC_MARK; valueCRC = crc_itu_t(0, fidName, fidNameLen); newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12]; diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 2eabf04..78a4c70 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -341,7 +341,7 @@ cg_found: fail_remove_inode: unlock_super(sb); - inode->i_nlink = 0; + clear_nlink(inode); iput(inode); UFSD("EXIT (FAILED): err %d\n", err); return ERR_PTR(err); diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index b4d791a..879b134 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -589,7 +589,7 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode) * Copy data to the in-core inode. */ inode->i_mode = mode = fs16_to_cpu(sb, ufs_inode->ui_mode); - inode->i_nlink = fs16_to_cpu(sb, ufs_inode->ui_nlink); + set_nlink(inode, fs16_to_cpu(sb, ufs_inode->ui_nlink)); if (inode->i_nlink == 0) { ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino); return -1; @@ -637,7 +637,7 @@ static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode) * Copy data to the in-core inode. */ inode->i_mode = mode = fs16_to_cpu(sb, ufs2_inode->ui_mode); - inode->i_nlink = fs16_to_cpu(sb, ufs2_inode->ui_nlink); + set_nlink(inode, fs16_to_cpu(sb, ufs2_inode->ui_nlink)); if (inode->i_nlink == 0) { ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino); return -1; diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index b57aab9..639d491 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -59,8 +59,6 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, stru if (ino) inode = ufs_iget(dir->i_sb, ino); unlock_ufs(dir->i_sb); - if (IS_ERR(inode)) - return ERR_CAST(inode); return d_splice_alias(inode, dentry); } diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 5be2755..c26f2bc 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -117,9 +117,12 @@ extern int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buf extern const struct file_operations ufs_dir_operations; /* super.c */ -extern void ufs_warning (struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); -extern void ufs_error (struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); -extern void ufs_panic (struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); +extern __printf(3, 4) +void ufs_warning(struct super_block *, const char *, const char *, ...); +extern __printf(3, 4) +void ufs_error(struct super_block *, const char *, const char *, ...); +extern __printf(3, 4) +void ufs_panic(struct super_block *, const char *, const char *, ...); /* symlink.c */ extern const struct inode_operations ufs_fast_symlink_inode_operations; diff --git a/include/linux/dvb/audio.h b/include/linux/dvb/audio.h index fec66bd..d47bccd 100644 --- a/include/linux/dvb/audio.h +++ b/include/linux/dvb/audio.h @@ -67,7 +67,7 @@ typedef struct audio_status { typedef -struct audio_karaoke{ /* if Vocal1 or Vocal2 are non-zero, they get mixed */ +struct audio_karaoke { /* if Vocal1 or Vocal2 are non-zero, they get mixed */ int vocal1; /* into left and right t at 70% each */ int vocal2; /* if both, Vocal1 and Vocal2 are non-zero, Vocal1 gets*/ int melody; /* mixed into the left channel and */ diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h index 36a3ed6..1b1094c 100644 --- a/include/linux/dvb/frontend.h +++ b/include/linux/dvb/frontend.h @@ -349,6 +349,7 @@ typedef enum fe_delivery_system { SYS_CMMB, SYS_DAB, SYS_DVBT2, + SYS_TURBO, } fe_delivery_system_t; struct dtv_cmds_h { diff --git a/include/linux/dvb/version.h b/include/linux/dvb/version.h index 1421cc8..66594b1 100644 --- a/include/linux/dvb/version.h +++ b/include/linux/dvb/version.h @@ -24,6 +24,6 @@ #define _DVBVERSION_H_ #define DVB_API_VERSION 5 -#define DVB_API_VERSION_MINOR 3 +#define DVB_API_VERSION_MINOR 4 #endif /*_DVBVERSION_H_*/ diff --git a/include/xen/Kbuild b/include/xen/Kbuild index 84ad8f0..563161a 100644 --- a/include/xen/Kbuild +++ b/include/xen/Kbuild @@ -1,2 +1,4 @@ header-y += evtchn.h +header-y += gntalloc.h +header-y += gntdev.h header-y += privcmd.h diff --git a/include/xen/balloon.h b/include/xen/balloon.h index a2b22f0..d29c153 100644 --- a/include/xen/balloon.h +++ b/include/xen/balloon.h @@ -15,11 +15,26 @@ struct balloon_stats { unsigned long max_schedule_delay; unsigned long retry_count; unsigned long max_retry_count; +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG + unsigned long hotplug_pages; + unsigned long balloon_hotplug; +#endif }; extern struct balloon_stats balloon_stats; void balloon_set_new_target(unsigned long target); -int alloc_xenballooned_pages(int nr_pages, struct page** pages); -void free_xenballooned_pages(int nr_pages, struct page** pages); +int alloc_xenballooned_pages(int nr_pages, struct page **pages, + bool highmem); +void free_xenballooned_pages(int nr_pages, struct page **pages); + +struct sys_device; +#ifdef CONFIG_XEN_SELFBALLOONING +extern int register_xen_selfballooning(struct sys_device *sysdev); +#else +static inline int register_xen_selfballooning(struct sys_device *sysdev) +{ + return -ENOSYS; +} +#endif diff --git a/include/xen/events.h b/include/xen/events.h index 9af21e1..89b672d 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -12,7 +12,7 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id); -int bind_virq_to_irq(unsigned int virq, unsigned int cpu); +int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu); int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, irq_handler_t handler, unsigned long irqflags, const char *devname, @@ -74,8 +74,6 @@ int xen_set_callback_via(uint64_t via); void xen_evtchn_do_upcall(struct pt_regs *regs); void xen_hvm_evtchn_do_upcall(void); -/* Allocate a pirq for a physical interrupt, given a gsi. */ -int xen_allocate_pirq_gsi(unsigned gsi); /* Bind a pirq for a physical interrupt to an irq. */ int xen_bind_pirq_gsi_to_irq(unsigned gsi, unsigned pirq, int shareable, char *name); @@ -98,6 +96,9 @@ int xen_irq_from_pirq(unsigned pirq); /* Return the pirq allocated to the irq. */ int xen_pirq_from_irq(unsigned irq); +/* Return the irq allocated to the gsi */ +int xen_irq_from_gsi(unsigned gsi); + /* Determine whether to ignore this IRQ if it is passed to a guest. */ int xen_test_irq_shared(int irq); diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index b1fab6b..11e2dfc 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -43,7 +43,6 @@ #include #include -#include #include @@ -156,6 +155,7 @@ unsigned int gnttab_max_grant_frames(void); #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, + struct gnttab_map_grant_ref *kmap_ops, struct page **pages, unsigned int count); int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, struct page **pages, unsigned int count); diff --git a/include/xen/hvc-console.h b/include/xen/hvc-console.h index c3adde3..b62dfef 100644 --- a/include/xen/hvc-console.h +++ b/include/xen/hvc-console.h @@ -6,11 +6,13 @@ extern struct console xenboot_console; #ifdef CONFIG_HVC_XEN void xen_console_resume(void); void xen_raw_console_write(const char *str); +__printf(1, 2) void xen_raw_printk(const char *fmt, ...); #else static inline void xen_console_resume(void) { } static inline void xen_raw_console_write(const char *str) { } -static inline void xen_raw_printk(const char *fmt, ...) { } +static inline __printf(1, 2) +void xen_raw_printk(const char *fmt, ...) { } #endif #endif /* XEN_HVC_CONSOLE_H */ diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h index 3d5d6db..9324488 100644 --- a/include/xen/interface/io/blkif.h +++ b/include/xen/interface/io/blkif.h @@ -57,6 +57,36 @@ typedef uint64_t blkif_sector_t; * "feature-flush-cache" node! */ #define BLKIF_OP_FLUSH_DISKCACHE 3 + +/* + * Recognised only if "feature-discard" is present in backend xenbus info. + * The "feature-discard" node contains a boolean indicating whether trim + * (ATA) or unmap (SCSI) - conviently called discard requests are likely + * to succeed or fail. Either way, a discard request + * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by + * the underlying block-device hardware. The boolean simply indicates whether + * or not it is worthwhile for the frontend to attempt discard requests. + * If a backend does not recognise BLKIF_OP_DISCARD, it should *not* + * create the "feature-discard" node! + * + * Discard operation is a request for the underlying block device to mark + * extents to be erased. However, discard does not guarantee that the blocks + * will be erased from the device - it is just a hint to the device + * controller that these blocks are no longer in use. What the device + * controller does with that information is left to the controller. + * Discard operations are passed with sector_number as the + * sector index to begin discard operations at and nr_sectors as the number of + * sectors to be discarded. The specified sectors should be discarded if the + * underlying block device supports trim (ATA) or unmap (SCSI) operations, + * or a BLKIF_RSP_EOPNOTSUPP should be returned. + * More information about trim/unmap operations at: + * http://t13.org/Documents/UploadedDocuments/docs2008/ + * e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc + * http://www.seagate.com/staticfiles/support/disc/manuals/ + * Interface%20manuals/100293068c.pdf + */ +#define BLKIF_OP_DISCARD 5 + /* * Maximum scatter/gather segments per request. * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE. @@ -74,6 +104,11 @@ struct blkif_request_rw { } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; }; +struct blkif_request_discard { + blkif_sector_t sector_number; + uint64_t nr_sectors; +}; + struct blkif_request { uint8_t operation; /* BLKIF_OP_??? */ uint8_t nr_segments; /* number of segments */ @@ -81,6 +116,7 @@ struct blkif_request { uint64_t id; /* private guest value, echoed in resp */ union { struct blkif_request_rw rw; + struct blkif_request_discard discard; } u; }; diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h index cb94668..d4635cd 100644 --- a/include/xen/interface/io/netif.h +++ b/include/xen/interface/io/netif.h @@ -13,6 +13,24 @@ #include "../grant_table.h" /* + * Older implementation of Xen network frontend / backend has an + * implicit dependency on the MAX_SKB_FRAGS as the maximum number of + * ring slots a skb can use. Netfront / netback may not work as + * expected when frontend and backend have different MAX_SKB_FRAGS. + * + * A better approach is to add mechanism for netfront / netback to + * negotiate this value. However we cannot fix all possible + * frontends, so we need to define a value which states the minimum + * slots backend must support. + * + * The minimum value derives from older Linux kernel's MAX_SKB_FRAGS + * (18), which is proved to work with most frontends. Any new backend + * which doesn't negotiate with frontend should expect frontend to + * send a valid packet using slots up to this value. + */ +#define XEN_NETIF_NR_SLOTS_MIN 18 + +/* * Notifications after enqueuing any type of message should be conditional on * the appropriate req_event or rsp_event field in the shared ring. * If the client sends notification for rx requests then it should specify @@ -47,6 +65,7 @@ #define _XEN_NETTXF_extra_info (3) #define XEN_NETTXF_extra_info (1U<<_XEN_NETTXF_extra_info) +#define XEN_NETIF_MAX_TX_SIZE 0xFFFF struct xen_netif_tx_request { grant_ref_t gref; /* Reference to buffer page */ uint16_t offset; /* Offset within buffer page */ diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h index 75271b9..7d28aff 100644 --- a/include/xen/interface/io/ring.h +++ b/include/xen/interface/io/ring.h @@ -188,6 +188,11 @@ struct __name##_back_ring { \ #define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r)) +/* Ill-behaved frontend determination: Can there be this many requests? */ +#define RING_REQUEST_PROD_OVERFLOW(_r, _prod) \ + (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r)) + + #define RING_PUSH_REQUESTS(_r) do { \ wmb(); /* back sees requests /before/ updated producer index */ \ (_r)->sring->req_prod = (_r)->req_prod_pvt; \ diff --git a/include/xen/interface/io/xs_wire.h b/include/xen/interface/io/xs_wire.h index 454ee26..7cdfca2 100644 --- a/include/xen/interface/io/xs_wire.h +++ b/include/xen/interface/io/xs_wire.h @@ -26,7 +26,10 @@ enum xsd_sockmsg_type XS_SET_PERMS, XS_WATCH_EVENT, XS_ERROR, - XS_IS_DOMAIN_INTRODUCED + XS_IS_DOMAIN_INTRODUCED, + XS_RESUME, + XS_SET_TARGET, + XS_RESTRICT }; #define XS_WRITE_NONE "NONE" diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h index 534cac8..c1080d9 100644 --- a/include/xen/interface/physdev.h +++ b/include/xen/interface/physdev.h @@ -109,6 +109,7 @@ struct physdev_irq { #define MAP_PIRQ_TYPE_MSI 0x0 #define MAP_PIRQ_TYPE_GSI 0x1 #define MAP_PIRQ_TYPE_UNKNOWN 0x2 +#define MAP_PIRQ_TYPE_MSI_SEG 0x3 #define PHYSDEVOP_map_pirq 13 struct physdev_map_pirq { @@ -119,7 +120,7 @@ struct physdev_map_pirq { int index; /* IN or OUT */ int pirq; - /* IN */ + /* IN - high 16 bits hold segment for MAP_PIRQ_TYPE_MSI_SEG */ int bus; /* IN */ int devfn; @@ -198,6 +199,37 @@ struct physdev_get_free_pirq { uint32_t pirq; }; +#define XEN_PCI_DEV_EXTFN 0x1 +#define XEN_PCI_DEV_VIRTFN 0x2 +#define XEN_PCI_DEV_PXM 0x4 + +#define PHYSDEVOP_pci_device_add 25 +struct physdev_pci_device_add { + /* IN */ + uint16_t seg; + uint8_t bus; + uint8_t devfn; + uint32_t flags; + struct { + uint8_t bus; + uint8_t devfn; + } physfn; +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + uint32_t optarr[]; +#elif defined(__GNUC__) + uint32_t optarr[0]; +#endif +}; + +#define PHYSDEVOP_pci_device_remove 26 +#define PHYSDEVOP_restore_msi_ext 27 +struct physdev_pci_device { + /* IN */ + uint16_t seg; + uint8_t bus; + uint8_t devfn; +}; + /* * Notify that some PIRQ-bound event channels have been unmasked. * ** This command is obsolete since interface version 0x00030202 and is ** diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index 6acd9ce..6a6e914 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -492,6 +492,7 @@ struct dom0_vga_console_info { /* These flags are passed in the 'flags' field of start_info_t. */ #define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */ #define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */ +#define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */ typedef uint64_t cpumap_t; diff --git a/include/xen/page.h b/include/xen/page.h index 0be36b9..12765b6 100644 --- a/include/xen/page.h +++ b/include/xen/page.h @@ -3,6 +3,16 @@ #include -extern phys_addr_t xen_extra_mem_start, xen_extra_mem_size; +struct xen_memory_region { + phys_addr_t start; + phys_addr_t size; +}; + +#define XEN_EXTRA_MEM_MAX_REGIONS 128 /* == E820MAX */ + +extern __initdata +struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS]; + +extern unsigned long xen_released_pages; #endif /* _XEN_PAGE_H */ diff --git a/include/xen/platform_pci.h b/include/xen/platform_pci.h index a785a3b..438c256 100644 --- a/include/xen/platform_pci.h +++ b/include/xen/platform_pci.h @@ -29,8 +29,7 @@ static inline int xen_must_unplug_nics(void) { #if (defined(CONFIG_XEN_NETDEV_FRONTEND) || \ defined(CONFIG_XEN_NETDEV_FRONTEND_MODULE)) && \ - (defined(CONFIG_XEN_PLATFORM_PCI) || \ - defined(CONFIG_XEN_PLATFORM_PCI_MODULE)) + defined(CONFIG_XEN_PVHVM) return 1; #else return 0; @@ -40,8 +39,7 @@ static inline int xen_must_unplug_nics(void) { static inline int xen_must_unplug_disks(void) { #if (defined(CONFIG_XEN_BLKDEV_FRONTEND) || \ defined(CONFIG_XEN_BLKDEV_FRONTEND_MODULE)) && \ - (defined(CONFIG_XEN_PLATFORM_PCI) || \ - defined(CONFIG_XEN_PLATFORM_PCI_MODULE)) + defined(CONFIG_XEN_PVHVM) return 1; #else return 0; diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 5467369..b1b6676 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -156,9 +157,9 @@ int xenbus_scanf(struct xenbus_transaction t, __attribute__((format(scanf, 4, 5))); /* Single printf and write: returns -errno or 0. */ +__printf(4, 5) int xenbus_printf(struct xenbus_transaction t, - const char *dir, const char *node, const char *fmt, ...) - __attribute__((format(printf, 4, 5))); + const char *dir, const char *node, const char *fmt, ...); /* Generic read function: NULL-terminated triples of name, * sprintf-style type string, and pointer. Returns 0 or errno.*/ @@ -200,11 +201,11 @@ int xenbus_watch_path(struct xenbus_device *dev, const char *path, struct xenbus_watch *watch, void (*callback)(struct xenbus_watch *, const char **, unsigned int)); +__printf(4, 5) int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch, void (*callback)(struct xenbus_watch *, const char **, unsigned int), - const char *pathfmt, ...) - __attribute__ ((format (printf, 4, 5))); + const char *pathfmt, ...); int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state); int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn); @@ -223,7 +224,9 @@ int xenbus_free_evtchn(struct xenbus_device *dev, int port); enum xenbus_state xenbus_read_driver_state(const char *path); +__printf(3, 4) void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...); +__printf(3, 4) void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...); const char *xenbus_strstate(enum xenbus_state state); diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index b595f56..8b02f60 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -17,6 +17,7 @@ */ #include +#include #ifndef __KERNEL__ #include #include diff --git a/lib/raid6/int.uc b/lib/raid6/int.uc index d1e276a..5b50f8d 100644 --- a/lib/raid6/int.uc +++ b/lib/raid6/int.uc @@ -11,7 +11,7 @@ * ----------------------------------------------------------------------- */ /* - * raid6int$#.c + * int$#.c * * $#-way unrolled portable integer math RAID-6 instruction set * diff --git a/lib/raid6/mktables.c b/lib/raid6/mktables.c index 3b15008..8a37809 100644 --- a/lib/raid6/mktables.c +++ b/lib/raid6/mktables.c @@ -60,6 +60,7 @@ int main(int argc, char *argv[]) uint8_t exptbl[256], invtbl[256]; printf("#include \n"); + printf("#include \n"); /* Compute multiplication table */ printf("\nconst u8 __attribute__((aligned(256)))\n" diff --git a/lib/raid6/recov.c b/lib/raid6/recov.c index 8590d19..fe275d7 100644 --- a/lib/raid6/recov.c +++ b/lib/raid6/recov.c @@ -18,6 +18,7 @@ * the syndrome.) */ +#include #include /* Recover two failed data blocks. */ diff --git a/net/802/fc.c b/net/802/fc.c index 1e49f2d..bd345f3 100644 --- a/net/802/fc.c +++ b/net/802/fc.c @@ -27,6 +27,7 @@ #include #include #include +#include #include /* diff --git a/net/802/garp.c b/net/802/garp.c index 1610295..8e21b6d 100644 --- a/net/802/garp.c +++ b/net/802/garp.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -553,7 +554,7 @@ static void garp_release_port(struct net_device *dev) if (rtnl_dereference(port->applicants[i])) return; } - rcu_assign_pointer(dev->garp_port, NULL); + RCU_INIT_POINTER(dev->garp_port, NULL); kfree_rcu(port, rcu); } @@ -605,7 +606,7 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl ASSERT_RTNL(); - rcu_assign_pointer(port->applicants[appl->type], NULL); + RCU_INIT_POINTER(port->applicants[appl->type], NULL); /* Delete timer and generate a final TRANSMIT_PDU event to flush out * all pending messages before the applicant is gone. */ diff --git a/net/802/stp.c b/net/802/stp.c index 978c30b..15540b7 100644 --- a/net/802/stp.c +++ b/net/802/stp.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -88,9 +89,9 @@ void stp_proto_unregister(const struct stp_proto *proto) { mutex_lock(&stp_proto_mutex); if (is_zero_ether_addr(proto->group_address)) - rcu_assign_pointer(stp_proto, NULL); + RCU_INIT_POINTER(stp_proto, NULL); else - rcu_assign_pointer(garp_protos[proto->group_address[5] - + RCU_INIT_POINTER(garp_protos[proto->group_address[5] - GARP_ADDR_MIN], NULL); synchronize_rcu(); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 917ecb9..963f285 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -18,6 +18,8 @@ * 2 of the License, or (at your option) any later version. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -108,13 +110,6 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) grp = rtnl_dereference(real_dev->vlgrp); BUG_ON(!grp); - /* Take it out of our own structures, but be sure to interlock with - * HW accelerating devices or SW vlan input packet processing if - * VLAN is not 0 (leave it there for 802.1p). - */ - if (vlan_id && (real_dev->features & NETIF_F_HW_VLAN_FILTER)) - ops->ndo_vlan_rx_kill_vid(real_dev, vlan_id); - grp->nr_vlans--; if (vlan->flags & VLAN_FLAG_GVRP) @@ -131,14 +126,19 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) if (grp->nr_vlans == 0) { vlan_gvrp_uninit_applicant(real_dev); - rcu_assign_pointer(real_dev->vlgrp, NULL); - if (ops->ndo_vlan_rx_register) - ops->ndo_vlan_rx_register(real_dev, NULL); + RCU_INIT_POINTER(real_dev->vlgrp, NULL); /* Free the group, after all cpu's are done. */ call_rcu(&grp->rcu, vlan_rcu_free); } + /* Take it out of our own structures, but be sure to interlock with + * HW accelerating devices or SW vlan input packet processing if + * VLAN is not 0 (leave it there for 802.1p). + */ + if (vlan_id && (real_dev->features & NETIF_F_HW_VLAN_FILTER)) + ops->ndo_vlan_rx_kill_vid(real_dev, vlan_id); + /* Get rid of the vlan's reference to real_dev */ dev_put(real_dev); } @@ -149,13 +149,13 @@ int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id) const struct net_device_ops *ops = real_dev->netdev_ops; if (real_dev->features & NETIF_F_VLAN_CHALLENGED) { - pr_info("8021q: VLANs not supported on %s\n", name); + pr_info("VLANs not supported on %s\n", name); return -EOPNOTSUPP; } if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) && (!ops->ndo_vlan_rx_add_vid || !ops->ndo_vlan_rx_kill_vid)) { - pr_info("8021q: Device %s has buggy VLAN hw accel\n", name); + pr_info("Device %s has buggy VLAN hw accel\n", name); return -EOPNOTSUPP; } @@ -205,8 +205,6 @@ int register_vlan_dev(struct net_device *dev) grp->nr_vlans++; if (ngrp) { - if (ops->ndo_vlan_rx_register && (real_dev->features & NETIF_F_HW_VLAN_RX)) - ops->ndo_vlan_rx_register(real_dev, ngrp); rcu_assign_pointer(real_dev->vlgrp, ngrp); } if (real_dev->features & NETIF_F_HW_VLAN_FILTER) @@ -344,13 +342,12 @@ static void __vlan_device_event(struct net_device *dev, unsigned long event) case NETDEV_CHANGENAME: vlan_proc_rem_dev(dev); if (vlan_proc_add_dev(dev) < 0) - pr_warning("8021q: failed to change proc name for %s\n", - dev->name); + pr_warn("failed to change proc name for %s\n", + dev->name); break; case NETDEV_REGISTER: if (vlan_proc_add_dev(dev) < 0) - pr_warning("8021q: failed to add proc entry for %s\n", - dev->name); + pr_warn("failed to add proc entry for %s\n", dev->name); break; case NETDEV_UNREGISTER: vlan_proc_rem_dev(dev); @@ -374,7 +371,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, if ((event == NETDEV_UP) && (dev->features & NETIF_F_HW_VLAN_FILTER) && dev->netdev_ops->ndo_vlan_rx_add_vid) { - pr_info("8021q: adding VLAN 0 to HW filter on device %s\n", + pr_info("adding VLAN 0 to HW filter on device %s\n", dev->name); dev->netdev_ops->ndo_vlan_rx_add_vid(dev, 0); } diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 9da07e3..9fd45f3 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -74,6 +74,37 @@ static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev) return netdev_priv(dev); } +static inline struct net_device *vlan_group_get_device(struct vlan_group *vg, + u16 vlan_id) +{ + struct net_device **array; + array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN]; + return array ? array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] : NULL; +} + +static inline void vlan_group_set_device(struct vlan_group *vg, + u16 vlan_id, + struct net_device *dev) +{ + struct net_device **array; + if (!vg) + return; + array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN]; + array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] = dev; +} + +/* Must be invoked with rcu_read_lock or with RTNL. */ +static inline struct net_device *vlan_find_dev(struct net_device *real_dev, + u16 vlan_id) +{ + struct vlan_group *grp = rcu_dereference_rtnl(real_dev->vlgrp); + + if (grp) + return vlan_group_get_device(grp, vlan_id); + + return NULL; +} + /* found in vlan_dev.c */ void vlan_dev_set_ingress_priority(const struct net_device *dev, u32 skb_prio, u16 vlan_prio); diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index c177f9e..77d3532 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -2,6 +2,7 @@ #include #include #include +#include #include "vlan.h" bool vlan_do_receive(struct sk_buff **skbp) @@ -12,11 +13,8 @@ bool vlan_do_receive(struct sk_buff **skbp) struct vlan_pcpu_stats *rx_stats; vlan_dev = vlan_find_dev(skb->dev, vlan_id); - if (!vlan_dev) { - if (vlan_id) - skb->pkt_type = PACKET_OTHERHOST; + if (!vlan_dev) return false; - } skb = *skbp = skb_share_check(skb, GFP_ATOMIC); if (unlikely(!skb)) @@ -63,6 +61,27 @@ bool vlan_do_receive(struct sk_buff **skbp) return true; } +/* Must be invoked with rcu_read_lock or with RTNL. */ +struct net_device *__vlan_find_dev_deep(struct net_device *real_dev, + u16 vlan_id) +{ + struct vlan_group *grp = rcu_dereference_rtnl(real_dev->vlgrp); + + if (grp) { + return vlan_group_get_device(grp, vlan_id); + } else { + /* + * Bonding slaves do not have grp assigned to themselves. + * Grp is assigned to bonding master instead. + */ + if (netif_is_bond_slave(real_dev)) + return __vlan_find_dev_deep(real_dev->master, vlan_id); + } + + return NULL; +} +EXPORT_SYMBOL(__vlan_find_dev_deep); + struct net_device *vlan_dev_real_dev(const struct net_device *dev) { return vlan_dev_info(dev)->real_dev; @@ -75,35 +94,13 @@ u16 vlan_dev_vlan_id(const struct net_device *dev) } EXPORT_SYMBOL(vlan_dev_vlan_id); -/* VLAN rx hw acceleration helper. This acts like netif_{rx,receive_skb}(). */ -int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, - u16 vlan_tci, int polling) -{ - __vlan_hwaccel_put_tag(skb, vlan_tci); - return polling ? netif_receive_skb(skb) : netif_rx(skb); -} -EXPORT_SYMBOL(__vlan_hwaccel_rx); - -gro_result_t vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, - unsigned int vlan_tci, struct sk_buff *skb) -{ - __vlan_hwaccel_put_tag(skb, vlan_tci); - return napi_gro_receive(napi, skb); -} -EXPORT_SYMBOL(vlan_gro_receive); - -gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, - unsigned int vlan_tci) -{ - __vlan_hwaccel_put_tag(napi->skb, vlan_tci); - return napi_gro_frags(napi); -} -EXPORT_SYMBOL(vlan_gro_frags); - static struct sk_buff *vlan_reorder_header(struct sk_buff *skb) { - if (skb_cow(skb, skb_headroom(skb)) < 0) + if (skb_cow(skb, skb_headroom(skb)) < 0) { + kfree_skb(skb); return NULL; + } + memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); skb->mac_header += VLAN_HLEN; return skb; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index b172407..c43a788 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -20,6 +20,8 @@ * 2 of the License, or (at your option) any later version. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -55,7 +57,7 @@ static int vlan_dev_rebuild_header(struct sk_buff *skb) return arp_find(veth->h_dest, skb); #endif default: - pr_debug("%s: unable to resolve type %X addresses.\n", + pr_debug("%s: unable to resolve type %X addresses\n", dev->name, ntohs(veth->h_vlan_encapsulated_proto)); memcpy(veth->h_source, dev->dev_addr, ETH_ALEN); @@ -475,10 +477,12 @@ static void vlan_dev_change_rx_flags(struct net_device *dev, int change) { struct net_device *real_dev = vlan_dev_info(dev)->real_dev; - if (change & IFF_ALLMULTI) - dev_set_allmulti(real_dev, dev->flags & IFF_ALLMULTI ? 1 : -1); - if (change & IFF_PROMISC) - dev_set_promiscuity(real_dev, dev->flags & IFF_PROMISC ? 1 : -1); + if (dev->flags & IFF_UP) { + if (change & IFF_ALLMULTI) + dev_set_allmulti(real_dev, dev->flags & IFF_ALLMULTI ? 1 : -1); + if (change & IFF_PROMISC) + dev_set_promiscuity(real_dev, dev->flags & IFF_PROMISC ? 1 : -1); + } } static void vlan_dev_set_rx_mode(struct net_device *vlan_dev) @@ -518,6 +522,25 @@ static const struct header_ops vlan_header_ops = { .parse = eth_header_parse, }; +static int vlan_passthru_hard_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, + const void *daddr, const void *saddr, + unsigned int len) +{ + struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + + if (saddr == NULL) + saddr = dev->dev_addr; + + return dev_hard_header(skb, real_dev, type, daddr, saddr, len); +} + +static const struct header_ops vlan_passthru_header_ops = { + .create = vlan_passthru_hard_header, + .rebuild = dev_rebuild_header, + .parse = eth_header_parse, +}; + static const struct net_device_ops vlan_netdev_ops; static int vlan_dev_init(struct net_device *dev) @@ -557,7 +580,7 @@ static int vlan_dev_init(struct net_device *dev) dev->needed_headroom = real_dev->needed_headroom; if (real_dev->features & NETIF_F_HW_VLAN_TX) { - dev->header_ops = real_dev->header_ops; + dev->header_ops = &vlan_passthru_header_ops; dev->hard_header_len = real_dev->hard_header_len; } else { dev->header_ops = &vlan_header_ops; @@ -602,8 +625,7 @@ static u32 vlan_dev_fix_features(struct net_device *dev, u32 features) features &= real_dev->features; features &= real_dev->vlan_features; - if (old_features & NETIF_F_SOFT_FEATURES) - features |= old_features & NETIF_F_SOFT_FEATURES; + features |= old_features & NETIF_F_SOFT_FEATURES; if (dev_ethtool_get_rx_csum(real_dev)) features |= NETIF_F_RXCSUM; @@ -616,7 +638,8 @@ static int vlan_ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { const struct vlan_dev_info *vlan = vlan_dev_info(dev); - return dev_ethtool_get_settings(vlan->real_dev, cmd); + + return __ethtool_get_settings(vlan->real_dev, cmd); } static void vlan_ethtool_get_drvinfo(struct net_device *dev, @@ -680,7 +703,6 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = vlan_dev_set_mac_address, .ndo_set_rx_mode = vlan_dev_set_rx_mode, - .ndo_set_multicast_list = vlan_dev_set_rx_mode, .ndo_change_rx_flags = vlan_dev_change_rx_flags, .ndo_do_ioctl = vlan_dev_ioctl, .ndo_neigh_setup = vlan_dev_neigh_setup, diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index be9a5c1..c705612 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -151,7 +152,7 @@ static size_t vlan_get_size(const struct net_device *dev) struct vlan_dev_info *vlan = vlan_dev_info(dev); return nla_total_size(2) + /* IFLA_VLAN_ID */ - sizeof(struct ifla_vlan_flags) + /* IFLA_VLAN_FLAGS */ + nla_total_size(sizeof(struct ifla_vlan_flags)) + /* IFLA_VLAN_FLAGS */ vlan_qos_map_size(vlan->nr_ingress_mappings) + vlan_qos_map_size(vlan->nr_egress_mappings); } diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index d940c49..d34b6da 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -17,6 +17,8 @@ * Jan 20, 1998 Ben Greear Initial Version *****************************************************************************/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -155,7 +157,7 @@ int __net_init vlan_proc_init(struct net *net) return 0; err: - pr_err("%s: can't create entry in proc filesystem!\n", __func__); + pr_err("can't create entry in proc filesystem!\n"); vlan_proc_cleanup(net); return -ENOBUFS; } @@ -229,7 +231,7 @@ static void *vlan_seq_next(struct seq_file *seq, void *v, loff_t *pos) ++*pos; - dev = (struct net_device *)v; + dev = v; if (v == SEQ_START_TOKEN) dev = net_device_entry(&net->dev_base_head); diff --git a/net/9p/client.c b/net/9p/client.c index 5532710..e958178 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -38,6 +38,9 @@ #include #include "protocol.h" +#define CREATE_TRACE_POINTS +#include + /* * Client Option Parsing (code inspired by NFS code) * - a little lazy - parse all client options @@ -72,23 +75,22 @@ inline int p9_is_proto_dotu(struct p9_client *clnt) EXPORT_SYMBOL(p9_is_proto_dotu); /* Interpret mount option for protocol version */ -static int get_protocol_version(const substring_t *name) +static int get_protocol_version(char *s) { int version = -EINVAL; - if (!strncmp("9p2000", name->from, name->to-name->from)) { + if (!strcmp(s, "9p2000")) { version = p9_proto_legacy; P9_DPRINTK(P9_DEBUG_9P, "Protocol version: Legacy\n"); - } else if (!strncmp("9p2000.u", name->from, name->to-name->from)) { + } else if (!strcmp(s, "9p2000.u")) { version = p9_proto_2000u; P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2000.u\n"); - } else if (!strncmp("9p2000.L", name->from, name->to-name->from)) { + } else if (!strcmp(s, "9p2000.L")) { version = p9_proto_2000L; P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2000.L\n"); - } else { - P9_DPRINTK(P9_DEBUG_ERROR, "Unknown protocol version %s. ", - name->from); - } + } else + printk(KERN_INFO "9p: Unknown protocol version %s.\n", s); + return version; } @@ -106,6 +108,7 @@ static int parse_opts(char *opts, struct p9_client *clnt) char *p; substring_t args[MAX_OPT_ARGS]; int option; + char *s; int ret = 0; clnt->proto_version = p9_proto_2000u; @@ -123,40 +126,57 @@ static int parse_opts(char *opts, struct p9_client *clnt) options = tmp_options; while ((p = strsep(&options, ",")) != NULL) { - int token; + int token, r; if (!*p) continue; token = match_token(p, tokens, args); - if (token < Opt_trans) { - int r = match_int(&args[0], &option); + switch (token) { + case Opt_msize: + r = match_int(&args[0], &option); if (r < 0) { P9_DPRINTK(P9_DEBUG_ERROR, - "integer field, but no integer?\n"); + "integer field, but no integer?\n"); ret = r; continue; } - } - switch (token) { - case Opt_msize: clnt->msize = option; break; case Opt_trans: - clnt->trans_mod = v9fs_get_trans_by_name(&args[0]); - if(clnt->trans_mod == NULL) { + s = match_strdup(&args[0]); + if (!s) { + ret = -ENOMEM; P9_DPRINTK(P9_DEBUG_ERROR, - "Could not find request transport: %s\n", - (char *) &args[0]); + "problem allocating copy of trans arg\n"); + goto free_and_return; + } + clnt->trans_mod = v9fs_get_trans_by_name(s); + if (clnt->trans_mod == NULL) { + printk(KERN_INFO + "9p: Could not find " + "request transport: %s\n", s); ret = -EINVAL; + kfree(s); goto free_and_return; } + kfree(s); break; case Opt_legacy: clnt->proto_version = p9_proto_legacy; break; case Opt_version: - ret = get_protocol_version(&args[0]); - if (ret == -EINVAL) + s = match_strdup(&args[0]); + if (!s) { + ret = -ENOMEM; + P9_DPRINTK(P9_DEBUG_ERROR, + "problem allocating copy of version arg\n"); + goto free_and_return; + } + ret = get_protocol_version(s); + if (ret == -EINVAL) { + kfree(s); goto free_and_return; + } + kfree(s); clnt->proto_version = ret; break; default: @@ -184,11 +204,13 @@ free_and_return: * */ -static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) +static struct p9_req_t * +p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size) { unsigned long flags; int row, col; struct p9_req_t *req; + int alloc_msize = min(c->msize, max_size); /* This looks up the original request by tag so we know which * buffer to read the data into */ @@ -226,23 +248,10 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) return ERR_PTR(-ENOMEM); } init_waitqueue_head(req->wq); - if ((c->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == - P9_TRANS_PREF_PAYLOAD_SEP) { - int alloc_msize = min(c->msize, 4096); - req->tc = kmalloc(sizeof(struct p9_fcall)+alloc_msize, - GFP_NOFS); - req->tc->capacity = alloc_msize; - req->rc = kmalloc(sizeof(struct p9_fcall)+alloc_msize, - GFP_NOFS); - req->rc->capacity = alloc_msize; - } else { - req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize, - GFP_NOFS); - req->tc->capacity = c->msize; - req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize, - GFP_NOFS); - req->rc->capacity = c->msize; - } + req->tc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, + GFP_NOFS); + req->rc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, + GFP_NOFS); if ((!req->tc) || (!req->rc)) { printk(KERN_ERR "Couldn't grow tag array\n"); kfree(req->tc); @@ -252,6 +261,8 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) req->wq = NULL; return ERR_PTR(-ENOMEM); } + req->tc->capacity = alloc_msize; + req->rc->capacity = alloc_msize; req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall); req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall); } @@ -280,7 +291,7 @@ struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag) * buffer to read the data into */ tag++; - if(tag >= c->max_tag) + if(tag >= c->max_tag) return NULL; row = tag / P9_ROW_MAXTAG; @@ -456,37 +467,22 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) int ecode; err = p9_parse_header(req->rc, NULL, &type, NULL, 0); + /* + * dump the response from server + * This should be after check errors which poplulate pdu_fcall. + */ + trace_9p_protocol_dump(c, req->rc); if (err) { P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse header %d\n", err); return err; } - if (type != P9_RERROR && type != P9_RLERROR) return 0; if (!p9_is_proto_dotl(c)) { char *ename; - - if (req->tc->pbuf_size) { - /* Handle user buffers */ - size_t len = req->rc->size - req->rc->offset; - if (req->tc->pubuf) { - /* User Buffer */ - err = copy_from_user( - &req->rc->sdata[req->rc->offset], - req->tc->pubuf, len); - if (err) { - err = -EFAULT; - goto out_err; - } - } else { - /* Kernel Buffer */ - memmove(&req->rc->sdata[req->rc->offset], - req->tc->pkbuf, len); - } - } err = p9pdu_readf(req->rc, c->proto_version, "s?d", - &ename, &ecode); + &ename, &ecode); if (err) goto out_err; @@ -496,11 +492,10 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) if (!err || !IS_ERR_VALUE(err)) { err = p9_errstr2errno(ename, strlen(ename)); - P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, - ename); - - kfree(ename); + P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", + -ecode, ename); } + kfree(ename); } else { err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); err = -ecode; @@ -508,7 +503,6 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); } - return err; out_err: @@ -517,6 +511,115 @@ out_err: return err; } +/** + * p9_check_zc_errors - check 9p packet for error return and process it + * @c: current client instance + * @req: request to parse and check for error conditions + * @in_hdrlen: Size of response protocol buffer. + * + * returns error code if one is discovered, otherwise returns 0 + * + * this will have to be more complicated if we have multiple + * error packet types + */ + +static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, + char *uidata, int in_hdrlen, int kern_buf) +{ + int err; + int ecode; + int8_t type; + char *ename = NULL; + + err = p9_parse_header(req->rc, NULL, &type, NULL, 0); + /* + * dump the response from server + * This should be after parse_header which poplulate pdu_fcall. + */ + trace_9p_protocol_dump(c, req->rc); + if (err) { + P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse header %d\n", err); + return err; + } + + if (type != P9_RERROR && type != P9_RLERROR) + return 0; + + if (!p9_is_proto_dotl(c)) { + /* Error is reported in string format */ + uint16_t len; + /* 7 = header size for RERROR, 2 is the size of string len; */ + int inline_len = in_hdrlen - (7 + 2); + + /* Read the size of error string */ + err = p9pdu_readf(req->rc, c->proto_version, "w", &len); + if (err) + goto out_err; + + ename = kmalloc(len + 1, GFP_NOFS); + if (!ename) { + err = -ENOMEM; + goto out_err; + } + if (len <= inline_len) { + /* We have error in protocol buffer itself */ + if (pdu_read(req->rc, ename, len)) { + err = -EFAULT; + goto out_free; + + } + } else { + /* + * Part of the data is in user space buffer. + */ + if (pdu_read(req->rc, ename, inline_len)) { + err = -EFAULT; + goto out_free; + + } + if (kern_buf) { + memcpy(ename + inline_len, uidata, + len - inline_len); + } else { + err = copy_from_user(ename + inline_len, + uidata, len - inline_len); + if (err) { + err = -EFAULT; + goto out_free; + } + } + } + ename[len] = 0; + if (p9_is_proto_dotu(c)) { + /* For dotu we also have error code */ + err = p9pdu_readf(req->rc, + c->proto_version, "d", &ecode); + if (err) + goto out_free; + err = -ecode; + } + if (!err || !IS_ERR_VALUE(err)) { + err = p9_errstr2errno(ename, strlen(ename)); + + P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", + -ecode, ename); + } + kfree(ename); + } else { + err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); + err = -ecode; + + P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); + } + return err; + +out_free: + kfree(ename); +out_err: + P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); + return err; +} + static struct p9_req_t * p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); @@ -560,23 +663,12 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) return 0; } -/** - * p9_client_rpc - issue a request and wait for a response - * @c: client session - * @type: type of request - * @fmt: protocol format string (see protocol.c) - * - * Returns request structure (which client must free using p9_free_req) - */ - -static struct p9_req_t * -p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) +static struct p9_req_t *p9_client_prepare_req(struct p9_client *c, + int8_t type, int req_size, + const char *fmt, va_list ap) { - va_list ap; int tag, err; struct p9_req_t *req; - unsigned long flags; - int sigpending; P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type); @@ -588,12 +680,6 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) if ((c->status == BeginDisconnect) && (type != P9_TCLUNK)) return ERR_PTR(-EIO); - if (signal_pending(current)) { - sigpending = 1; - clear_thread_flag(TIF_SIGPENDING); - } else - sigpending = 0; - tag = P9_NOTAG; if (type != P9_TVERSION) { tag = p9_idpool_get(c->tagpool); @@ -601,18 +687,51 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) return ERR_PTR(-ENOMEM); } - req = p9_tag_alloc(c, tag); + req = p9_tag_alloc(c, tag, req_size); if (IS_ERR(req)) return req; /* marshall the data */ p9pdu_prepare(req->tc, tag, type); - va_start(ap, fmt); err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap); - va_end(ap); if (err) goto reterr; - p9pdu_finalize(req->tc); + p9pdu_finalize(c, req->tc); + trace_9p_client_req(c, type, tag); + return req; +reterr: + p9_free_req(c, req); + return ERR_PTR(err); +} + +/** + * p9_client_rpc - issue a request and wait for a response + * @c: client session + * @type: type of request + * @fmt: protocol format string (see protocol.c) + * + * Returns request structure (which client must free using p9_free_req) + */ + +static struct p9_req_t * +p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) +{ + va_list ap; + int sigpending, err; + unsigned long flags; + struct p9_req_t *req; + + va_start(ap, fmt); + req = p9_client_prepare_req(c, type, c->msize, fmt, ap); + va_end(ap); + if (IS_ERR(req)) + return req; + + if (signal_pending(current)) { + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + } else + sigpending = 0; err = c->trans_mod->request(c, req); if (err < 0) { @@ -620,18 +739,14 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) c->status = Disconnected; goto reterr; } - - P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d\n", req->wq, tag); + /* Wait for the response */ err = wait_event_interruptible(*req->wq, - req->status >= REQ_STATUS_RCVD); - P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d\n", - req->wq, tag, err); + req->status >= REQ_STATUS_RCVD); if (req->status == REQ_STATUS_ERROR) { P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); err = req->t_err; } - if ((err == -ERESTARTSYS) && (c->status == Connected)) { P9_DPRINTK(P9_DEBUG_MUX, "flushing\n"); sigpending = 1; @@ -644,25 +759,103 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) if (req->status == REQ_STATUS_RCVD) err = 0; } - if (sigpending) { spin_lock_irqsave(¤t->sighand->siglock, flags); recalc_sigpending(); spin_unlock_irqrestore(¤t->sighand->siglock, flags); } - if (err < 0) goto reterr; err = p9_check_errors(c, req); - if (!err) { - P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d\n", c, type); + trace_9p_client_res(c, type, req->rc->tag, err); + if (!err) return req; +reterr: + p9_free_req(c, req); + return ERR_PTR(err); +} + +/** + * p9_client_zc_rpc - issue a request and wait for a response + * @c: client session + * @type: type of request + * @uidata: user bffer that should be ued for zero copy read + * @uodata: user buffer that shoud be user for zero copy write + * @inlen: read buffer size + * @olen: write buffer size + * @hdrlen: reader header size, This is the size of response protocol data + * @fmt: protocol format string (see protocol.c) + * + * Returns request structure (which client must free using p9_free_req) + */ +static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, + char *uidata, char *uodata, + int inlen, int olen, int in_hdrlen, + int kern_buf, const char *fmt, ...) +{ + va_list ap; + int sigpending, err; + unsigned long flags; + struct p9_req_t *req; + + va_start(ap, fmt); + /* + * We allocate a inline protocol data of only 4k bytes. + * The actual content is passed in zero-copy fashion. + */ + req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, fmt, ap); + va_end(ap); + if (IS_ERR(req)) + return req; + + if (signal_pending(current)) { + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + } else + sigpending = 0; + + /* If we are called with KERNEL_DS force kern_buf */ + if (segment_eq(get_fs(), KERNEL_DS)) + kern_buf = 1; + + err = c->trans_mod->zc_request(c, req, uidata, uodata, + inlen, olen, in_hdrlen, kern_buf); + if (err < 0) { + if (err == -EIO) + c->status = Disconnected; + if (err != -ERESTARTSYS) + goto reterr; + } + if (req->status == REQ_STATUS_ERROR) { + P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); + err = req->t_err; + } + if ((err == -ERESTARTSYS) && (c->status == Connected)) { + P9_DPRINTK(P9_DEBUG_MUX, "flushing\n"); + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + + if (c->trans_mod->cancel(c, req)) + p9_client_flush(c, req); + + /* if we received the response anyway, don't signal error */ + if (req->status == REQ_STATUS_RCVD) + err = 0; + } + if (sigpending) { + spin_lock_irqsave(¤t->sighand->siglock, flags); + recalc_sigpending(); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); } + if (err < 0) + goto reterr; + err = p9_check_zc_errors(c, req, uidata, in_hdrlen, kern_buf); + trace_9p_client_res(c, type, req->rc->tag, err); + if (!err) + return req; reterr: - P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d error: %d\n", c, type, - err); p9_free_req(c, req); return ERR_PTR(err); } @@ -750,7 +943,7 @@ static int p9_client_version(struct p9_client *c) err = p9pdu_readf(req->rc, c->proto_version, "ds", &msize, &version); if (err) { P9_DPRINTK(P9_DEBUG_9P, "version error %d\n", err); - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(c, req->rc); goto error; } @@ -887,15 +1080,14 @@ EXPORT_SYMBOL(p9_client_begin_disconnect); struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, char *uname, u32 n_uname, char *aname) { - int err; + int err = 0; struct p9_req_t *req; struct p9_fid *fid; struct p9_qid qid; - P9_DPRINTK(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n", - afid ? afid->fid : -1, uname, aname); - err = 0; + P9_DPRINTK(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n", + afid ? afid->fid : -1, uname, aname); fid = p9_fid_create(clnt); if (IS_ERR(fid)) { err = PTR_ERR(fid); @@ -912,7 +1104,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto error; } @@ -972,7 +1164,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, err = p9pdu_readf(req->rc, clnt->proto_version, "R", &nwqids, &wqids); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto clunk_fid; } @@ -1039,7 +1231,7 @@ int p9_client_open(struct p9_fid *fid, int mode) err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } @@ -1082,7 +1274,7 @@ int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", qid, &iounit); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } @@ -1127,7 +1319,7 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } @@ -1166,7 +1358,7 @@ int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, gid_t gid, err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } @@ -1284,17 +1476,42 @@ error: } EXPORT_SYMBOL(p9_client_remove); +int p9_client_unlinkat(struct p9_fid *dfid, const char *name, int flags) +{ + int err = 0; + struct p9_req_t *req; + struct p9_client *clnt; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TUNLINKAT fid %d %s %d\n", + dfid->fid, name, flags); + + clnt = dfid->clnt; + req = p9_client_rpc(clnt, P9_TUNLINKAT, "dsd", dfid->fid, name, flags); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + P9_DPRINTK(P9_DEBUG_9P, "<<< RUNLINKAT fid %d %s\n", dfid->fid, name); + + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_unlinkat); + int p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, u32 count) { - int err, rsize; - struct p9_client *clnt; - struct p9_req_t *req; char *dataptr; + int kernel_buf = 0; + struct p9_req_t *req; + struct p9_client *clnt; + int err, rsize, non_zc = 0; - P9_DPRINTK(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", fid->fid, - (long long unsigned) offset, count); + + P9_DPRINTK(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", + fid->fid, (long long unsigned) offset, count); err = 0; clnt = fid->clnt; @@ -1306,13 +1523,24 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, rsize = count; /* Don't bother zerocopy for small IO (< 1024) */ - if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == - P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) { - req = p9_client_rpc(clnt, P9_TREAD, "dqE", fid->fid, offset, - rsize, data, udata); + if (clnt->trans_mod->zc_request && rsize > 1024) { + char *indata; + if (data) { + kernel_buf = 1; + indata = data; + } else + indata = (char *)udata; + /* + * response header len is 11 + * PDU Header(7) + IO Size (4) + */ + req = p9_client_zc_rpc(clnt, P9_TREAD, indata, NULL, rsize, 0, + 11, kernel_buf, "dqd", fid->fid, + offset, rsize); } else { + non_zc = 1; req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, - rsize); + rsize); } if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1321,13 +1549,13 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count); - if (!req->tc->pbuf_size) { + if (non_zc) { if (data) { memmove(data, dataptr, count); } else { @@ -1353,6 +1581,7 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, u64 offset, u32 count) { int err, rsize; + int kernel_buf = 0; struct p9_client *clnt; struct p9_req_t *req; @@ -1368,19 +1597,24 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, if (count < rsize) rsize = count; - /* Don't bother zerocopy form small IO (< 1024) */ - if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == - P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) { - req = p9_client_rpc(clnt, P9_TWRITE, "dqE", fid->fid, offset, - rsize, data, udata); + /* Don't bother zerocopy for small IO (< 1024) */ + if (clnt->trans_mod->zc_request && rsize > 1024) { + char *odata; + if (data) { + kernel_buf = 1; + odata = data; + } else + odata = (char *)udata; + req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, odata, 0, rsize, + P9_ZC_HDR_SZ, kernel_buf, "dqd", + fid->fid, offset, rsize); } else { - if (data) req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, - offset, rsize, data); + offset, rsize, data); else req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, - offset, rsize, udata); + offset, rsize, udata); } if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1389,7 +1623,7 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } @@ -1429,7 +1663,7 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) err = p9pdu_readf(req->rc, clnt->proto_version, "wS", &ignored, ret); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto error; } @@ -1480,7 +1714,7 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, err = p9pdu_readf(req->rc, clnt->proto_version, "A", ret); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto error; } @@ -1628,7 +1862,7 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb) &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail, &sb->files, &sb->ffree, &sb->fsid, &sb->namelen); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto error; } @@ -1646,7 +1880,8 @@ error: } EXPORT_SYMBOL(p9_client_statfs); -int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, char *name) +int p9_client_rename(struct p9_fid *fid, + struct p9_fid *newdirfid, const char *name) { int err; struct p9_req_t *req; @@ -1673,6 +1908,36 @@ error: } EXPORT_SYMBOL(p9_client_rename); +int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name, + struct p9_fid *newdirfid, const char *new_name) +{ + int err; + struct p9_req_t *req; + struct p9_client *clnt; + + err = 0; + clnt = olddirfid->clnt; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TRENAMEAT olddirfid %d old name %s" + " newdirfid %d new name %s\n", olddirfid->fid, old_name, + newdirfid->fid, new_name); + + req = p9_client_rpc(clnt, P9_TRENAMEAT, "dsds", olddirfid->fid, + old_name, newdirfid->fid, new_name); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RRENAMEAT newdirfid %d new name %s\n", + newdirfid->fid, new_name); + + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_renameat); + /* * An xattrwalk without @attr_name gives the fid for the lisxattr namespace */ @@ -1704,7 +1969,7 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, } err = p9pdu_readf(req->rc, clnt->proto_version, "q", attr_size); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); p9_free_req(clnt, req); goto clunk_fid; } @@ -1750,7 +2015,7 @@ EXPORT_SYMBOL_GPL(p9_client_xattrcreate); int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) { - int err, rsize; + int err, rsize, non_zc = 0; struct p9_client *clnt; struct p9_req_t *req; char *dataptr; @@ -1768,13 +2033,18 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) if (count < rsize) rsize = count; - if ((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == - P9_TRANS_PREF_PAYLOAD_SEP) { - req = p9_client_rpc(clnt, P9_TREADDIR, "dqF", fid->fid, - offset, rsize, data); + /* Don't bother zerocopy for small IO (< 1024) */ + if (clnt->trans_mod->zc_request && rsize > 1024) { + /* + * response header len is 11 + * PDU Header(7) + IO Size (4) + */ + req = p9_client_zc_rpc(clnt, P9_TREADDIR, data, NULL, rsize, 0, + 11, 1, "dqd", fid->fid, offset, rsize); } else { + non_zc = 1; req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, - offset, rsize); + offset, rsize); } if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1783,13 +2053,13 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto free_and_error; } P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); - if (!req->tc->pbuf_size && data) + if (non_zc) memmove(data, dataptr, count); p9_free_req(clnt, req); @@ -1820,7 +2090,7 @@ int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode, err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto error; } P9_DPRINTK(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type, @@ -1851,7 +2121,7 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto error; } P9_DPRINTK(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type, @@ -1886,7 +2156,7 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status) err = p9pdu_readf(req->rc, clnt->proto_version, "b", status); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto error; } P9_DPRINTK(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status); @@ -1919,7 +2189,7 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock) &glock->start, &glock->length, &glock->proc_id, &glock->client_id); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto error; } P9_DPRINTK(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld " @@ -1947,7 +2217,7 @@ int p9_client_readlink(struct p9_fid *fid, char **target) err = p9pdu_readf(req->rc, clnt->proto_version, "s", target); if (err) { - p9pdu_dump(1, req->rc); + trace_9p_protocol_dump(clnt, req->rc); goto error; } P9_DPRINTK(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target); diff --git a/net/9p/mod.c b/net/9p/mod.c index 72c3982..2664d12 100644 --- a/net/9p/mod.c +++ b/net/9p/mod.c @@ -80,14 +80,14 @@ EXPORT_SYMBOL(v9fs_unregister_trans); * @name: string identifying transport * */ -struct p9_trans_module *v9fs_get_trans_by_name(const substring_t *name) +struct p9_trans_module *v9fs_get_trans_by_name(char *s) { struct p9_trans_module *t, *found = NULL; spin_lock(&v9fs_trans_lock); list_for_each_entry(t, &v9fs_trans_list, list) - if (strncmp(t->name, name->from, name->to-name->from) == 0 && + if (strcmp(t->name, s) == 0 && try_module_get(t->owner)) { found = t; break; diff --git a/net/9p/protocol.c b/net/9p/protocol.c index a873277..55e10a9 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -37,46 +37,11 @@ #include #include "protocol.h" +#include + static int p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); -#ifdef CONFIG_NET_9P_DEBUG -void -p9pdu_dump(int way, struct p9_fcall *pdu) -{ - int i, n; - u8 *data = pdu->sdata; - int datalen = pdu->size; - char buf[255]; - int buflen = 255; - - i = n = 0; - if (datalen > (buflen-16)) - datalen = buflen-16; - while (i < datalen) { - n += scnprintf(buf + n, buflen - n, "%02x ", data[i]); - if (i%4 == 3) - n += scnprintf(buf + n, buflen - n, " "); - if (i%32 == 31) - n += scnprintf(buf + n, buflen - n, "\n"); - - i++; - } - n += scnprintf(buf + n, buflen - n, "\n"); - - if (way) - P9_DPRINTK(P9_DEBUG_PKT, "[[[(%d) %s\n", datalen, buf); - else - P9_DPRINTK(P9_DEBUG_PKT, "]]](%d) %s\n", datalen, buf); -} -#else -void -p9pdu_dump(int way, struct p9_fcall *pdu) -{ -} -#endif -EXPORT_SYMBOL(p9pdu_dump); - void p9stat_free(struct p9_wstat *stbuf) { kfree(stbuf->name); @@ -87,7 +52,7 @@ void p9stat_free(struct p9_wstat *stbuf) } EXPORT_SYMBOL(p9stat_free); -static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) +size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) { size_t len = min(pdu->size - pdu->offset, size); memcpy(data, &pdu->sdata[pdu->offset], len); @@ -114,26 +79,6 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size) return size - len; } -static size_t -pdu_write_urw(struct p9_fcall *pdu, const char *kdata, const char __user *udata, - size_t size) -{ - BUG_ON(pdu->size > P9_IOHDRSZ); - pdu->pubuf = (char __user *)udata; - pdu->pkbuf = (char *)kdata; - pdu->pbuf_size = size; - return 0; -} - -static size_t -pdu_write_readdir(struct p9_fcall *pdu, const char *kdata, size_t size) -{ - BUG_ON(pdu->size > P9_READDIRHDRSZ); - pdu->pkbuf = (char *)kdata; - pdu->pbuf_size = size; - return 0; -} - /* b - int8_t w - int16_t @@ -465,26 +410,6 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, errcode = -EFAULT; } break; - case 'E':{ - int32_t cnt = va_arg(ap, int32_t); - const char *k = va_arg(ap, const void *); - const char __user *u = va_arg(ap, - const void __user *); - errcode = p9pdu_writef(pdu, proto_version, "d", - cnt); - if (!errcode && pdu_write_urw(pdu, k, u, cnt)) - errcode = -EFAULT; - } - break; - case 'F':{ - int32_t cnt = va_arg(ap, int32_t); - const char *k = va_arg(ap, const void *); - errcode = p9pdu_writef(pdu, proto_version, "d", - cnt); - if (!errcode && pdu_write_readdir(pdu, k, cnt)) - errcode = -EFAULT; - } - break; case 'U':{ int32_t count = va_arg(ap, int32_t); const char __user *udata = @@ -597,7 +522,7 @@ p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...) return ret; } -int p9stat_read(char *buf, int len, struct p9_wstat *st, int proto_version) +int p9stat_read(struct p9_client *clnt, char *buf, int len, struct p9_wstat *st) { struct p9_fcall fake_pdu; int ret; @@ -607,10 +532,10 @@ int p9stat_read(char *buf, int len, struct p9_wstat *st, int proto_version) fake_pdu.sdata = buf; fake_pdu.offset = 0; - ret = p9pdu_readf(&fake_pdu, proto_version, "S", st); + ret = p9pdu_readf(&fake_pdu, clnt->proto_version, "S", st); if (ret) { P9_DPRINTK(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret); - p9pdu_dump(1, &fake_pdu); + trace_9p_protocol_dump(clnt, &fake_pdu); } return ret; @@ -623,7 +548,7 @@ int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type) return p9pdu_writef(pdu, 0, "dbw", 0, type, tag); } -int p9pdu_finalize(struct p9_fcall *pdu) +int p9pdu_finalize(struct p9_client *clnt, struct p9_fcall *pdu) { int size = pdu->size; int err; @@ -632,11 +557,7 @@ int p9pdu_finalize(struct p9_fcall *pdu) err = p9pdu_writef(pdu, 0, "d", size); pdu->size = size; -#ifdef CONFIG_NET_9P_DEBUG - if ((p9_debug_level & P9_DEBUG_PKT) == P9_DEBUG_PKT) - p9pdu_dump(0, pdu); -#endif - + trace_9p_protocol_dump(clnt, pdu); P9_DPRINTK(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n", pdu->size, pdu->id, pdu->tag); @@ -647,14 +568,10 @@ void p9pdu_reset(struct p9_fcall *pdu) { pdu->offset = 0; pdu->size = 0; - pdu->private = NULL; - pdu->pubuf = NULL; - pdu->pkbuf = NULL; - pdu->pbuf_size = 0; } -int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, - int proto_version) +int p9dirent_read(struct p9_client *clnt, char *buf, int len, + struct p9_dirent *dirent) { struct p9_fcall fake_pdu; int ret; @@ -665,11 +582,11 @@ int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, fake_pdu.sdata = buf; fake_pdu.offset = 0; - ret = p9pdu_readf(&fake_pdu, proto_version, "Qqbs", &dirent->qid, - &dirent->d_off, &dirent->d_type, &nameptr); + ret = p9pdu_readf(&fake_pdu, clnt->proto_version, "Qqbs", &dirent->qid, + &dirent->d_off, &dirent->d_type, &nameptr); if (ret) { P9_DPRINTK(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret); - p9pdu_dump(1, &fake_pdu); + trace_9p_protocol_dump(clnt, &fake_pdu); goto out; } diff --git a/net/9p/protocol.h b/net/9p/protocol.h index 2431c0f..2cc525fa 100644 --- a/net/9p/protocol.h +++ b/net/9p/protocol.h @@ -29,6 +29,6 @@ int p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, va_list ap); int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type); -int p9pdu_finalize(struct p9_fcall *pdu); -void p9pdu_dump(int, struct p9_fcall *); +int p9pdu_finalize(struct p9_client *clnt, struct p9_fcall *pdu); void p9pdu_reset(struct p9_fcall *pdu); +size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size); diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c index 9a70ebd..2ee3879 100644 --- a/net/9p/trans_common.c +++ b/net/9p/trans_common.c @@ -21,30 +21,25 @@ /** * p9_release_req_pages - Release pages after the transaction. - * @*private: PDU's private page of struct trans_rpage_info */ -void -p9_release_req_pages(struct trans_rpage_info *rpinfo) +void p9_release_pages(struct page **pages, int nr_pages) { - int i = 0; + int i; - while (rpinfo->rp_data[i] && rpinfo->rp_nr_pages--) { - put_page(rpinfo->rp_data[i]); - i++; - } + for (i = 0; i < nr_pages; i++) + if (pages[i]) + put_page(pages[i]); } -EXPORT_SYMBOL(p9_release_req_pages); +EXPORT_SYMBOL(p9_release_pages); /** * p9_nr_pages - Return number of pages needed to accommodate the payload. */ -int -p9_nr_pages(struct p9_req_t *req) +int p9_nr_pages(char *data, int len) { unsigned long start_page, end_page; - start_page = (unsigned long)req->tc->pubuf >> PAGE_SHIFT; - end_page = ((unsigned long)req->tc->pubuf + req->tc->pbuf_size + - PAGE_SIZE - 1) >> PAGE_SHIFT; + start_page = (unsigned long)data >> PAGE_SHIFT; + end_page = ((unsigned long)data + len + PAGE_SIZE - 1) >> PAGE_SHIFT; return end_page - start_page; } EXPORT_SYMBOL(p9_nr_pages); @@ -58,35 +53,17 @@ EXPORT_SYMBOL(p9_nr_pages); * @nr_pages: number of pages to accommodate the payload * @rw: Indicates if the pages are for read or write. */ -int -p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len, - int nr_pages, u8 rw) -{ - uint32_t first_page_bytes = 0; - int32_t pdata_mapped_pages; - struct trans_rpage_info *rpinfo; - - *pdata_off = (__force size_t)req->tc->pubuf & (PAGE_SIZE-1); - if (*pdata_off) - first_page_bytes = min(((size_t)PAGE_SIZE - *pdata_off), - req->tc->pbuf_size); +int p9_payload_gup(char *data, int *nr_pages, struct page **pages, int write) +{ + int nr_mapped_pages; - rpinfo = req->tc->private; - pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf, - nr_pages, rw, &rpinfo->rp_data[0]); - if (pdata_mapped_pages <= 0) - return pdata_mapped_pages; + nr_mapped_pages = get_user_pages_fast((unsigned long)data, + *nr_pages, write, pages); + if (nr_mapped_pages <= 0) + return nr_mapped_pages; - rpinfo->rp_nr_pages = pdata_mapped_pages; - if (*pdata_off) { - *pdata_len = first_page_bytes; - *pdata_len += min((req->tc->pbuf_size - *pdata_len), - ((size_t)pdata_mapped_pages - 1) << PAGE_SHIFT); - } else { - *pdata_len = min(req->tc->pbuf_size, - (size_t)pdata_mapped_pages << PAGE_SHIFT); - } + *nr_pages = nr_mapped_pages; return 0; } EXPORT_SYMBOL(p9_payload_gup); diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h index 7630922..173bb55 100644 --- a/net/9p/trans_common.h +++ b/net/9p/trans_common.h @@ -12,21 +12,6 @@ * */ -/* TRUE if it is user context */ -#define P9_IS_USER_CONTEXT (!segment_eq(get_fs(), KERNEL_DS)) - -/** - * struct trans_rpage_info - To store mapped page information in PDU. - * @rp_alloc:Set if this structure is allocd, not a reuse unused space in pdu. - * @rp_nr_pages: Number of mapped pages - * @rp_data: Array of page pointers - */ -struct trans_rpage_info { - u8 rp_alloc; - int rp_nr_pages; - struct page *rp_data[0]; -}; - -void p9_release_req_pages(struct trans_rpage_info *); -int p9_payload_gup(struct p9_req_t *, size_t *, int *, int, u8); -int p9_nr_pages(struct p9_req_t *); +void p9_release_pages(struct page **, int); +int p9_payload_gup(char *, int *, struct page **, int); +int p9_nr_pages(char *, int); diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index e317583..55f0c09 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -150,12 +151,10 @@ static void req_done(struct virtqueue *vq) while (1) { spin_lock_irqsave(&chan->lock, flags); rc = virtqueue_get_buf(chan->vq, &len); - if (rc == NULL) { spin_unlock_irqrestore(&chan->lock, flags); break; } - chan->ring_bufs_avail = 1; spin_unlock_irqrestore(&chan->lock, flags); /* Wakeup if anyone waiting for VirtIO ring space. */ @@ -163,17 +162,6 @@ static void req_done(struct virtqueue *vq) P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); req = p9_tag_lookup(chan->client, rc->tag); - if (req->tc->private) { - struct trans_rpage_info *rp = req->tc->private; - int p = rp->rp_nr_pages; - /*Release pages */ - p9_release_req_pages(rp); - atomic_sub(p, &vp_pinned); - wake_up(&vp_wq); - if (rp->rp_alloc) - kfree(rp); - req->tc->private = NULL; - } req->status = REQ_STATUS_RCVD; p9_client_cb(chan->client, req); } @@ -193,9 +181,8 @@ static void req_done(struct virtqueue *vq) * */ -static int -pack_sg_list(struct scatterlist *sg, int start, int limit, char *data, - int count) +static int pack_sg_list(struct scatterlist *sg, int start, + int limit, char *data, int count) { int s; int index = start; @@ -224,31 +211,36 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) * this takes a list of pages. * @sg: scatter/gather list to pack into * @start: which segment of the sg_list to start at - * @pdata_off: Offset into the first page * @**pdata: a list of pages to add into sg. + * @nr_pages: number of pages to pack into the scatter/gather list + * @data: data to pack into scatter/gather list * @count: amount of data to pack into the scatter/gather list */ static int -pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off, - struct page **pdata, int count) +pack_sg_list_p(struct scatterlist *sg, int start, int limit, + struct page **pdata, int nr_pages, char *data, int count) { - int s; - int i = 0; + int i = 0, s; + int data_off; int index = start; - if (pdata_off) { - s = min((int)(PAGE_SIZE - pdata_off), count); - sg_set_page(&sg[index++], pdata[i++], s, pdata_off); - count -= s; - } - - while (count) { - BUG_ON(index > limit); - s = min((int)PAGE_SIZE, count); - sg_set_page(&sg[index++], pdata[i++], s, 0); + BUG_ON(nr_pages > (limit - start)); + /* + * if the first page doesn't start at + * page boundary find the offset + */ + data_off = offset_in_page(data); + while (nr_pages) { + s = rest_of_page(data); + if (s > count) + s = count; + sg_set_page(&sg[index++], pdata[i++], s, data_off); + data_off = 0; + data += s; count -= s; + nr_pages--; } - return index-start; + return index - start; } /** @@ -261,114 +253,166 @@ pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off, static int p9_virtio_request(struct p9_client *client, struct p9_req_t *req) { - int in, out, inp, outp; - struct virtio_chan *chan = client->trans; + int err; + int in, out; unsigned long flags; - size_t pdata_off = 0; - struct trans_rpage_info *rpinfo = NULL; - int err, pdata_len = 0; + struct virtio_chan *chan = client->trans; P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); req->status = REQ_STATUS_SENT; +req_retry: + spin_lock_irqsave(&chan->lock, flags); + + /* Handle out VirtIO ring buffers */ + out = pack_sg_list(chan->sg, 0, + VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); - if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) { - int nr_pages = p9_nr_pages(req); - int rpinfo_size = sizeof(struct trans_rpage_info) + - sizeof(struct page *) * nr_pages; + in = pack_sg_list(chan->sg, out, + VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity); - if (atomic_read(&vp_pinned) >= chan->p9_max_pages) { - err = wait_event_interruptible(vp_wq, - atomic_read(&vp_pinned) < chan->p9_max_pages); + err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); + if (err < 0) { + if (err == -ENOSPC) { + chan->ring_bufs_avail = 0; + spin_unlock_irqrestore(&chan->lock, flags); + err = wait_event_interruptible(*chan->vc_wq, + chan->ring_bufs_avail); if (err == -ERESTARTSYS) return err; - P9_DPRINTK(P9_DEBUG_TRANS, "9p: May gup pages now.\n"); - } - if (rpinfo_size <= (req->tc->capacity - req->tc->size)) { - /* We can use sdata */ - req->tc->private = req->tc->sdata + req->tc->size; - rpinfo = (struct trans_rpage_info *)req->tc->private; - rpinfo->rp_alloc = 0; + P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n"); + goto req_retry; } else { - req->tc->private = kmalloc(rpinfo_size, GFP_NOFS); - if (!req->tc->private) { - P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: " - "private kmalloc returned NULL"); - return -ENOMEM; - } - rpinfo = (struct trans_rpage_info *)req->tc->private; - rpinfo->rp_alloc = 1; + spin_unlock_irqrestore(&chan->lock, flags); + P9_DPRINTK(P9_DEBUG_TRANS, + "9p debug: " + "virtio rpc add_buf returned failure"); + return -EIO; } + } + virtqueue_kick(chan->vq); + spin_unlock_irqrestore(&chan->lock, flags); - err = p9_payload_gup(req, &pdata_off, &pdata_len, nr_pages, - req->tc->id == P9_TREAD ? 1 : 0); - if (err < 0) { - if (rpinfo->rp_alloc) - kfree(rpinfo); + P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); + return 0; +} + +static int p9_get_mapped_pages(struct virtio_chan *chan, + struct page **pages, char *data, + int nr_pages, int write, int kern_buf) +{ + int err; + if (!kern_buf) { + /* + * We allow only p9_max_pages pinned. We wait for the + * Other zc request to finish here + */ + if (atomic_read(&vp_pinned) >= chan->p9_max_pages) { + err = wait_event_interruptible(vp_wq, + (atomic_read(&vp_pinned) < chan->p9_max_pages)); + if (err == -ERESTARTSYS) + return err; + } + err = p9_payload_gup(data, &nr_pages, pages, write); + if (err < 0) return err; - } else { - atomic_add(rpinfo->rp_nr_pages, &vp_pinned); + atomic_add(nr_pages, &vp_pinned); + } else { + /* kernel buffer, no need to pin pages */ + int s, index = 0; + int count = nr_pages; + while (nr_pages) { + s = rest_of_page(data); + pages[index++] = kmap_to_page(data); + data += s; + nr_pages--; } + nr_pages = count; } + return nr_pages; +} -req_retry_pinned: - spin_lock_irqsave(&chan->lock, flags); +/** + * p9_virtio_zc_request - issue a zero copy request + * @client: client instance issuing the request + * @req: request to be issued + * @uidata: user bffer that should be ued for zero copy read + * @uodata: user buffer that shoud be user for zero copy write + * @inlen: read buffer size + * @olen: write buffer size + * @hdrlen: reader header size, This is the size of response protocol data + * + */ +static int +p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, + char *uidata, char *uodata, int inlen, + int outlen, int in_hdr_len, int kern_buf) +{ + int in, out, err; + unsigned long flags; + int in_nr_pages = 0, out_nr_pages = 0; + struct page **in_pages = NULL, **out_pages = NULL; + struct virtio_chan *chan = client->trans; - /* Handle out VirtIO ring buffers */ - out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, - req->tc->size); - - if (req->tc->pbuf_size && (req->tc->id == P9_TWRITE)) { - /* We have additional write payload buffer to take care */ - if (req->tc->pubuf && P9_IS_USER_CONTEXT) { - outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, - pdata_off, rpinfo->rp_data, pdata_len); - } else { - char *pbuf; - if (req->tc->pubuf) - pbuf = (__force char *) req->tc->pubuf; - else - pbuf = req->tc->pkbuf; - outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf, - req->tc->pbuf_size); + P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); + + if (uodata) { + out_nr_pages = p9_nr_pages(uodata, outlen); + out_pages = kmalloc(sizeof(struct page *) * out_nr_pages, + GFP_NOFS); + if (!out_pages) { + err = -ENOMEM; + goto err_out; + } + out_nr_pages = p9_get_mapped_pages(chan, out_pages, uodata, + out_nr_pages, 0, kern_buf); + if (out_nr_pages < 0) { + err = out_nr_pages; + kfree(out_pages); + out_pages = NULL; + goto err_out; } - out += outp; } - - /* Handle in VirtIO ring buffers */ - if (req->tc->pbuf_size && - ((req->tc->id == P9_TREAD) || (req->tc->id == P9_TREADDIR))) { - /* - * Take care of additional Read payload. - * 11 is the read/write header = PDU Header(7) + IO Size (4). - * Arrange in such a way that server places header in the - * alloced memory and payload onto the user buffer. - */ - inp = pack_sg_list(chan->sg, out, - VIRTQUEUE_NUM, req->rc->sdata, 11); - /* - * Running executables in the filesystem may result in - * a read request with kernel buffer as opposed to user buffer. - */ - if (req->tc->pubuf && P9_IS_USER_CONTEXT) { - in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM, - pdata_off, rpinfo->rp_data, pdata_len); - } else { - char *pbuf; - if (req->tc->pubuf) - pbuf = (__force char *) req->tc->pubuf; - else - pbuf = req->tc->pkbuf; - - in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM, - pbuf, req->tc->pbuf_size); + if (uidata) { + in_nr_pages = p9_nr_pages(uidata, inlen); + in_pages = kmalloc(sizeof(struct page *) * in_nr_pages, + GFP_NOFS); + if (!in_pages) { + err = -ENOMEM; + goto err_out; + } + in_nr_pages = p9_get_mapped_pages(chan, in_pages, uidata, + in_nr_pages, 1, kern_buf); + if (in_nr_pages < 0) { + err = in_nr_pages; + kfree(in_pages); + in_pages = NULL; + goto err_out; } - in += inp; - } else { - in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, - req->rc->sdata, req->rc->capacity); } + req->status = REQ_STATUS_SENT; +req_retry_pinned: + spin_lock_irqsave(&chan->lock, flags); + /* out data */ + out = pack_sg_list(chan->sg, 0, + VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); + + if (out_pages) + out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, + out_pages, out_nr_pages, uodata, outlen); + /* + * Take care of in data + * For example TREAD have 11. + * 11 is the read/write header = PDU Header(7) + IO Size (4). + * Arrange in such a way that server places header in the + * alloced memory and payload onto the user buffer. + */ + in = pack_sg_list(chan->sg, out, + VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len); + if (in_pages) + in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM, + in_pages, in_nr_pages, uidata, inlen); err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); if (err < 0) { @@ -376,28 +420,45 @@ req_retry_pinned: chan->ring_bufs_avail = 0; spin_unlock_irqrestore(&chan->lock, flags); err = wait_event_interruptible(*chan->vc_wq, - chan->ring_bufs_avail); + chan->ring_bufs_avail); if (err == -ERESTARTSYS) - return err; + goto err_out; P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n"); goto req_retry_pinned; } else { spin_unlock_irqrestore(&chan->lock, flags); P9_DPRINTK(P9_DEBUG_TRANS, - "9p debug: " - "virtio rpc add_buf returned failure"); - if (rpinfo && rpinfo->rp_alloc) - kfree(rpinfo); - return -EIO; + "9p debug: " + "virtio rpc add_buf returned failure"); + err = -EIO; + goto err_out; } } - virtqueue_kick(chan->vq); spin_unlock_irqrestore(&chan->lock, flags); - P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); - return 0; + err = wait_event_interruptible(*req->wq, + req->status >= REQ_STATUS_RCVD); + /* + * Non kernel buffers are pinned, unpin them + */ +err_out: + if (!kern_buf) { + if (in_pages) { + p9_release_pages(in_pages, in_nr_pages); + atomic_sub(in_nr_pages, &vp_pinned); + } + if (out_pages) { + p9_release_pages(out_pages, out_nr_pages); + atomic_sub(out_nr_pages, &vp_pinned); + } + /* wakeup anybody waiting for slots to pin pages */ + wake_up(&vp_wq); + } + kfree(in_pages); + kfree(out_pages); + return err; } static ssize_t p9_mount_tag_show(struct device *dev, @@ -591,8 +652,8 @@ static struct p9_trans_module p9_virtio_trans = { .create = p9_virtio_create, .close = p9_virtio_close, .request = p9_virtio_request, + .zc_request = p9_virtio_zc_request, .cancel = p9_virtio_cancel, - /* * We leave one entry for input and one entry for response * headers. We also skip one more entry to accomodate, address @@ -600,7 +661,6 @@ static struct p9_trans_module p9_virtio_trans = { * page in zero copy. */ .maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3), - .pref = P9_TRANS_PREF_PAYLOAD_SEP, .def = 0, .owner = THIS_MODULE, }; diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 50dce79..173a2e8 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -39,6 +39,7 @@ #include #include #include +#include int sysctl_aarp_expiry_time = AARP_EXPIRY_TIME; int sysctl_aarp_tick_time = AARP_TICK_TIME; @@ -779,87 +780,87 @@ static int aarp_rcv(struct sk_buff *skb, struct net_device *dev, } switch (function) { - case AARP_REPLY: - if (!unresolved_count) /* Speed up */ - break; - - /* Find the entry. */ - a = __aarp_find_entry(unresolved[hash], dev, &sa); - if (!a || dev != a->dev) - break; + case AARP_REPLY: + if (!unresolved_count) /* Speed up */ + break; - /* We can fill one in - this is good. */ - memcpy(a->hwaddr, ea->hw_src, ETH_ALEN); - __aarp_resolved(&unresolved[hash], a, hash); - if (!unresolved_count) - mod_timer(&aarp_timer, - jiffies + sysctl_aarp_expiry_time); + /* Find the entry. */ + a = __aarp_find_entry(unresolved[hash], dev, &sa); + if (!a || dev != a->dev) break; - case AARP_REQUEST: - case AARP_PROBE: + /* We can fill one in - this is good. */ + memcpy(a->hwaddr, ea->hw_src, ETH_ALEN); + __aarp_resolved(&unresolved[hash], a, hash); + if (!unresolved_count) + mod_timer(&aarp_timer, + jiffies + sysctl_aarp_expiry_time); + break; + + case AARP_REQUEST: + case AARP_PROBE: + + /* + * If it is my address set ma to my address and reply. + * We can treat probe and request the same. Probe + * simply means we shouldn't cache the querying host, + * as in a probe they are proposing an address not + * using one. + * + * Support for proxy-AARP added. We check if the + * address is one of our proxies before we toss the + * packet out. + */ + + sa.s_node = ea->pa_dst_node; + sa.s_net = ea->pa_dst_net; + + /* See if we have a matching proxy. */ + ma = __aarp_proxy_find(dev, &sa); + if (!ma) + ma = &ifa->address; + else { /* We need to make a copy of the entry. */ + da.s_node = sa.s_node; + da.s_net = sa.s_net; + ma = &da; + } + if (function == AARP_PROBE) { /* - * If it is my address set ma to my address and reply. - * We can treat probe and request the same. Probe - * simply means we shouldn't cache the querying host, - * as in a probe they are proposing an address not - * using one. - * - * Support for proxy-AARP added. We check if the - * address is one of our proxies before we toss the - * packet out. + * A probe implies someone trying to get an + * address. So as a precaution flush any + * entries we have for this address. */ + a = __aarp_find_entry(resolved[sa.s_node % + (AARP_HASH_SIZE - 1)], + skb->dev, &sa); - sa.s_node = ea->pa_dst_node; - sa.s_net = ea->pa_dst_net; - - /* See if we have a matching proxy. */ - ma = __aarp_proxy_find(dev, &sa); - if (!ma) - ma = &ifa->address; - else { /* We need to make a copy of the entry. */ - da.s_node = sa.s_node; - da.s_net = sa.s_net; - ma = &da; - } - - if (function == AARP_PROBE) { - /* - * A probe implies someone trying to get an - * address. So as a precaution flush any - * entries we have for this address. - */ - a = __aarp_find_entry(resolved[sa.s_node % - (AARP_HASH_SIZE - 1)], - skb->dev, &sa); - - /* - * Make it expire next tick - that avoids us - * getting into a probe/flush/learn/probe/ - * flush/learn cycle during probing of a slow - * to respond host addr. - */ - if (a) { - a->expires_at = jiffies - 1; - mod_timer(&aarp_timer, jiffies + - sysctl_aarp_tick_time); - } + /* + * Make it expire next tick - that avoids us + * getting into a probe/flush/learn/probe/ + * flush/learn cycle during probing of a slow + * to respond host addr. + */ + if (a) { + a->expires_at = jiffies - 1; + mod_timer(&aarp_timer, jiffies + + sysctl_aarp_tick_time); } + } - if (sa.s_node != ma->s_node) - break; + if (sa.s_node != ma->s_node) + break; - if (sa.s_net && ma->s_net && sa.s_net != ma->s_net) - break; + if (sa.s_net && ma->s_net && sa.s_net != ma->s_net) + break; - sa.s_node = ea->pa_src_node; - sa.s_net = ea->pa_src_net; + sa.s_node = ea->pa_src_node; + sa.s_net = ea->pa_src_net; - /* aarp_my_address has found the address to use for us. - */ - aarp_send_reply(dev, ma, &sa, ea->hw_src); - break; + /* aarp_my_address has found the address to use for us. + */ + aarp_send_reply(dev, ma, &sa, ea->hw_src); + break; } unlock: diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c index 6ef0e76..b5b1a22 100644 --- a/net/appletalk/atalk_proc.c +++ b/net/appletalk/atalk_proc.c @@ -14,6 +14,7 @@ #include #include #include +#include static __inline__ struct atalk_iface *atalk_get_interface_idx(loff_t pos) diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 956a530..79aaac2 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -684,192 +684,192 @@ static int atif_ioctl(int cmd, void __user *arg) atif = atalk_find_dev(dev); switch (cmd) { - case SIOCSIFADDR: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - if (sa->sat_family != AF_APPLETALK) - return -EINVAL; - if (dev->type != ARPHRD_ETHER && - dev->type != ARPHRD_LOOPBACK && - dev->type != ARPHRD_LOCALTLK && - dev->type != ARPHRD_PPP) - return -EPROTONOSUPPORT; - - nr = (struct atalk_netrange *)&sa->sat_zero[0]; - add_route = 1; - - /* - * if this is a point-to-point iface, and we already - * have an iface for this AppleTalk address, then we - * should not add a route - */ - if ((dev->flags & IFF_POINTOPOINT) && - atalk_find_interface(sa->sat_addr.s_net, - sa->sat_addr.s_node)) { - printk(KERN_DEBUG "AppleTalk: point-to-point " - "interface added with " - "existing address\n"); - add_route = 0; - } - - /* - * Phase 1 is fine on LocalTalk but we don't do - * EtherTalk phase 1. Anyone wanting to add it go ahead. - */ - if (dev->type == ARPHRD_ETHER && nr->nr_phase != 2) - return -EPROTONOSUPPORT; - if (sa->sat_addr.s_node == ATADDR_BCAST || - sa->sat_addr.s_node == 254) - return -EINVAL; - if (atif) { - /* Already setting address */ - if (atif->status & ATIF_PROBE) - return -EBUSY; - - atif->address.s_net = sa->sat_addr.s_net; - atif->address.s_node = sa->sat_addr.s_node; - atrtr_device_down(dev); /* Flush old routes */ - } else { - atif = atif_add_device(dev, &sa->sat_addr); - if (!atif) - return -ENOMEM; - } - atif->nets = *nr; - - /* - * Check if the chosen address is used. If so we - * error and atalkd will try another. - */ + case SIOCSIFADDR: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (sa->sat_family != AF_APPLETALK) + return -EINVAL; + if (dev->type != ARPHRD_ETHER && + dev->type != ARPHRD_LOOPBACK && + dev->type != ARPHRD_LOCALTLK && + dev->type != ARPHRD_PPP) + return -EPROTONOSUPPORT; + + nr = (struct atalk_netrange *)&sa->sat_zero[0]; + add_route = 1; - if (!(dev->flags & IFF_LOOPBACK) && - !(dev->flags & IFF_POINTOPOINT) && - atif_probe_device(atif) < 0) { - atif_drop_device(dev); - return -EADDRINUSE; - } - - /* Hey it worked - add the direct routes */ - sa = (struct sockaddr_at *)&rtdef.rt_gateway; - sa->sat_family = AF_APPLETALK; - sa->sat_addr.s_net = atif->address.s_net; - sa->sat_addr.s_node = atif->address.s_node; - sa = (struct sockaddr_at *)&rtdef.rt_dst; - rtdef.rt_flags = RTF_UP; - sa->sat_family = AF_APPLETALK; - sa->sat_addr.s_node = ATADDR_ANYNODE; - if (dev->flags & IFF_LOOPBACK || - dev->flags & IFF_POINTOPOINT) - rtdef.rt_flags |= RTF_HOST; - - /* Routerless initial state */ - if (nr->nr_firstnet == htons(0) && - nr->nr_lastnet == htons(0xFFFE)) { - sa->sat_addr.s_net = atif->address.s_net; - atrtr_create(&rtdef, dev); - atrtr_set_default(dev); - } else { - limit = ntohs(nr->nr_lastnet); - if (limit - ntohs(nr->nr_firstnet) > 4096) { - printk(KERN_WARNING "Too many routes/" - "iface.\n"); - return -EINVAL; - } - if (add_route) - for (ct = ntohs(nr->nr_firstnet); - ct <= limit; ct++) { - sa->sat_addr.s_net = htons(ct); - atrtr_create(&rtdef, dev); - } - } - dev_mc_add_global(dev, aarp_mcast); - return 0; + /* + * if this is a point-to-point iface, and we already + * have an iface for this AppleTalk address, then we + * should not add a route + */ + if ((dev->flags & IFF_POINTOPOINT) && + atalk_find_interface(sa->sat_addr.s_net, + sa->sat_addr.s_node)) { + printk(KERN_DEBUG "AppleTalk: point-to-point " + "interface added with " + "existing address\n"); + add_route = 0; + } - case SIOCGIFADDR: + /* + * Phase 1 is fine on LocalTalk but we don't do + * EtherTalk phase 1. Anyone wanting to add it go ahead. + */ + if (dev->type == ARPHRD_ETHER && nr->nr_phase != 2) + return -EPROTONOSUPPORT; + if (sa->sat_addr.s_node == ATADDR_BCAST || + sa->sat_addr.s_node == 254) + return -EINVAL; + if (atif) { + /* Already setting address */ + if (atif->status & ATIF_PROBE) + return -EBUSY; + + atif->address.s_net = sa->sat_addr.s_net; + atif->address.s_node = sa->sat_addr.s_node; + atrtr_device_down(dev); /* Flush old routes */ + } else { + atif = atif_add_device(dev, &sa->sat_addr); if (!atif) - return -EADDRNOTAVAIL; + return -ENOMEM; + } + atif->nets = *nr; - sa->sat_family = AF_APPLETALK; - sa->sat_addr = atif->address; - break; + /* + * Check if the chosen address is used. If so we + * error and atalkd will try another. + */ - case SIOCGIFBRDADDR: - if (!atif) - return -EADDRNOTAVAIL; + if (!(dev->flags & IFF_LOOPBACK) && + !(dev->flags & IFF_POINTOPOINT) && + atif_probe_device(atif) < 0) { + atif_drop_device(dev); + return -EADDRINUSE; + } - sa->sat_family = AF_APPLETALK; + /* Hey it worked - add the direct routes */ + sa = (struct sockaddr_at *)&rtdef.rt_gateway; + sa->sat_family = AF_APPLETALK; + sa->sat_addr.s_net = atif->address.s_net; + sa->sat_addr.s_node = atif->address.s_node; + sa = (struct sockaddr_at *)&rtdef.rt_dst; + rtdef.rt_flags = RTF_UP; + sa->sat_family = AF_APPLETALK; + sa->sat_addr.s_node = ATADDR_ANYNODE; + if (dev->flags & IFF_LOOPBACK || + dev->flags & IFF_POINTOPOINT) + rtdef.rt_flags |= RTF_HOST; + + /* Routerless initial state */ + if (nr->nr_firstnet == htons(0) && + nr->nr_lastnet == htons(0xFFFE)) { sa->sat_addr.s_net = atif->address.s_net; - sa->sat_addr.s_node = ATADDR_BCAST; - break; - - case SIOCATALKDIFADDR: - case SIOCDIFADDR: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - if (sa->sat_family != AF_APPLETALK) - return -EINVAL; - atalk_dev_down(dev); - break; - - case SIOCSARP: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - if (sa->sat_family != AF_APPLETALK) + atrtr_create(&rtdef, dev); + atrtr_set_default(dev); + } else { + limit = ntohs(nr->nr_lastnet); + if (limit - ntohs(nr->nr_firstnet) > 4096) { + printk(KERN_WARNING "Too many routes/" + "iface.\n"); return -EINVAL; - /* - * for now, we only support proxy AARP on ELAP; - * we should be able to do it for LocalTalk, too. - */ - if (dev->type != ARPHRD_ETHER) - return -EPROTONOSUPPORT; - - /* - * atif points to the current interface on this network; - * we aren't concerned about its current status (at - * least for now), but it has all the settings about - * the network we're going to probe. Consequently, it - * must exist. - */ - if (!atif) - return -EADDRNOTAVAIL; + } + if (add_route) + for (ct = ntohs(nr->nr_firstnet); + ct <= limit; ct++) { + sa->sat_addr.s_net = htons(ct); + atrtr_create(&rtdef, dev); + } + } + dev_mc_add_global(dev, aarp_mcast); + return 0; + + case SIOCGIFADDR: + if (!atif) + return -EADDRNOTAVAIL; + + sa->sat_family = AF_APPLETALK; + sa->sat_addr = atif->address; + break; + + case SIOCGIFBRDADDR: + if (!atif) + return -EADDRNOTAVAIL; + + sa->sat_family = AF_APPLETALK; + sa->sat_addr.s_net = atif->address.s_net; + sa->sat_addr.s_node = ATADDR_BCAST; + break; + + case SIOCATALKDIFADDR: + case SIOCDIFADDR: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (sa->sat_family != AF_APPLETALK) + return -EINVAL; + atalk_dev_down(dev); + break; - nr = (struct atalk_netrange *)&(atif->nets); - /* - * Phase 1 is fine on Localtalk but we don't do - * Ethertalk phase 1. Anyone wanting to add it go ahead. - */ - if (dev->type == ARPHRD_ETHER && nr->nr_phase != 2) - return -EPROTONOSUPPORT; + case SIOCSARP: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (sa->sat_family != AF_APPLETALK) + return -EINVAL; + /* + * for now, we only support proxy AARP on ELAP; + * we should be able to do it for LocalTalk, too. + */ + if (dev->type != ARPHRD_ETHER) + return -EPROTONOSUPPORT; - if (sa->sat_addr.s_node == ATADDR_BCAST || - sa->sat_addr.s_node == 254) - return -EINVAL; + /* + * atif points to the current interface on this network; + * we aren't concerned about its current status (at + * least for now), but it has all the settings about + * the network we're going to probe. Consequently, it + * must exist. + */ + if (!atif) + return -EADDRNOTAVAIL; - /* - * Check if the chosen address is used. If so we - * error and ATCP will try another. - */ - if (atif_proxy_probe_device(atif, &(sa->sat_addr)) < 0) - return -EADDRINUSE; + nr = (struct atalk_netrange *)&(atif->nets); + /* + * Phase 1 is fine on Localtalk but we don't do + * Ethertalk phase 1. Anyone wanting to add it go ahead. + */ + if (dev->type == ARPHRD_ETHER && nr->nr_phase != 2) + return -EPROTONOSUPPORT; - /* - * We now have an address on the local network, and - * the AARP code will defend it for us until we take it - * down. We don't set up any routes right now, because - * ATCP will install them manually via SIOCADDRT. - */ - break; + if (sa->sat_addr.s_node == ATADDR_BCAST || + sa->sat_addr.s_node == 254) + return -EINVAL; - case SIOCDARP: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - if (sa->sat_family != AF_APPLETALK) - return -EINVAL; - if (!atif) - return -EADDRNOTAVAIL; + /* + * Check if the chosen address is used. If so we + * error and ATCP will try another. + */ + if (atif_proxy_probe_device(atif, &(sa->sat_addr)) < 0) + return -EADDRINUSE; - /* give to aarp module to remove proxy entry */ - aarp_proxy_remove(atif->dev, &(sa->sat_addr)); - return 0; + /* + * We now have an address on the local network, and + * the AARP code will defend it for us until we take it + * down. We don't set up any routes right now, because + * ATCP will install them manually via SIOCADDRT. + */ + break; + + case SIOCDARP: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (sa->sat_family != AF_APPLETALK) + return -EINVAL; + if (!atif) + return -EADDRNOTAVAIL; + + /* give to aarp module to remove proxy entry */ + aarp_proxy_remove(atif->dev, &(sa->sat_addr)); + return 0; } return copy_to_user(arg, &atreq, sizeof(atreq)) ? -EFAULT : 0; @@ -884,25 +884,25 @@ static int atrtr_ioctl(unsigned int cmd, void __user *arg) return -EFAULT; switch (cmd) { - case SIOCDELRT: - if (rt.rt_dst.sa_family != AF_APPLETALK) - return -EINVAL; - return atrtr_delete(&((struct sockaddr_at *) - &rt.rt_dst)->sat_addr); - - case SIOCADDRT: { - struct net_device *dev = NULL; - if (rt.rt_dev) { - char name[IFNAMSIZ]; - if (copy_from_user(name, rt.rt_dev, IFNAMSIZ-1)) - return -EFAULT; - name[IFNAMSIZ-1] = '\0'; - dev = __dev_get_by_name(&init_net, name); - if (!dev) - return -ENODEV; - } - return atrtr_create(&rt, dev); + case SIOCDELRT: + if (rt.rt_dst.sa_family != AF_APPLETALK) + return -EINVAL; + return atrtr_delete(&((struct sockaddr_at *) + &rt.rt_dst)->sat_addr); + + case SIOCADDRT: { + struct net_device *dev = NULL; + if (rt.rt_dev) { + char name[IFNAMSIZ]; + if (copy_from_user(name, rt.rt_dev, IFNAMSIZ-1)) + return -EFAULT; + name[IFNAMSIZ-1] = '\0'; + dev = __dev_get_by_name(&init_net, name); + if (!dev) + return -ENODEV; } + return atrtr_create(&rt, dev); + } } return -EINVAL; } @@ -951,13 +951,12 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset, /* checksum stuff in frags */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; if (copy > len) copy = len; @@ -1495,8 +1494,6 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, goto drop; /* Queue packet (standard) */ - skb->sk = sock; - if (sock_queue_rcv_skb(sock, skb) < 0) goto drop; @@ -1650,7 +1647,6 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr if (!skb) goto out; - skb->sk = sk; skb_reserve(skb, ddp_dl->header_length); skb_reserve(skb, dev->hard_header_len); skb->dev = dev; @@ -1741,7 +1737,6 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr size_t size, int flags) { struct sock *sk = sock->sk; - struct sockaddr_at *sat = (struct sockaddr_at *)msg->msg_name; struct ddpehdr *ddp; int copied = 0; int offset = 0; @@ -1770,14 +1765,13 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr } err = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, copied); - if (!err) { - if (sat) { - sat->sat_family = AF_APPLETALK; - sat->sat_port = ddp->deh_sport; - sat->sat_addr.s_node = ddp->deh_snode; - sat->sat_addr.s_net = ddp->deh_snet; - } - msg->msg_namelen = sizeof(*sat); + if (!err && msg->msg_name) { + struct sockaddr_at *sat = msg->msg_name; + sat->sat_family = AF_APPLETALK; + sat->sat_port = ddp->deh_sport; + sat->sat_addr.s_node = ddp->deh_snode; + sat->sat_addr.s_net = ddp->deh_snet; + msg->msg_namelen = sizeof(*sat); } skb_free_datagram(sk, skb); /* Free the datagram. */ diff --git a/net/atm/atm_misc.c b/net/atm/atm_misc.c index fc63526..f41f026 100644 --- a/net/atm/atm_misc.c +++ b/net/atm/atm_misc.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include int atm_charge(struct atm_vcc *vcc, int truesize) { diff --git a/net/atm/clip.c b/net/atm/clip.c index 5889074..8523940 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -37,7 +37,7 @@ #include #include /* for htons etc. */ #include /* save/restore_flags */ -#include +#include #include "common.h" #include "resources.h" @@ -271,10 +271,8 @@ static const struct neigh_ops clip_neigh_ops = { .family = AF_INET, .solicit = clip_neigh_solicit, .error_report = clip_neigh_error, - .output = dev_queue_xmit, - .connected_output = dev_queue_xmit, - .hh_output = dev_queue_xmit, - .queue_xmit = dev_queue_xmit, + .output = neigh_direct_output, + .connected_output = neigh_direct_output, }; static int clip_constructor(struct neighbour *neigh) diff --git a/net/atm/common.c b/net/atm/common.c index 4b263b8..0ca06e8 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -23,7 +23,7 @@ #include #include -#include +#include #include "resources.h" /* atm_find_dev */ #include "common.h" /* prototypes */ @@ -500,8 +500,6 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, struct sk_buff *skb; int copied, error = -EINVAL; - msg->msg_namelen = 0; - if (sock->state != SS_CONNECTED) return -ENOTCONN; if (flags & ~MSG_DONTWAIT) /* only handle MSG_DONTWAIT */ diff --git a/net/atm/lec.c b/net/atm/lec.c index ba48daa..f1964ca 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -643,7 +643,7 @@ static const struct net_device_ops lec_netdev_ops = { .ndo_start_xmit = lec_start_xmit, .ndo_change_mtu = lec_change_mtu, .ndo_tx_timeout = lec_tx_timeout, - .ndo_set_multicast_list = lec_set_multicast_list, + .ndo_set_rx_mode = lec_set_multicast_list, }; static const unsigned char lec_ctrl_magic[] = { @@ -1335,7 +1335,7 @@ static void lane2_associate_ind(struct net_device *dev, const u8 *mac_addr, #include #include #include -#include +#include #include #include diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 3ccca42..aa97240 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -1005,7 +1005,7 @@ static int mpoa_event_listener(struct notifier_block *mpoa_notifier, struct mpoa_client *mpc; struct lec_priv *priv; - dev = (struct net_device *)dev_ptr; + dev = dev_ptr; if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c index e9aced0..db4a11c 100644 --- a/net/atm/pppoatm.c +++ b/net/atm/pppoatm.c @@ -37,6 +37,7 @@ #include #include +#include #include #include #include diff --git a/net/atm/proc.c b/net/atm/proc.c index be3afde..0d020de 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -27,7 +27,7 @@ #include #include #include /* for HZ */ -#include +#include #include "resources.h" #include "common.h" /* atm_proc_init prototype */ #include "signaling.h" /* to get sigd - ugly too */ diff --git a/net/atm/pvc.c b/net/atm/pvc.c index db0dd47..ae03240 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -11,6 +11,7 @@ #include #include #include +#include #include /* for sock_no_* */ #include "resources.h" /* devs and vccs */ diff --git a/net/atm/svc.c b/net/atm/svc.c index 754ee47..1281049 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -20,6 +20,7 @@ #include #include /* for sock_no_* */ #include +#include #include "resources.h" #include "common.h" /* common for PVCs and SVCs */ diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 86ac37f..7b8db0e 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1635,11 +1635,11 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock, skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); - if (msg->msg_namelen != 0) { - struct sockaddr_ax25 *sax = (struct sockaddr_ax25 *)msg->msg_name; + if (msg->msg_name) { ax25_digi digi; ax25_address src; const unsigned char *mac = skb_mac_header(skb); + struct sockaddr_ax25 *sax = msg->msg_name; memset(sax, 0, sizeof(struct full_sockaddr_ax25)); ax25_addr_parse(mac + 1, skb->data - mac - 1, &src, NULL, diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index a169084..87fddab 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -38,6 +38,7 @@ #include #include #include +#include static ax25_route *ax25_route_list; static DEFINE_RWLOCK(ax25_route_lock); diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c index d349be9..4c83137 100644 --- a/net/ax25/ax25_uid.c +++ b/net/ax25/ax25_uid.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include diff --git a/net/dcb/dcbevent.c b/net/dcb/dcbevent.c index 665a880..1d9eb7c 100644 --- a/net/dcb/dcbevent.c +++ b/net/dcb/dcbevent.c @@ -19,6 +19,7 @@ #include #include +#include static ATOMIC_NOTIFIER_HEAD(dcbevent_notif_chain); diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index d8f262f..2f9517d 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -25,6 +25,7 @@ #include #include #include +#include #include /** @@ -1167,64 +1168,6 @@ err: return ret; } -/* Handle IEEE 802.1Qaz SET commands. If any requested operation can not - * be completed the entire msg is aborted and error value is returned. - * No attempt is made to reconcile the case where only part of the - * cmd can be completed. - */ -static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) -{ - const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; - struct nlattr *ieee[DCB_ATTR_IEEE_MAX + 1]; - int err = -EOPNOTSUPP; - - if (!ops) - goto err; - - err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, - tb[DCB_ATTR_IEEE], dcbnl_ieee_policy); - if (err) - goto err; - - if (ieee[DCB_ATTR_IEEE_ETS] && ops->ieee_setets) { - struct ieee_ets *ets = nla_data(ieee[DCB_ATTR_IEEE_ETS]); - err = ops->ieee_setets(netdev, ets); - if (err) - goto err; - } - - if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setpfc) { - struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]); - err = ops->ieee_setpfc(netdev, pfc); - if (err) - goto err; - } - - if (ieee[DCB_ATTR_IEEE_APP_TABLE]) { - struct nlattr *attr; - int rem; - - nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) { - struct dcb_app *app_data; - if (nla_type(attr) != DCB_ATTR_IEEE_APP) - continue; - app_data = nla_data(attr); - if (ops->ieee_setapp) - err = ops->ieee_setapp(netdev, app_data); - else - err = dcb_setapp(netdev, app_data); - if (err) - goto err; - } - } - -err: - dcbnl_reply(err, RTM_SETDCB, DCB_CMD_IEEE_SET, DCB_ATTR_IEEE, - pid, seq, flags); - return err; -} - static int dcbnl_build_peer_app(struct net_device *netdev, struct sk_buff* skb, int app_nested_type, int app_info_type, int app_entry_type) @@ -1280,29 +1223,13 @@ nla_put_failure: } /* Handle IEEE 802.1Qaz GET commands. */ -static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) { - struct sk_buff *skb; - struct nlmsghdr *nlh; - struct dcbmsg *dcb; struct nlattr *ieee, *app; struct dcb_app_type *itr; const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; - int err; - - if (!ops) - return -EOPNOTSUPP; - - skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!skb) - return -ENOBUFS; - - nlh = NLMSG_NEW(skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); - - dcb = NLMSG_DATA(nlh); - dcb->dcb_family = AF_UNSPEC; - dcb->cmd = DCB_CMD_IEEE_GET; + int dcbx; + int err = -EMSGSIZE; NLA_PUT_STRING(skb, DCB_ATTR_IFNAME, netdev->name); @@ -1332,7 +1259,7 @@ static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb, spin_lock(&dcb_lock); list_for_each_entry(itr, &dcb_app_list, list) { - if (strncmp(itr->name, netdev->name, IFNAMSIZ) == 0) { + if (itr->ifindex == netdev->ifindex) { err = nla_put(skb, DCB_ATTR_IEEE_APP, sizeof(itr->app), &itr->app); if (err) { @@ -1341,6 +1268,12 @@ static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb, } } } + + if (netdev->dcbnl_ops->getdcbx) + dcbx = netdev->dcbnl_ops->getdcbx(netdev); + else + dcbx = -EOPNOTSUPP; + spin_unlock(&dcb_lock); nla_nest_end(skb, app); @@ -1371,15 +1304,414 @@ static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb, } nla_nest_end(skb, ieee); - nlmsg_end(skb, nlh); + if (dcbx >= 0) { + err = nla_put_u8(skb, DCB_ATTR_DCBX, dcbx); + if (err) + goto nla_put_failure; + } + + return 0; - return rtnl_unicast(skb, &init_net, pid); nla_put_failure: - nlmsg_cancel(skb, nlh); -nlmsg_failure: - kfree_skb(skb); - return -1; + return err; +} + +static int dcbnl_cee_pg_fill(struct sk_buff *skb, struct net_device *dev, + int dir) +{ + u8 pgid, up_map, prio, tc_pct; + const struct dcbnl_rtnl_ops *ops = dev->dcbnl_ops; + int i = dir ? DCB_ATTR_CEE_TX_PG : DCB_ATTR_CEE_RX_PG; + struct nlattr *pg = nla_nest_start(skb, i); + + if (!pg) + goto nla_put_failure; + + for (i = DCB_PG_ATTR_TC_0; i <= DCB_PG_ATTR_TC_7; i++) { + struct nlattr *tc_nest = nla_nest_start(skb, i); + + if (!tc_nest) + goto nla_put_failure; + + pgid = DCB_ATTR_VALUE_UNDEFINED; + prio = DCB_ATTR_VALUE_UNDEFINED; + tc_pct = DCB_ATTR_VALUE_UNDEFINED; + up_map = DCB_ATTR_VALUE_UNDEFINED; + + if (!dir) + ops->getpgtccfgrx(dev, i - DCB_PG_ATTR_TC_0, + &prio, &pgid, &tc_pct, &up_map); + else + ops->getpgtccfgtx(dev, i - DCB_PG_ATTR_TC_0, + &prio, &pgid, &tc_pct, &up_map); + + NLA_PUT_U8(skb, DCB_TC_ATTR_PARAM_PGID, pgid); + NLA_PUT_U8(skb, DCB_TC_ATTR_PARAM_UP_MAPPING, up_map); + NLA_PUT_U8(skb, DCB_TC_ATTR_PARAM_STRICT_PRIO, prio); + NLA_PUT_U8(skb, DCB_TC_ATTR_PARAM_BW_PCT, tc_pct); + nla_nest_end(skb, tc_nest); + } + + for (i = DCB_PG_ATTR_BW_ID_0; i <= DCB_PG_ATTR_BW_ID_7; i++) { + tc_pct = DCB_ATTR_VALUE_UNDEFINED; + + if (!dir) + ops->getpgbwgcfgrx(dev, i - DCB_PG_ATTR_BW_ID_0, + &tc_pct); + else + ops->getpgbwgcfgtx(dev, i - DCB_PG_ATTR_BW_ID_0, + &tc_pct); + NLA_PUT_U8(skb, i, tc_pct); + } + nla_nest_end(skb, pg); + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) +{ + struct nlattr *cee, *app; + struct dcb_app_type *itr; + const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; + int dcbx, i, err = -EMSGSIZE; + u8 value; + + NLA_PUT_STRING(skb, DCB_ATTR_IFNAME, netdev->name); + + cee = nla_nest_start(skb, DCB_ATTR_CEE); + if (!cee) + goto nla_put_failure; + + /* local pg */ + if (ops->getpgtccfgtx && ops->getpgbwgcfgtx) { + err = dcbnl_cee_pg_fill(skb, netdev, 1); + if (err) + goto nla_put_failure; + } + + if (ops->getpgtccfgrx && ops->getpgbwgcfgrx) { + err = dcbnl_cee_pg_fill(skb, netdev, 0); + if (err) + goto nla_put_failure; + } + + /* local pfc */ + if (ops->getpfccfg) { + struct nlattr *pfc_nest = nla_nest_start(skb, DCB_ATTR_CEE_PFC); + + if (!pfc_nest) + goto nla_put_failure; + + for (i = DCB_PFC_UP_ATTR_0; i <= DCB_PFC_UP_ATTR_7; i++) { + ops->getpfccfg(netdev, i - DCB_PFC_UP_ATTR_0, &value); + NLA_PUT_U8(skb, i, value); + } + nla_nest_end(skb, pfc_nest); + } + + /* local app */ + spin_lock(&dcb_lock); + app = nla_nest_start(skb, DCB_ATTR_CEE_APP_TABLE); + if (!app) + goto dcb_unlock; + + list_for_each_entry(itr, &dcb_app_list, list) { + if (itr->ifindex == netdev->ifindex) { + struct nlattr *app_nest = nla_nest_start(skb, + DCB_ATTR_APP); + if (!app_nest) + goto dcb_unlock; + + err = nla_put_u8(skb, DCB_APP_ATTR_IDTYPE, + itr->app.selector); + if (err) + goto dcb_unlock; + + err = nla_put_u16(skb, DCB_APP_ATTR_ID, + itr->app.protocol); + if (err) + goto dcb_unlock; + + err = nla_put_u8(skb, DCB_APP_ATTR_PRIORITY, + itr->app.priority); + if (err) + goto dcb_unlock; + + nla_nest_end(skb, app_nest); + } + } + nla_nest_end(skb, app); + + if (netdev->dcbnl_ops->getdcbx) + dcbx = netdev->dcbnl_ops->getdcbx(netdev); + else + dcbx = -EOPNOTSUPP; + + spin_unlock(&dcb_lock); + + /* features flags */ + if (ops->getfeatcfg) { + struct nlattr *feat = nla_nest_start(skb, DCB_ATTR_CEE_FEAT); + if (!feat) + goto nla_put_failure; + + for (i = DCB_FEATCFG_ATTR_ALL + 1; i <= DCB_FEATCFG_ATTR_MAX; + i++) + if (!ops->getfeatcfg(netdev, i, &value)) + NLA_PUT_U8(skb, i, value); + + nla_nest_end(skb, feat); + } + + /* peer info if available */ + if (ops->cee_peer_getpg) { + struct cee_pg pg; + memset(&pg, 0, sizeof(pg)); + err = ops->cee_peer_getpg(netdev, &pg); + if (!err) + NLA_PUT(skb, DCB_ATTR_CEE_PEER_PG, sizeof(pg), &pg); + } + + if (ops->cee_peer_getpfc) { + struct cee_pfc pfc; + memset(&pfc, 0, sizeof(pfc)); + err = ops->cee_peer_getpfc(netdev, &pfc); + if (!err) + NLA_PUT(skb, DCB_ATTR_CEE_PEER_PFC, sizeof(pfc), &pfc); + } + + if (ops->peer_getappinfo && ops->peer_getapptable) { + err = dcbnl_build_peer_app(netdev, skb, + DCB_ATTR_CEE_PEER_APP_TABLE, + DCB_ATTR_CEE_PEER_APP_INFO, + DCB_ATTR_CEE_PEER_APP); + if (err) + goto nla_put_failure; + } + nla_nest_end(skb, cee); + + /* DCBX state */ + if (dcbx >= 0) { + err = nla_put_u8(skb, DCB_ATTR_DCBX, dcbx); + if (err) + goto nla_put_failure; + } + return 0; + +dcb_unlock: + spin_unlock(&dcb_lock); +nla_put_failure: + return err; +} + +static int dcbnl_notify(struct net_device *dev, int event, int cmd, + u32 seq, u32 pid, int dcbx_ver) +{ + struct net *net = dev_net(dev); + struct sk_buff *skb; + struct nlmsghdr *nlh; + struct dcbmsg *dcb; + const struct dcbnl_rtnl_ops *ops = dev->dcbnl_ops; + int err; + + if (!ops) + return -EOPNOTSUPP; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + nlh = nlmsg_put(skb, pid, 0, event, sizeof(*dcb), 0); + if (nlh == NULL) { + nlmsg_free(skb); + return -EMSGSIZE; + } + + dcb = NLMSG_DATA(nlh); + dcb->dcb_family = AF_UNSPEC; + dcb->cmd = cmd; + + if (dcbx_ver == DCB_CAP_DCBX_VER_IEEE) + err = dcbnl_ieee_fill(skb, dev); + else + err = dcbnl_cee_fill(skb, dev); + + if (err < 0) { + /* Report error to broadcast listeners */ + nlmsg_cancel(skb, nlh); + kfree_skb(skb); + rtnl_set_sk_err(net, RTNLGRP_DCB, err); + } else { + /* End nlmsg and notify broadcast listeners */ + nlmsg_end(skb, nlh); + rtnl_notify(skb, net, 0, RTNLGRP_DCB, NULL, GFP_KERNEL); + } + + return err; +} + +int dcbnl_ieee_notify(struct net_device *dev, int event, int cmd, + u32 seq, u32 pid) +{ + return dcbnl_notify(dev, event, cmd, seq, pid, DCB_CAP_DCBX_VER_IEEE); } +EXPORT_SYMBOL(dcbnl_ieee_notify); + +int dcbnl_cee_notify(struct net_device *dev, int event, int cmd, + u32 seq, u32 pid) +{ + return dcbnl_notify(dev, event, cmd, seq, pid, DCB_CAP_DCBX_VER_CEE); +} +EXPORT_SYMBOL(dcbnl_cee_notify); + +/* Handle IEEE 802.1Qaz SET commands. If any requested operation can not + * be completed the entire msg is aborted and error value is returned. + * No attempt is made to reconcile the case where only part of the + * cmd can be completed. + */ +static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; + struct nlattr *ieee[DCB_ATTR_IEEE_MAX + 1]; + int err = -EOPNOTSUPP; + + if (!ops) + return err; + + if (!tb[DCB_ATTR_IEEE]) + return -EINVAL; + + err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, + tb[DCB_ATTR_IEEE], dcbnl_ieee_policy); + if (err) + return err; + + if (ieee[DCB_ATTR_IEEE_ETS] && ops->ieee_setets) { + struct ieee_ets *ets = nla_data(ieee[DCB_ATTR_IEEE_ETS]); + err = ops->ieee_setets(netdev, ets); + if (err) + goto err; + } + + if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setpfc) { + struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]); + err = ops->ieee_setpfc(netdev, pfc); + if (err) + goto err; + } + + if (ieee[DCB_ATTR_IEEE_APP_TABLE]) { + struct nlattr *attr; + int rem; + + nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) { + struct dcb_app *app_data; + if (nla_type(attr) != DCB_ATTR_IEEE_APP) + continue; + app_data = nla_data(attr); + if (ops->ieee_setapp) + err = ops->ieee_setapp(netdev, app_data); + else + err = dcb_ieee_setapp(netdev, app_data); + if (err) + goto err; + } + } + +err: + dcbnl_reply(err, RTM_SETDCB, DCB_CMD_IEEE_SET, DCB_ATTR_IEEE, + pid, seq, flags); + dcbnl_ieee_notify(netdev, RTM_SETDCB, DCB_CMD_IEEE_SET, seq, 0); + return err; +} + +static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + struct net *net = dev_net(netdev); + struct sk_buff *skb; + struct nlmsghdr *nlh; + struct dcbmsg *dcb; + const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; + int err; + + if (!ops) + return -EOPNOTSUPP; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + nlh = nlmsg_put(skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); + if (nlh == NULL) { + nlmsg_free(skb); + return -EMSGSIZE; + } + + dcb = NLMSG_DATA(nlh); + dcb->dcb_family = AF_UNSPEC; + dcb->cmd = DCB_CMD_IEEE_GET; + + err = dcbnl_ieee_fill(skb, netdev); + + if (err < 0) { + nlmsg_cancel(skb, nlh); + kfree_skb(skb); + } else { + nlmsg_end(skb, nlh); + err = rtnl_unicast(skb, net, pid); + } + + return err; +} + +static int dcbnl_ieee_del(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; + struct nlattr *ieee[DCB_ATTR_IEEE_MAX + 1]; + int err = -EOPNOTSUPP; + + if (!ops) + return -EOPNOTSUPP; + + if (!tb[DCB_ATTR_IEEE]) + return -EINVAL; + + err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, + tb[DCB_ATTR_IEEE], dcbnl_ieee_policy); + if (err) + return err; + + if (ieee[DCB_ATTR_IEEE_APP_TABLE]) { + struct nlattr *attr; + int rem; + + nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) { + struct dcb_app *app_data; + + if (nla_type(attr) != DCB_ATTR_IEEE_APP) + continue; + app_data = nla_data(attr); + if (ops->ieee_delapp) + err = ops->ieee_delapp(netdev, app_data); + else + err = dcb_ieee_delapp(netdev, app_data); + if (err) + goto err; + } + } + +err: + dcbnl_reply(err, RTM_SETDCB, DCB_CMD_IEEE_DEL, DCB_ATTR_IEEE, + pid, seq, flags); + dcbnl_ieee_notify(netdev, RTM_SETDCB, DCB_CMD_IEEE_DEL, seq, 0); + return err; +} + /* DCBX configuration */ static int dcbnl_getdcbx(struct net_device *netdev, struct nlattr **tb, @@ -1527,10 +1859,10 @@ err: static int dcbnl_cee_get(struct net_device *netdev, struct nlattr **tb, u32 pid, u32 seq, u16 flags) { + struct net *net = dev_net(netdev); struct sk_buff *skb; struct nlmsghdr *nlh; struct dcbmsg *dcb; - struct nlattr *cee; const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; int err; @@ -1541,53 +1873,26 @@ static int dcbnl_cee_get(struct net_device *netdev, struct nlattr **tb, if (!skb) return -ENOBUFS; - nlh = NLMSG_NEW(skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); + nlh = nlmsg_put(skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); + if (nlh == NULL) { + nlmsg_free(skb); + return -EMSGSIZE; + } dcb = NLMSG_DATA(nlh); dcb->dcb_family = AF_UNSPEC; dcb->cmd = DCB_CMD_CEE_GET; - NLA_PUT_STRING(skb, DCB_ATTR_IFNAME, netdev->name); - - cee = nla_nest_start(skb, DCB_ATTR_CEE); - if (!cee) - goto nla_put_failure; - - /* get peer info if available */ - if (ops->cee_peer_getpg) { - struct cee_pg pg; - memset(&pg, 0, sizeof(pg)); - err = ops->cee_peer_getpg(netdev, &pg); - if (!err) - NLA_PUT(skb, DCB_ATTR_CEE_PEER_PG, sizeof(pg), &pg); - } - - if (ops->cee_peer_getpfc) { - struct cee_pfc pfc; - memset(&pfc, 0, sizeof(pfc)); - err = ops->cee_peer_getpfc(netdev, &pfc); - if (!err) - NLA_PUT(skb, DCB_ATTR_CEE_PEER_PFC, sizeof(pfc), &pfc); - } + err = dcbnl_cee_fill(skb, netdev); - if (ops->peer_getappinfo && ops->peer_getapptable) { - err = dcbnl_build_peer_app(netdev, skb, - DCB_ATTR_CEE_PEER_APP_TABLE, - DCB_ATTR_CEE_PEER_APP_INFO, - DCB_ATTR_CEE_PEER_APP); - if (err) - goto nla_put_failure; + if (err < 0) { + nlmsg_cancel(skb, nlh); + nlmsg_free(skb); + } else { + nlmsg_end(skb, nlh); + err = rtnl_unicast(skb, net, pid); } - - nla_nest_end(skb, cee); - nlmsg_end(skb, nlh); - - return rtnl_unicast(skb, &init_net, pid); -nla_put_failure: - nlmsg_cancel(skb, nlh); -nlmsg_failure: - kfree_skb(skb); - return -1; + return err; } static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) @@ -1697,11 +2002,15 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) goto out; case DCB_CMD_IEEE_SET: ret = dcbnl_ieee_set(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); + nlh->nlmsg_flags); goto out; case DCB_CMD_IEEE_GET: ret = dcbnl_ieee_get(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); + nlh->nlmsg_flags); + goto out; + case DCB_CMD_IEEE_DEL: + ret = dcbnl_ieee_del(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); goto out; case DCB_CMD_GDCBX: ret = dcbnl_getdcbx(netdev, tb, pid, nlh->nlmsg_seq, @@ -1749,7 +2058,7 @@ u8 dcb_getapp(struct net_device *dev, struct dcb_app *app) list_for_each_entry(itr, &dcb_app_list, list) { if (itr->app.selector == app->selector && itr->app.protocol == app->protocol && - (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) { + itr->ifindex == dev->ifindex) { prio = itr->app.priority; break; } @@ -1761,25 +2070,28 @@ u8 dcb_getapp(struct net_device *dev, struct dcb_app *app) EXPORT_SYMBOL(dcb_getapp); /** - * ixgbe_dcbnl_setapp - add dcb application data to app list + * dcb_setapp - add CEE dcb application data to app list * - * Priority 0 is the default priority this removes applications - * from the app list if the priority is set to zero. + * Priority 0 is an invalid priority in CEE spec. This routine + * removes applications from the app list if the priority is + * set to zero. */ -u8 dcb_setapp(struct net_device *dev, struct dcb_app *new) +int dcb_setapp(struct net_device *dev, struct dcb_app *new) { struct dcb_app_type *itr; struct dcb_app_type event; - memcpy(&event.name, dev->name, sizeof(event.name)); + event.ifindex = dev->ifindex; memcpy(&event.app, new, sizeof(event.app)); + if (dev->dcbnl_ops->getdcbx) + event.dcbx = dev->dcbnl_ops->getdcbx(dev); spin_lock(&dcb_lock); /* Search for existing match and replace */ list_for_each_entry(itr, &dcb_app_list, list) { if (itr->app.selector == new->selector && itr->app.protocol == new->protocol && - (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) { + itr->ifindex == dev->ifindex) { if (new->priority) itr->app.priority = new->priority; else { @@ -1799,7 +2111,7 @@ u8 dcb_setapp(struct net_device *dev, struct dcb_app *new) } memcpy(&entry->app, new, sizeof(*new)); - strncpy(entry->name, dev->name, IFNAMSIZ); + entry->ifindex = dev->ifindex; list_add(&entry->list, &dcb_app_list); } out: @@ -1809,6 +2121,118 @@ out: } EXPORT_SYMBOL(dcb_setapp); +/** + * dcb_ieee_getapp_mask - retrieve the IEEE DCB application priority + * + * Helper routine which on success returns a non-zero 802.1Qaz user + * priority bitmap otherwise returns 0 to indicate the dcb_app was + * not found in APP list. + */ +u8 dcb_ieee_getapp_mask(struct net_device *dev, struct dcb_app *app) +{ + struct dcb_app_type *itr; + u8 prio = 0; + + spin_lock(&dcb_lock); + list_for_each_entry(itr, &dcb_app_list, list) { + if (itr->app.selector == app->selector && + itr->app.protocol == app->protocol && + itr->ifindex == dev->ifindex) { + prio |= 1 << itr->app.priority; + } + } + spin_unlock(&dcb_lock); + + return prio; +} +EXPORT_SYMBOL(dcb_ieee_getapp_mask); + +/** + * dcb_ieee_setapp - add IEEE dcb application data to app list + * + * This adds Application data to the list. Multiple application + * entries may exists for the same selector and protocol as long + * as the priorities are different. + */ +int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new) +{ + struct dcb_app_type *itr, *entry; + struct dcb_app_type event; + int err = 0; + + event.ifindex = dev->ifindex; + memcpy(&event.app, new, sizeof(event.app)); + if (dev->dcbnl_ops->getdcbx) + event.dcbx = dev->dcbnl_ops->getdcbx(dev); + + spin_lock(&dcb_lock); + /* Search for existing match and abort if found */ + list_for_each_entry(itr, &dcb_app_list, list) { + if (itr->app.selector == new->selector && + itr->app.protocol == new->protocol && + itr->app.priority == new->priority && + itr->ifindex == dev->ifindex) { + err = -EEXIST; + goto out; + } + } + + /* App entry does not exist add new entry */ + entry = kmalloc(sizeof(struct dcb_app_type), GFP_ATOMIC); + if (!entry) { + err = -ENOMEM; + goto out; + } + + memcpy(&entry->app, new, sizeof(*new)); + entry->ifindex = dev->ifindex; + list_add(&entry->list, &dcb_app_list); +out: + spin_unlock(&dcb_lock); + if (!err) + call_dcbevent_notifiers(DCB_APP_EVENT, &event); + return err; +} +EXPORT_SYMBOL(dcb_ieee_setapp); + +/** + * dcb_ieee_delapp - delete IEEE dcb application data from list + * + * This removes a matching APP data from the APP list + */ +int dcb_ieee_delapp(struct net_device *dev, struct dcb_app *del) +{ + struct dcb_app_type *itr; + struct dcb_app_type event; + int err = -ENOENT; + + event.ifindex = dev->ifindex; + memcpy(&event.app, del, sizeof(event.app)); + if (dev->dcbnl_ops->getdcbx) + event.dcbx = dev->dcbnl_ops->getdcbx(dev); + + spin_lock(&dcb_lock); + /* Search for existing match and remove it. */ + list_for_each_entry(itr, &dcb_app_list, list) { + if (itr->app.selector == del->selector && + itr->app.protocol == del->protocol && + itr->app.priority == del->priority && + itr->ifindex == dev->ifindex) { + list_del(&itr->list); + kfree(itr); + err = 0; + goto out; + } + } + +out: + spin_unlock(&dcb_lock); + if (!err) + call_dcbevent_notifiers(DCB_APP_EVENT, &event); + return err; +} +EXPORT_SYMBOL(dcb_ieee_delapp); + static void dcb_flushapp(void) { struct dcb_app_type *app; diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 25b7a8d..ba07824 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -12,6 +12,7 @@ #include "dccp.h" #include #include +#include static struct kmem_cache *dccp_ackvec_slab; static struct kmem_cache *dccp_ackvec_record_slab; diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index 36479ca..48b585a 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -118,7 +118,7 @@ static int ccid_activate(struct ccid_operations *ccid_ops) if (ccid_ops->ccid_hc_tx_slab == NULL) goto out_free_rx_slab; - pr_info("CCID: Activated CCID %d (%s)\n", + pr_info("DCCP: Activated CCID %d (%s)\n", ccid_ops->ccid_id, ccid_ops->ccid_name); err = 0; out: @@ -136,7 +136,7 @@ static void ccid_deactivate(struct ccid_operations *ccid_ops) ccid_kmem_cache_destroy(ccid_ops->ccid_hc_rx_slab); ccid_ops->ccid_hc_rx_slab = NULL; - pr_info("CCID: Deactivated CCID %d (%s)\n", + pr_info("DCCP: Deactivated CCID %d (%s)\n", ccid_ops->ccid_id, ccid_ops->ccid_name); } diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index fadecd2..67164bb 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -85,7 +85,6 @@ static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) { - struct dccp_sock *dp = dccp_sk(sk); u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->tx_cwnd, 2); /* @@ -98,14 +97,33 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio); val = max_ratio; } - if (val > DCCPF_ACK_RATIO_MAX) - val = DCCPF_ACK_RATIO_MAX; + dccp_feat_signal_nn_change(sk, DCCPF_ACK_RATIO, + min_t(u32, val, DCCPF_ACK_RATIO_MAX)); +} - if (val == dp->dccps_l_ack_ratio) - return; +static void ccid2_check_l_ack_ratio(struct sock *sk) +{ + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - ccid2_pr_debug("changing local ack ratio to %u\n", val); - dp->dccps_l_ack_ratio = val; + /* + * After a loss, idle period, application limited period, or RTO we + * need to check that the ack ratio is still less than the congestion + * window. Otherwise, we will send an entire congestion window of + * packets and got no response because we haven't sent ack ratio + * packets yet. + * If the ack ratio does need to be reduced, we reduce it to half of + * the congestion window (or 1 if that's zero) instead of to the + * congestion window. This prevents problems if one ack is lost. + */ + if (dccp_feat_nn_get(sk, DCCPF_ACK_RATIO) > hc->tx_cwnd) + ccid2_change_l_ack_ratio(sk, hc->tx_cwnd/2 ? : 1U); +} + +static void ccid2_change_l_seq_window(struct sock *sk, u64 val) +{ + dccp_feat_signal_nn_change(sk, DCCPF_SEQUENCE_WINDOW, + clamp_val(val, DCCPF_SEQ_WMIN, + DCCPF_SEQ_WMAX)); } static void ccid2_hc_tx_rto_expire(unsigned long data) @@ -153,17 +171,97 @@ out: sock_put(sk); } +/* + * Congestion window validation (RFC 2861). + */ +static int ccid2_do_cwv = 1; +module_param(ccid2_do_cwv, bool, 0644); +MODULE_PARM_DESC(ccid2_do_cwv, "Perform RFC2861 Congestion Window Validation"); + +/** + * ccid2_update_used_window - Track how much of cwnd is actually used + * This is done in addition to CWV. The sender needs to have an idea of how many + * packets may be in flight, to set the local Sequence Window value accordingly + * (RFC 4340, 7.5.2). The CWV mechanism is exploited to keep track of the + * maximum-used window. We use an EWMA low-pass filter to filter out noise. + */ +static void ccid2_update_used_window(struct ccid2_hc_tx_sock *hc, u32 new_wnd) +{ + hc->tx_expected_wnd = (3 * hc->tx_expected_wnd + new_wnd) / 4; +} + +/* This borrows the code of tcp_cwnd_application_limited() */ +static void ccid2_cwnd_application_limited(struct sock *sk, const u32 now) +{ + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); + /* don't reduce cwnd below the initial window (IW) */ + u32 init_win = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache), + win_used = max(hc->tx_cwnd_used, init_win); + + if (win_used < hc->tx_cwnd) { + hc->tx_ssthresh = max(hc->tx_ssthresh, + (hc->tx_cwnd >> 1) + (hc->tx_cwnd >> 2)); + hc->tx_cwnd = (hc->tx_cwnd + win_used) >> 1; + } + hc->tx_cwnd_used = 0; + hc->tx_cwnd_stamp = now; + + ccid2_check_l_ack_ratio(sk); +} + +/* This borrows the code of tcp_cwnd_restart() */ +static void ccid2_cwnd_restart(struct sock *sk, const u32 now) +{ + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); + u32 cwnd = hc->tx_cwnd, restart_cwnd, + iwnd = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache); + + hc->tx_ssthresh = max(hc->tx_ssthresh, (cwnd >> 1) + (cwnd >> 2)); + + /* don't reduce cwnd below the initial window (IW) */ + restart_cwnd = min(cwnd, iwnd); + cwnd >>= (now - hc->tx_lsndtime) / hc->tx_rto; + hc->tx_cwnd = max(cwnd, restart_cwnd); + + hc->tx_cwnd_stamp = now; + hc->tx_cwnd_used = 0; + + ccid2_check_l_ack_ratio(sk); +} + static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) { struct dccp_sock *dp = dccp_sk(sk); struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); + const u32 now = ccid2_time_stamp; struct ccid2_seq *next; - hc->tx_pipe++; + /* slow-start after idle periods (RFC 2581, RFC 2861) */ + if (ccid2_do_cwv && !hc->tx_pipe && + (s32)(now - hc->tx_lsndtime) >= hc->tx_rto) + ccid2_cwnd_restart(sk, now); + + hc->tx_lsndtime = now; + hc->tx_pipe += 1; + + /* see whether cwnd was fully used (RFC 2861), update expected window */ + if (ccid2_cwnd_network_limited(hc)) { + ccid2_update_used_window(hc, hc->tx_cwnd); + hc->tx_cwnd_used = 0; + hc->tx_cwnd_stamp = now; + } else { + if (hc->tx_pipe > hc->tx_cwnd_used) + hc->tx_cwnd_used = hc->tx_pipe; + + ccid2_update_used_window(hc, hc->tx_cwnd_used); + + if (ccid2_do_cwv && (s32)(now - hc->tx_cwnd_stamp) >= hc->tx_rto) + ccid2_cwnd_application_limited(sk, now); + } hc->tx_seqh->ccid2s_seq = dp->dccps_gss; hc->tx_seqh->ccid2s_acked = 0; - hc->tx_seqh->ccid2s_sent = ccid2_time_stamp; + hc->tx_seqh->ccid2s_sent = now; next = hc->tx_seqh->ccid2s_next; /* check if we need to alloc more space */ @@ -329,17 +427,37 @@ static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp, unsigned int *maxincr) { struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - - if (hc->tx_cwnd < hc->tx_ssthresh) { - if (*maxincr > 0 && ++hc->tx_packets_acked == 2) { + struct dccp_sock *dp = dccp_sk(sk); + int r_seq_used = hc->tx_cwnd / dp->dccps_l_ack_ratio; + + if (hc->tx_cwnd < dp->dccps_l_seq_win && + r_seq_used < dp->dccps_r_seq_win) { + if (hc->tx_cwnd < hc->tx_ssthresh) { + if (*maxincr > 0 && ++hc->tx_packets_acked >= 2) { + hc->tx_cwnd += 1; + *maxincr -= 1; + hc->tx_packets_acked = 0; + } + } else if (++hc->tx_packets_acked >= hc->tx_cwnd) { hc->tx_cwnd += 1; - *maxincr -= 1; hc->tx_packets_acked = 0; } - } else if (++hc->tx_packets_acked >= hc->tx_cwnd) { - hc->tx_cwnd += 1; - hc->tx_packets_acked = 0; } + + /* + * Adjust the local sequence window and the ack ratio to allow about + * 5 times the number of packets in the network (RFC 4340 7.5.2) + */ + if (r_seq_used * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_r_seq_win) + ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio * 2); + else if (r_seq_used * CCID2_WIN_CHANGE_FACTOR < dp->dccps_r_seq_win/2) + ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio / 2 ? : 1U); + + if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_l_seq_win) + ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win * 2); + else if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR < dp->dccps_l_seq_win/2) + ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win / 2); + /* * FIXME: RTT is sampled several times per acknowledgment (for each * entry in the Ack Vector), instead of once per Ack (as in TCP SACK). @@ -365,9 +483,7 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U; hc->tx_ssthresh = max(hc->tx_cwnd, 2U); - /* Avoid spurious timeouts resulting from Ack Ratio > cwnd */ - if (dccp_sk(sk)->dccps_l_ack_ratio > hc->tx_cwnd) - ccid2_change_l_ack_ratio(sk, hc->tx_cwnd); + ccid2_check_l_ack_ratio(sk); } static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type, @@ -418,8 +534,16 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) if (hc->tx_rpdupack >= NUMDUPACK) { hc->tx_rpdupack = -1; /* XXX lame */ hc->tx_rpseq = 0; - +#ifdef __CCID2_COPES_GRACEFULLY_WITH_ACK_CONGESTION_CONTROL__ + /* + * FIXME: Ack Congestion Control is broken; in + * the current state instabilities occurred with + * Ack Ratios greater than 1; causing hang-ups + * and long RTO timeouts. This needs to be fixed + * before opening up dynamic changes. -- gerrit + */ ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio); +#endif } } } @@ -583,15 +707,6 @@ done: dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks); } -/* - * Convert RFC 3390 larger initial window into an equivalent number of packets. - * This is based on the numbers specified in RFC 5681, 3.1. - */ -static inline u32 rfc3390_bytes_to_packets(const u32 smss) -{ - return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3); -} - static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) { struct ccid2_hc_tx_sock *hc = ccid_priv(ccid); @@ -603,6 +718,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) /* Use larger initial windows (RFC 4341, section 5). */ hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache); + hc->tx_expected_wnd = hc->tx_cwnd; /* Make sure that Ack Ratio is enabled and within bounds. */ max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2); @@ -615,7 +731,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) hc->tx_rto = DCCP_TIMEOUT_INIT; hc->tx_rpdupack = -1; - hc->tx_last_cong = ccid2_time_stamp; + hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_time_stamp; + hc->tx_cwnd_used = 0; setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, (unsigned long)sk); INIT_LIST_HEAD(&hc->tx_av_chunks); @@ -636,18 +753,14 @@ static void ccid2_hc_tx_exit(struct sock *sk) static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) { - const struct dccp_sock *dp = dccp_sk(sk); struct ccid2_hc_rx_sock *hc = ccid2_hc_rx_sk(sk); - switch (DCCP_SKB_CB(skb)->dccpd_type) { - case DCCP_PKT_DATA: - case DCCP_PKT_DATAACK: - hc->rx_data++; - if (hc->rx_data >= dp->dccps_r_ack_ratio) { - dccp_send_ack(sk); - hc->rx_data = 0; - } - break; + if (!dccp_data_packet(skb)) + return; + + if (++hc->rx_num_data_pkts >= dccp_sk(sk)->dccps_r_ack_ratio) { + dccp_send_ack(sk); + hc->rx_num_data_pkts = 0; } } diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index e9985da..18c9754 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h @@ -43,6 +43,12 @@ struct ccid2_seq { #define CCID2_SEQBUF_LEN 1024 #define CCID2_SEQBUF_MAX 128 +/* + * Multiple of congestion window to keep the sequence window at + * (RFC 4340 7.5.2) + */ +#define CCID2_WIN_CHANGE_FACTOR 5 + /** * struct ccid2_hc_tx_sock - CCID2 TX half connection * @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5 @@ -53,6 +59,10 @@ struct ccid2_seq { * @tx_rttvar: moving average/maximum of @mdev_max * @tx_rto: RTO value deriving from SRTT and RTTVAR (RFC 2988) * @tx_rtt_seq: to decay RTTVAR at most once per flight + * @tx_cwnd_used: actually used cwnd, W_used of RFC 2861 + * @tx_expected_wnd: moving average of @tx_cwnd_used + * @tx_cwnd_stamp: to track idle periods in CWV + * @tx_lsndtime: last time (in jiffies) a data packet was sent * @tx_rpseq: last consecutive seqno * @tx_rpdupack: dupacks since rpseq * @tx_av_chunks: list of Ack Vectors received on current skb @@ -76,6 +86,12 @@ struct ccid2_hc_tx_sock { u64 tx_rtt_seq:48; struct timer_list tx_rtotimer; + /* Congestion Window validation (optional, RFC 2861) */ + u32 tx_cwnd_used, + tx_expected_wnd, + tx_cwnd_stamp, + tx_lsndtime; + u64 tx_rpseq; int tx_rpdupack; u32 tx_last_cong; @@ -88,8 +104,21 @@ static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hc) return hc->tx_pipe >= hc->tx_cwnd; } +/* + * Convert RFC 3390 larger initial window into an equivalent number of packets. + * This is based on the numbers specified in RFC 5681, 3.1. + */ +static inline u32 rfc3390_bytes_to_packets(const u32 smss) +{ + return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3); +} + +/** + * struct ccid2_hc_rx_sock - Receiving end of CCID-2 half-connection + * @rx_num_data_pkts: number of data packets received since last feedback + */ struct ccid2_hc_rx_sock { - int rx_data; + u32 rx_num_data_pkts; }; static inline struct ccid2_hc_tx_sock *ccid2_hc_tx_sk(const struct sock *sk) diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c index 4902029..1f94b7e 100644 --- a/net/dccp/ccids/lib/tfrc.c +++ b/net/dccp/ccids/lib/tfrc.c @@ -4,6 +4,7 @@ * Copyright (c) 2007 The University of Aberdeen, Scotland, UK * Copyright (c) 2007 Arnaldo Carvalho de Melo */ +#include #include "tfrc.h" #ifdef CONFIG_IP_DCCP_TFRC_DEBUG diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 5fdb072..583490a 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -474,6 +474,7 @@ static inline int dccp_ack_pending(const struct sock *sk) return dccp_ackvec_pending(sk) || inet_csk_ack_scheduled(sk); } +extern int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val); extern int dccp_feat_finalise_settings(struct dccp_sock *dp); extern int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq); extern int dccp_feat_insert_opts(struct dccp_sock*, struct dccp_request_sock*, diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 568def9..23cea0e 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -12,6 +12,7 @@ * ----------- * o Feature negotiation is coordinated with connection setup (as in TCP), wild * changes of parameters of an established connection are not supported. + * o Changing non-negotiable (NN) values is supported in state OPEN/PARTOPEN. * o All currently known SP features have 1-byte quantities. If in the future * extensions of RFCs 4340..42 define features with item lengths larger than * one byte, a feature-specific extension of the code will be required. @@ -343,6 +344,20 @@ static int __dccp_feat_activate(struct sock *sk, const int idx, return dccp_feat_table[idx].activation_hdlr(sk, val, rx); } +/** + * dccp_feat_activate - Activate feature value on socket + * @sk: fully connected DCCP socket (after handshake is complete) + * @feat_num: feature to activate, one of %dccp_feature_numbers + * @local: whether local (1) or remote (0) @feat_num is meant + * @fval: the value (SP or NN) to activate, or NULL to use the default value + * For general use this function is preferable over __dccp_feat_activate(). + */ +static int dccp_feat_activate(struct sock *sk, u8 feat_num, bool local, + dccp_feat_val const *fval) +{ + return __dccp_feat_activate(sk, dccp_feat_index(feat_num), local, fval); +} + /* Test for "Req'd" feature (RFC 4340, 6.4) */ static inline int dccp_feat_must_be_understood(u8 feat_num) { @@ -650,11 +665,22 @@ int dccp_feat_insert_opts(struct dccp_sock *dp, struct dccp_request_sock *dreq, return -1; if (pos->needs_mandatory && dccp_insert_option_mandatory(skb)) return -1; - /* - * Enter CHANGING after transmitting the Change option (6.6.2). - */ - if (pos->state == FEAT_INITIALISING) - pos->state = FEAT_CHANGING; + + if (skb->sk->sk_state == DCCP_OPEN && + (opt == DCCPO_CONFIRM_R || opt == DCCPO_CONFIRM_L)) { + /* + * Confirms don't get retransmitted (6.6.3) once the + * connection is in state OPEN + */ + dccp_feat_list_pop(pos); + } else { + /* + * Enter CHANGING after transmitting the Change + * option (6.6.2). + */ + if (pos->state == FEAT_INITIALISING) + pos->state = FEAT_CHANGING; + } } return 0; } @@ -730,6 +756,70 @@ int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, 0, list, len); } +/** + * dccp_feat_nn_get - Query current/pending value of NN feature + * @sk: DCCP socket of an established connection + * @feat: NN feature number from %dccp_feature_numbers + * For a known NN feature, returns value currently being negotiated, or + * current (confirmed) value if no negotiation is going on. + */ +u64 dccp_feat_nn_get(struct sock *sk, u8 feat) +{ + if (dccp_feat_type(feat) == FEAT_NN) { + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_feat_entry *entry; + + entry = dccp_feat_list_lookup(&dp->dccps_featneg, feat, 1); + if (entry != NULL) + return entry->val.nn; + + switch (feat) { + case DCCPF_ACK_RATIO: + return dp->dccps_l_ack_ratio; + case DCCPF_SEQUENCE_WINDOW: + return dp->dccps_l_seq_win; + } + } + DCCP_BUG("attempt to look up unsupported feature %u", feat); + return 0; +} +EXPORT_SYMBOL_GPL(dccp_feat_nn_get); + +/** + * dccp_feat_signal_nn_change - Update NN values for an established connection + * @sk: DCCP socket of an established connection + * @feat: NN feature number from %dccp_feature_numbers + * @nn_val: the new value to use + * This function is used to communicate NN updates out-of-band. + */ +int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val) +{ + struct list_head *fn = &dccp_sk(sk)->dccps_featneg; + dccp_feat_val fval = { .nn = nn_val }; + struct dccp_feat_entry *entry; + + if (sk->sk_state != DCCP_OPEN && sk->sk_state != DCCP_PARTOPEN) + return 0; + + if (dccp_feat_type(feat) != FEAT_NN || + !dccp_feat_is_valid_nn_val(feat, nn_val)) + return -EINVAL; + + if (nn_val == dccp_feat_nn_get(sk, feat)) + return 0; /* already set or negotiation under way */ + + entry = dccp_feat_list_lookup(fn, feat, 1); + if (entry != NULL) { + dccp_pr_debug("Clobbering existing NN entry %llu -> %llu\n", + (unsigned long long)entry->val.nn, + (unsigned long long)nn_val); + dccp_feat_list_pop(entry); + } + + inet_csk_schedule_ack(sk); + return dccp_feat_push_change(fn, feat, 1, 0, &fval); +} +EXPORT_SYMBOL_GPL(dccp_feat_signal_nn_change); /* * Tracking features whose value depend on the choice of CCID @@ -1187,6 +1277,100 @@ confirmation_failed: } /** + * dccp_feat_handle_nn_established - Fast-path reception of NN options + * @sk: socket of an established DCCP connection + * @mandatory: whether @opt was preceded by a Mandatory option + * @opt: %DCCPO_CHANGE_L | %DCCPO_CONFIRM_R (NN only) + * @feat: NN number, one of %dccp_feature_numbers + * @val: NN value + * @len: length of @val in bytes + * This function combines the functionality of change_recv/confirm_recv, with + * the following differences (reset codes are the same): + * - cleanup after receiving the Confirm; + * - values are directly activated after successful parsing; + * - deliberately restricted to NN features. + * The restriction to NN features is essential since SP features can have non- + * predictable outcomes (depending on the remote configuration), and are inter- + * dependent (CCIDs for instance cause further dependencies). + */ +static u8 dccp_feat_handle_nn_established(struct sock *sk, u8 mandatory, u8 opt, + u8 feat, u8 *val, u8 len) +{ + struct list_head *fn = &dccp_sk(sk)->dccps_featneg; + const bool local = (opt == DCCPO_CONFIRM_R); + struct dccp_feat_entry *entry; + u8 type = dccp_feat_type(feat); + dccp_feat_val fval; + + dccp_feat_print_opt(opt, feat, val, len, mandatory); + + /* Ignore non-mandatory unknown and non-NN features */ + if (type == FEAT_UNKNOWN) { + if (local && !mandatory) + return 0; + goto fast_path_unknown; + } else if (type != FEAT_NN) { + return 0; + } + + /* + * We don't accept empty Confirms, since in fast-path feature + * negotiation the values are enabled immediately after sending + * the Change option. + * Empty Changes on the other hand are invalid (RFC 4340, 6.1). + */ + if (len == 0 || len > sizeof(fval.nn)) + goto fast_path_unknown; + + if (opt == DCCPO_CHANGE_L) { + fval.nn = dccp_decode_value_var(val, len); + if (!dccp_feat_is_valid_nn_val(feat, fval.nn)) + goto fast_path_unknown; + + if (dccp_feat_push_confirm(fn, feat, local, &fval) || + dccp_feat_activate(sk, feat, local, &fval)) + return DCCP_RESET_CODE_TOO_BUSY; + + /* set the `Ack Pending' flag to piggyback a Confirm */ + inet_csk_schedule_ack(sk); + + } else if (opt == DCCPO_CONFIRM_R) { + entry = dccp_feat_list_lookup(fn, feat, local); + if (entry == NULL || entry->state != FEAT_CHANGING) + return 0; + + fval.nn = dccp_decode_value_var(val, len); + /* + * Just ignore a value that doesn't match our current value. + * If the option changes twice within two RTTs, then at least + * one CONFIRM will be received for the old value after a + * new CHANGE was sent. + */ + if (fval.nn != entry->val.nn) + return 0; + + /* Only activate after receiving the Confirm option (6.6.1). */ + dccp_feat_activate(sk, feat, local, &fval); + + /* It has been confirmed - so remove the entry */ + dccp_feat_list_pop(entry); + + } else { + DCCP_WARN("Received illegal option %u\n", opt); + goto fast_path_failed; + } + return 0; + +fast_path_unknown: + if (!mandatory) + return dccp_push_empty_confirm(fn, feat, local); + +fast_path_failed: + return mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR + : DCCP_RESET_CODE_OPTION_ERROR; +} + +/** * dccp_feat_parse_options - Process Feature-Negotiation Options * @sk: for general use and used by the client during connection setup * @dreq: used by the server during connection setup @@ -1221,6 +1405,14 @@ int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq, return dccp_feat_confirm_recv(fn, mandatory, opt, feat, val, len, server); } + break; + /* + * Support for exchanging NN options on an established connection. + */ + case DCCP_OPEN: + case DCCP_PARTOPEN: + return dccp_feat_handle_nn_established(sk, mandatory, opt, feat, + val, len); } return 0; /* ignore FN options in all other states */ } diff --git a/net/dccp/feat.h b/net/dccp/feat.h index e56a4e5..90b957d 100644 --- a/net/dccp/feat.h +++ b/net/dccp/feat.h @@ -129,6 +129,7 @@ extern int dccp_feat_clone_list(struct list_head const *, struct list_head *); extern void dccp_encode_value_var(const u64 value, u8 *to, const u8 len); extern u64 dccp_decode_value_var(const u8 *bf, const u8 len); +extern u64 dccp_feat_nn_get(struct sock *sk, u8 feat); extern int dccp_insert_option_mandatory(struct sk_buff *skb); extern int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat, diff --git a/net/dccp/input.c b/net/dccp/input.c index 4222e7a..51d5fe5 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -619,20 +619,31 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, return 1; } - if (sk->sk_state != DCCP_REQUESTING && sk->sk_state != DCCP_RESPOND) { - if (dccp_check_seqno(sk, skb)) - goto discard; - - /* - * Step 8: Process options and mark acknowledgeable - */ - if (dccp_parse_options(sk, NULL, skb)) - return 1; + /* Step 6: Check sequence numbers (omitted in LISTEN/REQUEST state) */ + if (sk->sk_state != DCCP_REQUESTING && dccp_check_seqno(sk, skb)) + goto discard; - dccp_handle_ackvec_processing(sk, skb); - dccp_deliver_input_to_ccids(sk, skb); + /* + * Step 7: Check for unexpected packet types + * If (S.is_server and P.type == Response) + * or (S.is_client and P.type == Request) + * or (S.state == RESPOND and P.type == Data), + * Send Sync packet acknowledging P.seqno + * Drop packet and return + */ + if ((dp->dccps_role != DCCP_ROLE_CLIENT && + dh->dccph_type == DCCP_PKT_RESPONSE) || + (dp->dccps_role == DCCP_ROLE_CLIENT && + dh->dccph_type == DCCP_PKT_REQUEST) || + (sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) { + dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC); + goto discard; } + /* Step 8: Process options */ + if (dccp_parse_options(sk, NULL, skb)) + return 1; + /* * Step 9: Process Reset * If P.type == Reset, @@ -640,31 +651,15 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * S.state := TIMEWAIT * Set TIMEWAIT timer * Drop packet and return - */ + */ if (dh->dccph_type == DCCP_PKT_RESET) { dccp_rcv_reset(sk, skb); return 0; - /* - * Step 7: Check for unexpected packet types - * If (S.is_server and P.type == Response) - * or (S.is_client and P.type == Request) - * or (S.state == RESPOND and P.type == Data), - * Send Sync packet acknowledging P.seqno - * Drop packet and return - */ - } else if ((dp->dccps_role != DCCP_ROLE_CLIENT && - dh->dccph_type == DCCP_PKT_RESPONSE) || - (dp->dccps_role == DCCP_ROLE_CLIENT && - dh->dccph_type == DCCP_PKT_REQUEST) || - (sk->sk_state == DCCP_RESPOND && - dh->dccph_type == DCCP_PKT_DATA)) { - dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC); - goto discard; - } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { + } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { /* Step 13 */ if (dccp_rcv_closereq(sk, skb)) return 0; goto discard; - } else if (dh->dccph_type == DCCP_PKT_CLOSE) { + } else if (dh->dccph_type == DCCP_PKT_CLOSE) { /* Step 14 */ if (dccp_rcv_close(sk, skb)) return 0; goto discard; @@ -679,8 +674,12 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, __kfree_skb(skb); return 0; - case DCCP_RESPOND: case DCCP_PARTOPEN: + /* Step 8: if using Ack Vectors, mark packet acknowledgeable */ + dccp_handle_ackvec_processing(sk, skb); + dccp_deliver_input_to_ccids(sk, skb); + /* fall through */ + case DCCP_RESPOND: queued = dccp_rcv_respond_partopen_state_process(sk, skb, dh, len); break; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 332639b..72416c8 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -111,6 +111,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) rt = ip_route_newports(fl4, rt, orig_sport, orig_dport, inet->inet_sport, inet->inet_dport, sk); if (IS_ERR(rt)) { + err = PTR_ERR(rt); rt = NULL; goto failure; } @@ -433,7 +434,8 @@ exit: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); return NULL; put_and_exit: - sock_put(newsk); + inet_csk_prepare_forced_close(newsk); + dccp_done(newsk); goto exit; } diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index b74f761..592b78c 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -271,7 +271,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, &ireq6->loc_addr, &ireq6->rmt_addr); ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr); - err = ip6_xmit(sk, skb, &fl6, opt); + err = ip6_xmit(sk, skb, &fl6, opt, np->tclass); err = net_xmit_eval(err); } @@ -326,7 +326,7 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false); if (!IS_ERR(dst)) { skb_dst_set(skb, dst); - ip6_xmit(ctl_sk, skb, &fl6, NULL); + ip6_xmit(ctl_sk, skb, &fl6, NULL, 0); DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); return; @@ -609,7 +609,8 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, newinet->inet_rcv_saddr = LOOPBACK4_IPV6; if (__inet_inherit_port(sk, newsk) < 0) { - sock_put(newsk); + inet_csk_prepare_forced_close(newsk); + dccp_done(newsk); goto out; } __inet6_hash(newsk, NULL); diff --git a/net/dccp/output.c b/net/dccp/output.c index fab108e..dede3ed 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -27,11 +27,13 @@ static inline void dccp_event_ack_sent(struct sock *sk) inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); } -static void dccp_skb_entail(struct sock *sk, struct sk_buff *skb) +/* enqueue @skb on sk_send_head for retransmission, return clone to send now */ +static struct sk_buff *dccp_skb_entail(struct sock *sk, struct sk_buff *skb) { skb_set_owner_w(skb, sk); WARN_ON(sk->sk_send_head); sk->sk_send_head = skb; + return skb_clone(sk->sk_send_head, gfp_any()); } /* @@ -552,8 +554,7 @@ int dccp_connect(struct sock *sk) DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; - dccp_skb_entail(sk, skb); - dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL)); + dccp_transmit_skb(sk, dccp_skb_entail(sk, skb)); DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS); /* Timer for repeating the REQUEST until an answer. */ @@ -678,8 +679,7 @@ void dccp_send_close(struct sock *sk, const int active) DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE; if (active) { - dccp_skb_entail(sk, skb); - dccp_transmit_skb(sk, skb_clone(skb, prio)); + skb = dccp_skb_entail(sk, skb); /* * Retransmission timer for active-close: RFC 4340, 8.3 requires * to retransmit the Close/CloseReq until the CLOSING/CLOSEREQ @@ -692,6 +692,6 @@ void dccp_send_close(struct sock *sk, const int active) */ inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, DCCP_TIMEOUT_INIT, DCCP_RTO_MAX); - } else - dccp_transmit_skb(sk, skb); + } + dccp_transmit_skb(sk, skb); } diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 152975d..e742f90 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -184,7 +184,6 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) dp->dccps_rate_last = jiffies; dp->dccps_role = DCCP_ROLE_UNDEFINED; dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; - dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; dp->dccps_tx_qlen = sysctl_dccp_tx_qlen; dccp_init_xmit_timers(sk); diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 7587870..16f0b22 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -12,6 +12,7 @@ #include #include +#include #include "dccp.h" diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index d71f0d2..16fbf8c 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -291,23 +291,23 @@ int dn_sockaddr2username(struct sockaddr_dn *sdn, unsigned char *buf, unsigned c *buf++ = type; - switch(type) { - case 0: - *buf++ = sdn->sdn_objnum; - break; - case 1: - *buf++ = 0; - *buf++ = le16_to_cpu(sdn->sdn_objnamel); - memcpy(buf, sdn->sdn_objname, le16_to_cpu(sdn->sdn_objnamel)); - len = 3 + le16_to_cpu(sdn->sdn_objnamel); - break; - case 2: - memset(buf, 0, 5); - buf += 5; - *buf++ = le16_to_cpu(sdn->sdn_objnamel); - memcpy(buf, sdn->sdn_objname, le16_to_cpu(sdn->sdn_objnamel)); - len = 7 + le16_to_cpu(sdn->sdn_objnamel); - break; + switch (type) { + case 0: + *buf++ = sdn->sdn_objnum; + break; + case 1: + *buf++ = 0; + *buf++ = le16_to_cpu(sdn->sdn_objnamel); + memcpy(buf, sdn->sdn_objname, le16_to_cpu(sdn->sdn_objnamel)); + len = 3 + le16_to_cpu(sdn->sdn_objnamel); + break; + case 2: + memset(buf, 0, 5); + buf += 5; + *buf++ = le16_to_cpu(sdn->sdn_objnamel); + memcpy(buf, sdn->sdn_objname, le16_to_cpu(sdn->sdn_objnamel)); + len = 7 + le16_to_cpu(sdn->sdn_objnamel); + break; } return len; @@ -337,23 +337,23 @@ int dn_username2sockaddr(unsigned char *data, int len, struct sockaddr_dn *sdn, *fmt = *data++; type = *data++; - switch(*fmt) { - case 0: - sdn->sdn_objnum = type; - return 2; - case 1: - namel = 16; - break; - case 2: - len -= 4; - data += 4; - break; - case 4: - len -= 8; - data += 8; - break; - default: - return -1; + switch (*fmt) { + case 0: + sdn->sdn_objnum = type; + return 2; + case 1: + namel = 16; + break; + case 2: + len -= 4; + data += 4; + break; + case 4: + len -= 8; + data += 8; + break; + default: + return -1; } len -= 1; @@ -575,25 +575,26 @@ int dn_destroy_timer(struct sock *sk) scp->persist = dn_nsp_persist(sk); - switch(scp->state) { - case DN_DI: - dn_nsp_send_disc(sk, NSP_DISCINIT, 0, GFP_ATOMIC); - if (scp->nsp_rxtshift >= decnet_di_count) - scp->state = DN_CN; - return 0; + switch (scp->state) { + case DN_DI: + dn_nsp_send_disc(sk, NSP_DISCINIT, 0, GFP_ATOMIC); + if (scp->nsp_rxtshift >= decnet_di_count) + scp->state = DN_CN; + return 0; - case DN_DR: - dn_nsp_send_disc(sk, NSP_DISCINIT, 0, GFP_ATOMIC); - if (scp->nsp_rxtshift >= decnet_dr_count) - scp->state = DN_DRC; - return 0; + case DN_DR: + dn_nsp_send_disc(sk, NSP_DISCINIT, 0, GFP_ATOMIC); + if (scp->nsp_rxtshift >= decnet_dr_count) + scp->state = DN_DRC; + return 0; - case DN_DN: - if (scp->nsp_rxtshift < decnet_dn_count) { - /* printk(KERN_DEBUG "dn_destroy_timer: DN\n"); */ - dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC, GFP_ATOMIC); - return 0; - } + case DN_DN: + if (scp->nsp_rxtshift < decnet_dn_count) { + /* printk(KERN_DEBUG "dn_destroy_timer: DN\n"); */ + dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC, + GFP_ATOMIC); + return 0; + } } scp->persist = (HZ * decnet_time_wait); @@ -623,42 +624,42 @@ static void dn_destroy_sock(struct sock *sk) sk->sk_state = TCP_CLOSE; - switch(scp->state) { - case DN_DN: - dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC, - sk->sk_allocation); - scp->persist_fxn = dn_destroy_timer; - scp->persist = dn_nsp_persist(sk); - break; - case DN_CR: - scp->state = DN_DR; - goto disc_reject; - case DN_RUN: - scp->state = DN_DI; - case DN_DI: - case DN_DR: + switch (scp->state) { + case DN_DN: + dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC, + sk->sk_allocation); + scp->persist_fxn = dn_destroy_timer; + scp->persist = dn_nsp_persist(sk); + break; + case DN_CR: + scp->state = DN_DR; + goto disc_reject; + case DN_RUN: + scp->state = DN_DI; + case DN_DI: + case DN_DR: disc_reject: - dn_nsp_send_disc(sk, NSP_DISCINIT, 0, sk->sk_allocation); - case DN_NC: - case DN_NR: - case DN_RJ: - case DN_DIC: - case DN_CN: - case DN_DRC: - case DN_CI: - case DN_CD: - scp->persist_fxn = dn_destroy_timer; - scp->persist = dn_nsp_persist(sk); - break; - default: - printk(KERN_DEBUG "DECnet: dn_destroy_sock passed socket in invalid state\n"); - case DN_O: - dn_stop_slow_timer(sk); + dn_nsp_send_disc(sk, NSP_DISCINIT, 0, sk->sk_allocation); + case DN_NC: + case DN_NR: + case DN_RJ: + case DN_DIC: + case DN_CN: + case DN_DRC: + case DN_CI: + case DN_CD: + scp->persist_fxn = dn_destroy_timer; + scp->persist = dn_nsp_persist(sk); + break; + default: + printk(KERN_DEBUG "DECnet: dn_destroy_sock passed socket in invalid state\n"); + case DN_O: + dn_stop_slow_timer(sk); - dn_unhash_sock_bh(sk); - sock_put(sk); + dn_unhash_sock_bh(sk); + sock_put(sk); - break; + break; } } @@ -683,15 +684,15 @@ static int dn_create(struct net *net, struct socket *sock, int protocol, if (!net_eq(net, &init_net)) return -EAFNOSUPPORT; - switch(sock->type) { - case SOCK_SEQPACKET: - if (protocol != DNPROTO_NSP) - return -EPROTONOSUPPORT; - break; - case SOCK_STREAM: - break; - default: - return -ESOCKTNOSUPPORT; + switch (sock->type) { + case SOCK_SEQPACKET: + if (protocol != DNPROTO_NSP) + return -EPROTONOSUPPORT; + break; + case SOCK_STREAM: + break; + default: + return -ESOCKTNOSUPPORT; } @@ -987,16 +988,16 @@ static inline int dn_check_state(struct sock *sk, struct sockaddr_dn *addr, int { struct dn_scp *scp = DN_SK(sk); - switch(scp->state) { - case DN_RUN: - return 0; - case DN_CR: - return dn_confirm_accept(sk, timeo, sk->sk_allocation); - case DN_CI: - case DN_CC: - return dn_wait_run(sk, timeo); - case DN_O: - return __dn_connect(sk, addr, addrlen, timeo, flags); + switch (scp->state) { + case DN_RUN: + return 0; + case DN_CR: + return dn_confirm_accept(sk, timeo, sk->sk_allocation); + case DN_CI: + case DN_CC: + return dn_wait_run(sk, timeo); + case DN_O: + return __dn_connect(sk, addr, addrlen, timeo, flags); } return -EINVAL; @@ -1363,141 +1364,140 @@ static int __dn_setsockopt(struct socket *sock, int level,int optname, char __us if (copy_from_user(&u, optval, optlen)) return -EFAULT; - switch(optname) { - case DSO_CONDATA: - if (sock->state == SS_CONNECTED) - return -EISCONN; - if ((scp->state != DN_O) && (scp->state != DN_CR)) - return -EINVAL; + switch (optname) { + case DSO_CONDATA: + if (sock->state == SS_CONNECTED) + return -EISCONN; + if ((scp->state != DN_O) && (scp->state != DN_CR)) + return -EINVAL; - if (optlen != sizeof(struct optdata_dn)) - return -EINVAL; + if (optlen != sizeof(struct optdata_dn)) + return -EINVAL; - if (le16_to_cpu(u.opt.opt_optl) > 16) - return -EINVAL; + if (le16_to_cpu(u.opt.opt_optl) > 16) + return -EINVAL; - memcpy(&scp->conndata_out, &u.opt, optlen); - break; - - case DSO_DISDATA: - if (sock->state != SS_CONNECTED && scp->accept_mode == ACC_IMMED) - return -ENOTCONN; - - if (optlen != sizeof(struct optdata_dn)) - return -EINVAL; + memcpy(&scp->conndata_out, &u.opt, optlen); + break; - if (le16_to_cpu(u.opt.opt_optl) > 16) - return -EINVAL; + case DSO_DISDATA: + if (sock->state != SS_CONNECTED && + scp->accept_mode == ACC_IMMED) + return -ENOTCONN; - memcpy(&scp->discdata_out, &u.opt, optlen); - break; + if (optlen != sizeof(struct optdata_dn)) + return -EINVAL; - case DSO_CONACCESS: - if (sock->state == SS_CONNECTED) - return -EISCONN; - if (scp->state != DN_O) - return -EINVAL; + if (le16_to_cpu(u.opt.opt_optl) > 16) + return -EINVAL; - if (optlen != sizeof(struct accessdata_dn)) - return -EINVAL; + memcpy(&scp->discdata_out, &u.opt, optlen); + break; - if ((u.acc.acc_accl > DN_MAXACCL) || - (u.acc.acc_passl > DN_MAXACCL) || - (u.acc.acc_userl > DN_MAXACCL)) - return -EINVAL; + case DSO_CONACCESS: + if (sock->state == SS_CONNECTED) + return -EISCONN; + if (scp->state != DN_O) + return -EINVAL; - memcpy(&scp->accessdata, &u.acc, optlen); - break; + if (optlen != sizeof(struct accessdata_dn)) + return -EINVAL; - case DSO_ACCEPTMODE: - if (sock->state == SS_CONNECTED) - return -EISCONN; - if (scp->state != DN_O) - return -EINVAL; + if ((u.acc.acc_accl > DN_MAXACCL) || + (u.acc.acc_passl > DN_MAXACCL) || + (u.acc.acc_userl > DN_MAXACCL)) + return -EINVAL; - if (optlen != sizeof(int)) - return -EINVAL; + memcpy(&scp->accessdata, &u.acc, optlen); + break; - if ((u.mode != ACC_IMMED) && (u.mode != ACC_DEFER)) - return -EINVAL; + case DSO_ACCEPTMODE: + if (sock->state == SS_CONNECTED) + return -EISCONN; + if (scp->state != DN_O) + return -EINVAL; - scp->accept_mode = (unsigned char)u.mode; - break; + if (optlen != sizeof(int)) + return -EINVAL; - case DSO_CONACCEPT: + if ((u.mode != ACC_IMMED) && (u.mode != ACC_DEFER)) + return -EINVAL; - if (scp->state != DN_CR) - return -EINVAL; - timeo = sock_rcvtimeo(sk, 0); - err = dn_confirm_accept(sk, &timeo, sk->sk_allocation); - return err; + scp->accept_mode = (unsigned char)u.mode; + break; - case DSO_CONREJECT: + case DSO_CONACCEPT: + if (scp->state != DN_CR) + return -EINVAL; + timeo = sock_rcvtimeo(sk, 0); + err = dn_confirm_accept(sk, &timeo, sk->sk_allocation); + return err; - if (scp->state != DN_CR) - return -EINVAL; + case DSO_CONREJECT: + if (scp->state != DN_CR) + return -EINVAL; - scp->state = DN_DR; - sk->sk_shutdown = SHUTDOWN_MASK; - dn_nsp_send_disc(sk, 0x38, 0, sk->sk_allocation); - break; + scp->state = DN_DR; + sk->sk_shutdown = SHUTDOWN_MASK; + dn_nsp_send_disc(sk, 0x38, 0, sk->sk_allocation); + break; - default: + default: #ifdef CONFIG_NETFILTER return nf_setsockopt(sk, PF_DECnet, optname, optval, optlen); #endif - case DSO_LINKINFO: - case DSO_STREAM: - case DSO_SEQPACKET: - return -ENOPROTOOPT; - - case DSO_MAXWINDOW: - if (optlen != sizeof(unsigned long)) - return -EINVAL; - if (u.win > NSP_MAX_WINDOW) - u.win = NSP_MAX_WINDOW; - if (u.win == 0) - return -EINVAL; - scp->max_window = u.win; - if (scp->snd_window > u.win) - scp->snd_window = u.win; - break; + case DSO_LINKINFO: + case DSO_STREAM: + case DSO_SEQPACKET: + return -ENOPROTOOPT; + + case DSO_MAXWINDOW: + if (optlen != sizeof(unsigned long)) + return -EINVAL; + if (u.win > NSP_MAX_WINDOW) + u.win = NSP_MAX_WINDOW; + if (u.win == 0) + return -EINVAL; + scp->max_window = u.win; + if (scp->snd_window > u.win) + scp->snd_window = u.win; + break; - case DSO_NODELAY: - if (optlen != sizeof(int)) - return -EINVAL; - if (scp->nonagle == 2) - return -EINVAL; - scp->nonagle = (u.val == 0) ? 0 : 1; - /* if (scp->nonagle == 1) { Push pending frames } */ - break; + case DSO_NODELAY: + if (optlen != sizeof(int)) + return -EINVAL; + if (scp->nonagle == 2) + return -EINVAL; + scp->nonagle = (u.val == 0) ? 0 : 1; + /* if (scp->nonagle == 1) { Push pending frames } */ + break; - case DSO_CORK: - if (optlen != sizeof(int)) - return -EINVAL; - if (scp->nonagle == 1) - return -EINVAL; - scp->nonagle = (u.val == 0) ? 0 : 2; - /* if (scp->nonagle == 0) { Push pending frames } */ - break; + case DSO_CORK: + if (optlen != sizeof(int)) + return -EINVAL; + if (scp->nonagle == 1) + return -EINVAL; + scp->nonagle = (u.val == 0) ? 0 : 2; + /* if (scp->nonagle == 0) { Push pending frames } */ + break; - case DSO_SERVICES: - if (optlen != sizeof(unsigned char)) - return -EINVAL; - if ((u.services & ~NSP_FC_MASK) != 0x01) - return -EINVAL; - if ((u.services & NSP_FC_MASK) == NSP_FC_MASK) - return -EINVAL; - scp->services_loc = u.services; - break; + case DSO_SERVICES: + if (optlen != sizeof(unsigned char)) + return -EINVAL; + if ((u.services & ~NSP_FC_MASK) != 0x01) + return -EINVAL; + if ((u.services & NSP_FC_MASK) == NSP_FC_MASK) + return -EINVAL; + scp->services_loc = u.services; + break; - case DSO_INFO: - if (optlen != sizeof(unsigned char)) - return -EINVAL; - if (u.info & 0xfc) - return -EINVAL; - scp->info_loc = u.info; - break; + case DSO_INFO: + if (optlen != sizeof(unsigned char)) + return -EINVAL; + if (u.info & 0xfc) + return -EINVAL; + scp->info_loc = u.info; + break; } return 0; @@ -1527,107 +1527,106 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us if(get_user(r_len , optlen)) return -EFAULT; - switch(optname) { - case DSO_CONDATA: - if (r_len > sizeof(struct optdata_dn)) - r_len = sizeof(struct optdata_dn); - r_data = &scp->conndata_in; - break; - - case DSO_DISDATA: - if (r_len > sizeof(struct optdata_dn)) - r_len = sizeof(struct optdata_dn); - r_data = &scp->discdata_in; - break; + switch (optname) { + case DSO_CONDATA: + if (r_len > sizeof(struct optdata_dn)) + r_len = sizeof(struct optdata_dn); + r_data = &scp->conndata_in; + break; - case DSO_CONACCESS: - if (r_len > sizeof(struct accessdata_dn)) - r_len = sizeof(struct accessdata_dn); - r_data = &scp->accessdata; - break; + case DSO_DISDATA: + if (r_len > sizeof(struct optdata_dn)) + r_len = sizeof(struct optdata_dn); + r_data = &scp->discdata_in; + break; - case DSO_ACCEPTMODE: - if (r_len > sizeof(unsigned char)) - r_len = sizeof(unsigned char); - r_data = &scp->accept_mode; - break; + case DSO_CONACCESS: + if (r_len > sizeof(struct accessdata_dn)) + r_len = sizeof(struct accessdata_dn); + r_data = &scp->accessdata; + break; - case DSO_LINKINFO: - if (r_len > sizeof(struct linkinfo_dn)) - r_len = sizeof(struct linkinfo_dn); + case DSO_ACCEPTMODE: + if (r_len > sizeof(unsigned char)) + r_len = sizeof(unsigned char); + r_data = &scp->accept_mode; + break; - memset(&link, 0, sizeof(link)); + case DSO_LINKINFO: + if (r_len > sizeof(struct linkinfo_dn)) + r_len = sizeof(struct linkinfo_dn); - switch(sock->state) { - case SS_CONNECTING: - link.idn_linkstate = LL_CONNECTING; - break; - case SS_DISCONNECTING: - link.idn_linkstate = LL_DISCONNECTING; - break; - case SS_CONNECTED: - link.idn_linkstate = LL_RUNNING; - break; - default: - link.idn_linkstate = LL_INACTIVE; - } + memset(&link, 0, sizeof(link)); - link.idn_segsize = scp->segsize_rem; - r_data = &link; + switch (sock->state) { + case SS_CONNECTING: + link.idn_linkstate = LL_CONNECTING; + break; + case SS_DISCONNECTING: + link.idn_linkstate = LL_DISCONNECTING; + break; + case SS_CONNECTED: + link.idn_linkstate = LL_RUNNING; break; - default: + link.idn_linkstate = LL_INACTIVE; + } + + link.idn_segsize = scp->segsize_rem; + r_data = &link; + break; + + default: #ifdef CONFIG_NETFILTER - { - int ret, len; + { + int ret, len; - if(get_user(len, optlen)) - return -EFAULT; + if (get_user(len, optlen)) + return -EFAULT; - ret = nf_getsockopt(sk, PF_DECnet, optname, - optval, &len); - if (ret >= 0) - ret = put_user(len, optlen); - return ret; - } + ret = nf_getsockopt(sk, PF_DECnet, optname, optval, &len); + if (ret >= 0) + ret = put_user(len, optlen); + return ret; + } #endif - case DSO_STREAM: - case DSO_SEQPACKET: - case DSO_CONACCEPT: - case DSO_CONREJECT: - return -ENOPROTOOPT; - - case DSO_MAXWINDOW: - if (r_len > sizeof(unsigned long)) - r_len = sizeof(unsigned long); - r_data = &scp->max_window; - break; + case DSO_STREAM: + case DSO_SEQPACKET: + case DSO_CONACCEPT: + case DSO_CONREJECT: + return -ENOPROTOOPT; + + case DSO_MAXWINDOW: + if (r_len > sizeof(unsigned long)) + r_len = sizeof(unsigned long); + r_data = &scp->max_window; + break; - case DSO_NODELAY: - if (r_len > sizeof(int)) - r_len = sizeof(int); - val = (scp->nonagle == 1); - r_data = &val; - break; + case DSO_NODELAY: + if (r_len > sizeof(int)) + r_len = sizeof(int); + val = (scp->nonagle == 1); + r_data = &val; + break; - case DSO_CORK: - if (r_len > sizeof(int)) - r_len = sizeof(int); - val = (scp->nonagle == 2); - r_data = &val; - break; + case DSO_CORK: + if (r_len > sizeof(int)) + r_len = sizeof(int); + val = (scp->nonagle == 2); + r_data = &val; + break; - case DSO_SERVICES: - if (r_len > sizeof(unsigned char)) - r_len = sizeof(unsigned char); - r_data = &scp->services_rem; - break; + case DSO_SERVICES: + if (r_len > sizeof(unsigned char)) + r_len = sizeof(unsigned char); + r_data = &scp->services_rem; + break; - case DSO_INFO: - if (r_len > sizeof(unsigned char)) - r_len = sizeof(unsigned char); - r_data = &scp->info_rem; - break; + case DSO_INFO: + if (r_len > sizeof(unsigned char)) + r_len = sizeof(unsigned char); + r_data = &scp->info_rem; + break; } if (r_data) { @@ -2088,15 +2087,15 @@ static int dn_device_event(struct notifier_block *this, unsigned long event, if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; - switch(event) { - case NETDEV_UP: - dn_dev_up(dev); - break; - case NETDEV_DOWN: - dn_dev_down(dev); - break; - default: - break; + switch (event) { + case NETDEV_UP: + dn_dev_up(dev); + break; + case NETDEV_DOWN: + dn_dev_down(dev); + break; + default: + break; } return NOTIFY_DONE; @@ -2209,54 +2208,54 @@ static void dn_printable_object(struct sockaddr_dn *dn, unsigned char *buf) int i; switch (le16_to_cpu(dn->sdn_objnamel)) { - case 0: - sprintf(buf, "%d", dn->sdn_objnum); - break; - default: - for (i = 0; i < le16_to_cpu(dn->sdn_objnamel); i++) { - buf[i] = dn->sdn_objname[i]; - if (IS_NOT_PRINTABLE(buf[i])) - buf[i] = '.'; - } - buf[i] = 0; + case 0: + sprintf(buf, "%d", dn->sdn_objnum); + break; + default: + for (i = 0; i < le16_to_cpu(dn->sdn_objnamel); i++) { + buf[i] = dn->sdn_objname[i]; + if (IS_NOT_PRINTABLE(buf[i])) + buf[i] = '.'; + } + buf[i] = 0; } } static char *dn_state2asc(unsigned char state) { - switch(state) { - case DN_O: - return "OPEN"; - case DN_CR: - return " CR"; - case DN_DR: - return " DR"; - case DN_DRC: - return " DRC"; - case DN_CC: - return " CC"; - case DN_CI: - return " CI"; - case DN_NR: - return " NR"; - case DN_NC: - return " NC"; - case DN_CD: - return " CD"; - case DN_RJ: - return " RJ"; - case DN_RUN: - return " RUN"; - case DN_DI: - return " DI"; - case DN_DIC: - return " DIC"; - case DN_DN: - return " DN"; - case DN_CL: - return " CL"; - case DN_CN: - return " CN"; + switch (state) { + case DN_O: + return "OPEN"; + case DN_CR: + return " CR"; + case DN_DR: + return " DR"; + case DN_DRC: + return " DRC"; + case DN_CC: + return " CC"; + case DN_CI: + return " CI"; + case DN_NR: + return " NR"; + case DN_NC: + return " NC"; + case DN_CD: + return " CD"; + case DN_RJ: + return " RJ"; + case DN_RUN: + return " RUN"; + case DN_DI: + return " DI"; + case DN_DIC: + return " DIC"; + case DN_DN: + return " DN"; + case DN_CL: + return " CL"; + case DN_CN: + return " CN"; } return "????"; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 3780fd6..74d321a 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -437,17 +437,17 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg) dev_load(&init_net, ifr->ifr_name); - switch(cmd) { - case SIOCGIFADDR: - break; - case SIOCSIFADDR: - if (!capable(CAP_NET_ADMIN)) - return -EACCES; - if (sdn->sdn_family != AF_DECnet) - return -EINVAL; - break; - default: + switch (cmd) { + case SIOCGIFADDR: + break; + case SIOCSIFADDR: + if (!capable(CAP_NET_ADMIN)) + return -EACCES; + if (sdn->sdn_family != AF_DECnet) return -EINVAL; + break; + default: + return -EINVAL; } rtnl_lock(); @@ -470,27 +470,27 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg) goto done; } - switch(cmd) { - case SIOCGIFADDR: - *((__le16 *)sdn->sdn_nodeaddr) = ifa->ifa_local; - goto rarok; - - case SIOCSIFADDR: - if (!ifa) { - if ((ifa = dn_dev_alloc_ifa()) == NULL) { - ret = -ENOBUFS; - break; - } - memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); - } else { - if (ifa->ifa_local == dn_saddr2dn(sdn)) - break; - dn_dev_del_ifa(dn_db, ifap, 0); + switch (cmd) { + case SIOCGIFADDR: + *((__le16 *)sdn->sdn_nodeaddr) = ifa->ifa_local; + goto rarok; + + case SIOCSIFADDR: + if (!ifa) { + if ((ifa = dn_dev_alloc_ifa()) == NULL) { + ret = -ENOBUFS; + break; } + memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); + } else { + if (ifa->ifa_local == dn_saddr2dn(sdn)) + break; + dn_dev_del_ifa(dn_db, ifap, 0); + } - ifa->ifa_local = ifa->ifa_address = dn_saddr2dn(sdn); + ifa->ifa_local = ifa->ifa_address = dn_saddr2dn(sdn); - ret = dn_dev_set_ifa(dev, ifa); + ret = dn_dev_set_ifa(dev, ifa); } done: rtnl_unlock(); @@ -1101,7 +1101,7 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err) dn_db->neigh_parms = neigh_parms_alloc(dev, &dn_neigh_table); if (!dn_db->neigh_parms) { - rcu_assign_pointer(dev->dn_ptr, NULL); + RCU_INIT_POINTER(dev->dn_ptr, NULL); kfree(dn_db); return NULL; } @@ -1313,7 +1313,7 @@ static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) ++*pos; - dev = (struct net_device *)v; + dev = v; if (v == SEQ_START_TOKEN) dev = net_device_entry(&init_net.dev_base_head); @@ -1335,13 +1335,13 @@ static void dn_dev_seq_stop(struct seq_file *seq, void *v) static char *dn_type2asc(char type) { - switch(type) { - case DN_DEV_BCAST: - return "B"; - case DN_DEV_UCAST: - return "U"; - case DN_DEV_MPOINT: - return "M"; + switch (type) { + case DN_DEV_BCAST: + return "B"; + case DN_DEV_UCAST: + return "U"; + case DN_DEV_MPOINT: + return "M"; } return "?"; diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 104324d..9e885f1 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include @@ -414,33 +414,34 @@ int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowidn res->fi = fi; - switch(type) { - case RTN_NAT: - DN_FIB_RES_RESET(*res); + switch (type) { + case RTN_NAT: + DN_FIB_RES_RESET(*res); + atomic_inc(&fi->fib_clntref); + return 0; + case RTN_UNICAST: + case RTN_LOCAL: + for_nexthops(fi) { + if (nh->nh_flags & RTNH_F_DEAD) + continue; + if (!fld->flowidn_oif || + fld->flowidn_oif == nh->nh_oif) + break; + } + if (nhsel < fi->fib_nhs) { + res->nh_sel = nhsel; atomic_inc(&fi->fib_clntref); return 0; - case RTN_UNICAST: - case RTN_LOCAL: - for_nexthops(fi) { - if (nh->nh_flags & RTNH_F_DEAD) - continue; - if (!fld->flowidn_oif || - fld->flowidn_oif == nh->nh_oif) - break; - } - if (nhsel < fi->fib_nhs) { - res->nh_sel = nhsel; - atomic_inc(&fi->fib_clntref); - return 0; - } - endfor_nexthops(fi); - res->fi = NULL; - return 1; - default: - if (net_ratelimit()) - printk("DECnet: impossible routing event : dn_fib_semantic_match type=%d\n", type); - res->fi = NULL; - return -EINVAL; + } + endfor_nexthops(fi); + res->fi = NULL; + return 1; + default: + if (net_ratelimit()) + printk("DECnet: impossible routing event : dn_fib_semantic_match type=%d\n", + type); + res->fi = NULL; + return -EINVAL; } } return err; @@ -647,20 +648,20 @@ static int dn_fib_dnaddr_event(struct notifier_block *this, unsigned long event, { struct dn_ifaddr *ifa = (struct dn_ifaddr *)ptr; - switch(event) { - case NETDEV_UP: - dn_fib_add_ifaddr(ifa); - dn_fib_sync_up(ifa->ifa_dev->dev); + switch (event) { + case NETDEV_UP: + dn_fib_add_ifaddr(ifa); + dn_fib_sync_up(ifa->ifa_dev->dev); + dn_rt_cache_flush(-1); + break; + case NETDEV_DOWN: + dn_fib_del_ifaddr(ifa); + if (ifa->ifa_dev && ifa->ifa_dev->ifa_list == NULL) { + dn_fib_disable_addr(ifa->ifa_dev->dev, 1); + } else { dn_rt_cache_flush(-1); - break; - case NETDEV_DOWN: - dn_fib_del_ifaddr(ifa); - if (ifa->ifa_dev && ifa->ifa_dev->ifa_list == NULL) { - dn_fib_disable_addr(ifa->ifa_dev->dev, 1); - } else { - dn_rt_cache_flush(-1); - } - break; + } + break; } return NOTIFY_DONE; } diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 9810610..7f0eb08 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include #include @@ -51,9 +51,9 @@ static int dn_neigh_construct(struct neighbour *); static void dn_long_error_report(struct neighbour *, struct sk_buff *); static void dn_short_error_report(struct neighbour *, struct sk_buff *); -static int dn_long_output(struct sk_buff *); -static int dn_short_output(struct sk_buff *); -static int dn_phase3_output(struct sk_buff *); +static int dn_long_output(struct neighbour *, struct sk_buff *); +static int dn_short_output(struct neighbour *, struct sk_buff *); +static int dn_phase3_output(struct neighbour *, struct sk_buff *); /* @@ -64,8 +64,6 @@ static const struct neigh_ops dn_long_ops = { .error_report = dn_long_error_report, .output = dn_long_output, .connected_output = dn_long_output, - .hh_output = dev_queue_xmit, - .queue_xmit = dev_queue_xmit, }; /* @@ -76,8 +74,6 @@ static const struct neigh_ops dn_short_ops = { .error_report = dn_short_error_report, .output = dn_short_output, .connected_output = dn_short_output, - .hh_output = dev_queue_xmit, - .queue_xmit = dev_queue_xmit, }; /* @@ -88,8 +84,6 @@ static const struct neigh_ops dn_phase3_ops = { .error_report = dn_short_error_report, /* Can use short version here */ .output = dn_phase3_output, .connected_output = dn_phase3_output, - .hh_output = dev_queue_xmit, - .queue_xmit = dev_queue_xmit }; static u32 dn_neigh_hash(const void *pkey, @@ -215,7 +209,7 @@ static int dn_neigh_output_packet(struct sk_buff *skb) dn_dn2eth(mac_addr, rt->rt_local_src); if (dev_hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, mac_addr, skb->len) >= 0) - return neigh->ops->queue_xmit(skb); + return dev_queue_xmit(skb); if (net_ratelimit()) printk(KERN_DEBUG "dn_neigh_output_packet: oops, can't send packet\n"); @@ -224,10 +218,8 @@ static int dn_neigh_output_packet(struct sk_buff *skb) return -EINVAL; } -static int dn_long_output(struct sk_buff *skb) +static int dn_long_output(struct neighbour *neigh, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); - struct neighbour *neigh = dst_get_neighbour(dst); struct net_device *dev = neigh->dev; int headroom = dev->hard_header_len + sizeof(struct dn_long_packet) + 3; unsigned char *data; @@ -271,10 +263,8 @@ static int dn_long_output(struct sk_buff *skb) neigh->dev, dn_neigh_output_packet); } -static int dn_short_output(struct sk_buff *skb) +static int dn_short_output(struct neighbour *neigh, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); - struct neighbour *neigh = dst_get_neighbour(dst); struct net_device *dev = neigh->dev; int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2; struct dn_short_packet *sp; @@ -315,10 +305,8 @@ static int dn_short_output(struct sk_buff *skb) * Phase 3 output is the same is short output, execpt that * it clears the area bits before transmission. */ -static int dn_phase3_output(struct sk_buff *skb) +static int dn_phase3_output(struct neighbour *neigh, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); - struct neighbour *neigh = dst_get_neighbour(dst); struct net_device *dev = neigh->dev; int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2; struct dn_short_packet *sp; @@ -404,13 +392,13 @@ int dn_neigh_router_hello(struct sk_buff *skb) dn->flags &= ~DN_NDFLAG_P3; - switch(msg->iinfo & DN_RT_INFO_TYPE) { - case DN_RT_INFO_L1RT: - dn->flags &=~DN_NDFLAG_R2; - dn->flags |= DN_NDFLAG_R1; - break; - case DN_RT_INFO_L2RT: - dn->flags |= DN_NDFLAG_R2; + switch (msg->iinfo & DN_RT_INFO_TYPE) { + case DN_RT_INFO_L1RT: + dn->flags &=~DN_NDFLAG_R2; + dn->flags |= DN_NDFLAG_R1; + break; + case DN_RT_INFO_L2RT: + dn->flags |= DN_NDFLAG_R2; } } diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index b430549..73fa268 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -101,23 +101,27 @@ static void dn_ack(struct sock *sk, struct sk_buff *skb, unsigned short ack) unsigned short type = ((ack >> 12) & 0x0003); int wakeup = 0; - switch(type) { - case 0: /* ACK - Data */ - if (dn_after(ack, scp->ackrcv_dat)) { - scp->ackrcv_dat = ack & 0x0fff; - wakeup |= dn_nsp_check_xmit_queue(sk, skb, &scp->data_xmit_queue, ack); - } - break; - case 1: /* NAK - Data */ - break; - case 2: /* ACK - OtherData */ - if (dn_after(ack, scp->ackrcv_oth)) { - scp->ackrcv_oth = ack & 0x0fff; - wakeup |= dn_nsp_check_xmit_queue(sk, skb, &scp->other_xmit_queue, ack); - } - break; - case 3: /* NAK - OtherData */ - break; + switch (type) { + case 0: /* ACK - Data */ + if (dn_after(ack, scp->ackrcv_dat)) { + scp->ackrcv_dat = ack & 0x0fff; + wakeup |= dn_nsp_check_xmit_queue(sk, skb, + &scp->data_xmit_queue, + ack); + } + break; + case 1: /* NAK - Data */ + break; + case 2: /* ACK - OtherData */ + if (dn_after(ack, scp->ackrcv_oth)) { + scp->ackrcv_oth = ack & 0x0fff; + wakeup |= dn_nsp_check_xmit_queue(sk, skb, + &scp->other_xmit_queue, + ack); + } + break; + case 3: /* NAK - OtherData */ + break; } if (wakeup && !sock_flag(sk, SOCK_DEAD)) @@ -417,19 +421,19 @@ static void dn_nsp_disc_init(struct sock *sk, struct sk_buff *skb) scp->addrrem = cb->src_port; sk->sk_state = TCP_CLOSE; - switch(scp->state) { - case DN_CI: - case DN_CD: - scp->state = DN_RJ; - sk->sk_err = ECONNREFUSED; - break; - case DN_RUN: - sk->sk_shutdown |= SHUTDOWN_MASK; - scp->state = DN_DN; - break; - case DN_DI: - scp->state = DN_DIC; - break; + switch (scp->state) { + case DN_CI: + case DN_CD: + scp->state = DN_RJ; + sk->sk_err = ECONNREFUSED; + break; + case DN_RUN: + sk->sk_shutdown |= SHUTDOWN_MASK; + scp->state = DN_DN; + break; + case DN_DI: + scp->state = DN_DIC; + break; } if (!sock_flag(sk, SOCK_DEAD)) { @@ -470,23 +474,23 @@ static void dn_nsp_disc_conf(struct sock *sk, struct sk_buff *skb) sk->sk_state = TCP_CLOSE; - switch(scp->state) { - case DN_CI: - scp->state = DN_NR; - break; - case DN_DR: - if (reason == NSP_REASON_DC) - scp->state = DN_DRC; - if (reason == NSP_REASON_NL) - scp->state = DN_CN; - break; - case DN_DI: - scp->state = DN_DIC; - break; - case DN_RUN: - sk->sk_shutdown |= SHUTDOWN_MASK; - case DN_CC: + switch (scp->state) { + case DN_CI: + scp->state = DN_NR; + break; + case DN_DR: + if (reason == NSP_REASON_DC) + scp->state = DN_DRC; + if (reason == NSP_REASON_NL) scp->state = DN_CN; + break; + case DN_DI: + scp->state = DN_DIC; + break; + case DN_RUN: + sk->sk_shutdown |= SHUTDOWN_MASK; + case DN_CC: + scp->state = DN_CN; } if (!sock_flag(sk, SOCK_DEAD)) { @@ -692,16 +696,16 @@ static int dn_nsp_no_socket(struct sk_buff *skb, unsigned short reason) goto out; if ((reason != NSP_REASON_OK) && ((cb->nsp_flags & 0x0c) == 0x08)) { - switch(cb->nsp_flags & 0x70) { - case 0x10: - case 0x60: /* (Retransmitted) Connect Init */ - dn_nsp_return_disc(skb, NSP_DISCINIT, reason); - ret = NET_RX_SUCCESS; - break; - case 0x20: /* Connect Confirm */ - dn_nsp_return_disc(skb, NSP_DISCCONF, reason); - ret = NET_RX_SUCCESS; - break; + switch (cb->nsp_flags & 0x70) { + case 0x10: + case 0x60: /* (Retransmitted) Connect Init */ + dn_nsp_return_disc(skb, NSP_DISCINIT, reason); + ret = NET_RX_SUCCESS; + break; + case 0x20: /* Connect Confirm */ + dn_nsp_return_disc(skb, NSP_DISCCONF, reason); + ret = NET_RX_SUCCESS; + break; } } @@ -733,17 +737,17 @@ static int dn_nsp_rx_packet(struct sk_buff *skb) * Filter out conninits and useless packet types */ if ((cb->nsp_flags & 0x0c) == 0x08) { - switch(cb->nsp_flags & 0x70) { - case 0x00: /* NOP */ - case 0x70: /* Reserved */ - case 0x50: /* Reserved, Phase II node init */ + switch (cb->nsp_flags & 0x70) { + case 0x00: /* NOP */ + case 0x70: /* Reserved */ + case 0x50: /* Reserved, Phase II node init */ + goto free_out; + case 0x10: + case 0x60: + if (unlikely(cb->rt_flags & DN_RT_F_RTS)) goto free_out; - case 0x10: - case 0x60: - if (unlikely(cb->rt_flags & DN_RT_F_RTS)) - goto free_out; - sk = dn_find_listener(skb, &reason); - goto got_it; + sk = dn_find_listener(skb, &reason); + goto got_it; } } @@ -836,20 +840,20 @@ int dn_nsp_backlog_rcv(struct sock *sk, struct sk_buff *skb) * Control packet. */ if ((cb->nsp_flags & 0x0c) == 0x08) { - switch(cb->nsp_flags & 0x70) { - case 0x10: - case 0x60: - dn_nsp_conn_init(sk, skb); - break; - case 0x20: - dn_nsp_conn_conf(sk, skb); - break; - case 0x30: - dn_nsp_disc_init(sk, skb); - break; - case 0x40: - dn_nsp_disc_conf(sk, skb); - break; + switch (cb->nsp_flags & 0x70) { + case 0x10: + case 0x60: + dn_nsp_conn_init(sk, skb); + break; + case 0x20: + dn_nsp_conn_conf(sk, skb); + break; + case 0x30: + dn_nsp_disc_init(sk, skb); + break; + case 0x40: + dn_nsp_disc_conf(sk, skb); + break; } } else if (cb->nsp_flags == 0x24) { @@ -890,15 +894,15 @@ int dn_nsp_backlog_rcv(struct sock *sk, struct sk_buff *skb) if (scp->state != DN_RUN) goto free_out; - switch(cb->nsp_flags) { - case 0x10: /* LS */ - dn_nsp_linkservice(sk, skb); - break; - case 0x30: /* OD */ - dn_nsp_otherdata(sk, skb); - break; - default: - dn_nsp_data(sk, skb); + switch (cb->nsp_flags) { + case 0x10: /* LS */ + dn_nsp_linkservice(sk, skb); + break; + case 0x30: /* OD */ + dn_nsp_otherdata(sk, skb); + break; + default: + dn_nsp_data(sk, skb); } } else { /* Ack, chuck it out here */ diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 82d6250..94f4ec0 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -77,6 +77,7 @@ #include #include #include +#include #include #include #include @@ -111,11 +112,12 @@ static unsigned long dn_rt_deadline; static int dn_dst_gc(struct dst_ops *ops); static struct dst_entry *dn_dst_check(struct dst_entry *, __u32); static unsigned int dn_dst_default_advmss(const struct dst_entry *dst); -static unsigned int dn_dst_default_mtu(const struct dst_entry *dst); +static unsigned int dn_dst_mtu(const struct dst_entry *dst); static void dn_dst_destroy(struct dst_entry *); static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); static void dn_dst_link_failure(struct sk_buff *); static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu); +static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, const void *daddr); static int dn_route_input(struct sk_buff *); static void dn_run_flush(unsigned long dummy); @@ -133,12 +135,13 @@ static struct dst_ops dn_dst_ops = { .gc = dn_dst_gc, .check = dn_dst_check, .default_advmss = dn_dst_default_advmss, - .default_mtu = dn_dst_default_mtu, + .mtu = dn_dst_mtu, .cow_metrics = dst_cow_metrics_generic, .destroy = dn_dst_destroy, .negative_advice = dn_dst_negative_advice, .link_failure = dn_dst_link_failure, .update_pmtu = dn_dst_update_pmtu, + .neigh_lookup = dn_dst_neigh_lookup, }; static void dn_dst_destroy(struct dst_entry *dst) @@ -497,11 +500,11 @@ static int dn_route_rx_packet(struct sk_buff *skb) } if ((skb->pkt_type == PACKET_HOST) && (cb->rt_flags & DN_RT_F_RQR)) { - switch(cb->rt_flags & DN_RT_PKT_MSK) { - case DN_RT_PKT_SHORT: - return dn_return_short(skb); - case DN_RT_PKT_LONG: - return dn_return_long(skb); + switch (cb->rt_flags & DN_RT_PKT_MSK) { + case DN_RT_PKT_SHORT: + return dn_return_short(skb); + case DN_RT_PKT_LONG: + return dn_return_long(skb); } } @@ -654,38 +657,38 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type if (unlikely(skb_linearize(skb))) goto dump_it; - switch(flags & DN_RT_CNTL_MSK) { - case DN_RT_PKT_INIT: - dn_dev_init_pkt(skb); - break; - case DN_RT_PKT_VERI: - dn_dev_veri_pkt(skb); - break; + switch (flags & DN_RT_CNTL_MSK) { + case DN_RT_PKT_INIT: + dn_dev_init_pkt(skb); + break; + case DN_RT_PKT_VERI: + dn_dev_veri_pkt(skb); + break; } if (dn->parms.state != DN_DEV_S_RU) goto dump_it; - switch(flags & DN_RT_CNTL_MSK) { - case DN_RT_PKT_HELO: - return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - skb, skb->dev, NULL, - dn_route_ptp_hello); - - case DN_RT_PKT_L1RT: - case DN_RT_PKT_L2RT: - return NF_HOOK(NFPROTO_DECNET, NF_DN_ROUTE, - skb, skb->dev, NULL, - dn_route_discard); - case DN_RT_PKT_ERTH: - return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - skb, skb->dev, NULL, - dn_neigh_router_hello); - - case DN_RT_PKT_EEDH: - return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - skb, skb->dev, NULL, - dn_neigh_endnode_hello); + switch (flags & DN_RT_CNTL_MSK) { + case DN_RT_PKT_HELO: + return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, + skb, skb->dev, NULL, + dn_route_ptp_hello); + + case DN_RT_PKT_L1RT: + case DN_RT_PKT_L2RT: + return NF_HOOK(NFPROTO_DECNET, NF_DN_ROUTE, + skb, skb->dev, NULL, + dn_route_discard); + case DN_RT_PKT_ERTH: + return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, + skb, skb->dev, NULL, + dn_neigh_router_hello); + + case DN_RT_PKT_EEDH: + return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, + skb, skb->dev, NULL, + dn_neigh_endnode_hello); } } else { if (dn->parms.state != DN_DEV_S_RU) @@ -693,11 +696,11 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type skb_pull(skb, 1); /* Pull flags */ - switch(flags & DN_RT_PKT_MSK) { - case DN_RT_PKT_LONG: - return dn_route_rx_long(skb); - case DN_RT_PKT_SHORT: - return dn_route_rx_short(skb); + switch (flags & DN_RT_PKT_MSK) { + case DN_RT_PKT_LONG: + return dn_route_rx_long(skb); + case DN_RT_PKT_SHORT: + return dn_route_rx_short(skb); } } @@ -707,6 +710,14 @@ out: return NET_RX_DROP; } +static int dn_to_neigh_output(struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct neighbour *n = dst_get_neighbour(dst); + + return n->output(n, skb); +} + static int dn_output(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -735,7 +746,7 @@ static int dn_output(struct sk_buff *skb) cb->hops = 0; return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT, skb, NULL, dev, - neigh->output); + dn_to_neigh_output); error: if (net_ratelimit()) @@ -752,7 +763,6 @@ static int dn_forward(struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); struct dn_dev *dn_db = rcu_dereference(dst->dev->dn_ptr); struct dn_route *rt; - struct neighbour *neigh = dst_get_neighbour(dst); int header_len; #ifdef CONFIG_NETFILTER struct net_device *dev = skb->dev; @@ -785,7 +795,7 @@ static int dn_forward(struct sk_buff *skb) cb->rt_flags |= DN_RT_F_IE; return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD, skb, dev, skb->dev, - neigh->output); + dn_to_neigh_output); drop: kfree_skb(skb); @@ -815,9 +825,16 @@ static unsigned int dn_dst_default_advmss(const struct dst_entry *dst) return dn_mss_from_pmtu(dst->dev, dst_mtu(dst)); } -static unsigned int dn_dst_default_mtu(const struct dst_entry *dst) +static unsigned int dn_dst_mtu(const struct dst_entry *dst) +{ + unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + + return mtu ? : dst->dev->mtu; +} + +static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, const void *daddr) { - return dst->dev->mtu; + return __neigh_lookup_errno(&dn_neigh_table, daddr, dst->dev); } static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) @@ -1421,20 +1438,20 @@ make_route: dst_set_neighbour(&rt->dst, neigh); rt->dst.lastuse = jiffies; rt->dst.output = dn_rt_bug; - switch(res.type) { - case RTN_UNICAST: - rt->dst.input = dn_forward; - break; - case RTN_LOCAL: - rt->dst.output = dn_output; - rt->dst.input = dn_nsp_rx; - rt->dst.dev = in_dev; - flags |= RTCF_LOCAL; - break; - default: - case RTN_UNREACHABLE: - case RTN_BLACKHOLE: - rt->dst.input = dst_discard; + switch (res.type) { + case RTN_UNICAST: + rt->dst.input = dn_forward; + break; + case RTN_LOCAL: + rt->dst.output = dn_output; + rt->dst.input = dn_nsp_rx; + rt->dst.dev = in_dev; + flags |= RTCF_LOCAL; + break; + default: + case RTN_UNREACHABLE: + case RTN_BLACKHOLE: + rt->dst.input = dst_discard; } rt->rt_flags = flags; diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index f0efb0c..f65c9dd 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index bd0a52d..a9a62f2 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include /* RTF_xxx */ #include @@ -147,17 +147,18 @@ static void dn_rehash_zone(struct dn_zone *dz) old_divisor = dz->dz_divisor; - switch(old_divisor) { - case 16: - new_divisor = 256; - new_hashmask = 0xFF; - break; - default: - printk(KERN_DEBUG "DECnet: dn_rehash_zone: BUG! %d\n", old_divisor); - case 256: - new_divisor = 1024; - new_hashmask = 0x3FF; - break; + switch (old_divisor) { + case 16: + new_divisor = 256; + new_hashmask = 0xFF; + break; + default: + printk(KERN_DEBUG "DECnet: dn_rehash_zone: BUG! %d\n", + old_divisor); + case 256: + new_divisor = 1024; + new_hashmask = 0x3FF; + break; } ht = kcalloc(new_divisor, sizeof(struct dn_fib_node*), GFP_KERNEL); diff --git a/net/decnet/dn_timer.c b/net/decnet/dn_timer.c index 0982571..d9c150c 100644 --- a/net/decnet/dn_timer.c +++ b/net/decnet/dn_timer.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include @@ -36,16 +36,13 @@ static void dn_slow_timer(unsigned long arg); void dn_start_slow_timer(struct sock *sk) { - sk->sk_timer.expires = jiffies + SLOW_INTERVAL; - sk->sk_timer.function = dn_slow_timer; - sk->sk_timer.data = (unsigned long)sk; - - add_timer(&sk->sk_timer); + setup_timer(&sk->sk_timer, dn_slow_timer, (unsigned long)sk); + sk_reset_timer(sk, &sk->sk_timer, jiffies + SLOW_INTERVAL); } void dn_stop_slow_timer(struct sock *sk) { - del_timer(&sk->sk_timer); + sk_stop_timer(sk, &sk->sk_timer); } static void dn_slow_timer(unsigned long arg) @@ -53,12 +50,10 @@ static void dn_slow_timer(unsigned long arg) struct sock *sk = (struct sock *)arg; struct dn_scp *scp = DN_SK(sk); - sock_hold(sk); bh_lock_sock(sk); if (sock_owned_by_user(sk)) { - sk->sk_timer.expires = jiffies + HZ / 10; - add_timer(&sk->sk_timer); + sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 10); goto out; } @@ -100,9 +95,7 @@ static void dn_slow_timer(unsigned long arg) scp->keepalive_fxn(sk); } - sk->sk_timer.expires = jiffies + SLOW_INTERVAL; - - add_timer(&sk->sk_timer); + sk_reset_timer(sk, &sk->sk_timer, jiffies + SLOW_INTERVAL); out: bh_unlock_sock(sk); sock_put(sk); diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 64a7f39..69975e0 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -69,15 +69,15 @@ static void dnrmg_send_peer(struct sk_buff *skb) int group = 0; unsigned char flags = *skb->data; - switch(flags & DN_RT_CNTL_MSK) { - case DN_RT_PKT_L1RT: - group = DNRNG_NLGRP_L1; - break; - case DN_RT_PKT_L2RT: - group = DNRNG_NLGRP_L2; - break; - default: - return; + switch (flags & DN_RT_CNTL_MSK) { + case DN_RT_PKT_L1RT: + group = DNRNG_NLGRP_L1; + break; + case DN_RT_PKT_L2RT: + group = DNRNG_NLGRP_L2; + break; + default: + return; } skb2 = dnrmg_build_message(skb, &status); diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index d1cc2fd..d50a13c 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -69,14 +69,15 @@ static struct ctl_table_header *dn_skeleton_table_header = NULL; static void strip_it(char *str) { for(;;) { - switch(*str) { - case ' ': - case '\n': - case '\r': - case ':': - *str = 0; - case 0: - return; + switch (*str) { + case ' ': + case '\n': + case '\r': + case ':': + *str = 0; + /* Fallthrough */ + case 0: + return; } str++; } diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index c32be29..2022b46 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -150,7 +150,9 @@ int dns_query(const char *type, const char *name, size_t namelen, if (!*_result) goto put; - memcpy(*_result, upayload->data, len + 1); + memcpy(*_result, upayload->data, len); + (*_result)[len] = '\0'; + if (_expiry) *_expiry = rkey->expiry; diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 3fb14b7..0dc1589 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include "dsa_priv.h" diff --git a/net/dsa/mv88e6131.c b/net/dsa/mv88e6131.c index 45f7411..9bd1061 100644 --- a/net/dsa/mv88e6131.c +++ b/net/dsa/mv88e6131.c @@ -118,10 +118,14 @@ static int mv88e6131_setup_global(struct dsa_switch *ds) REG_WRITE(REG_GLOBAL, 0x1a, (dsa_upstream_port(ds) * 0x1100) | 0x00f0); /* - * Disable cascade port functionality, and set the switch's + * Disable cascade port functionality unless this device + * is used in a cascade configuration, and set the switch's * DSA device number. */ - REG_WRITE(REG_GLOBAL, 0x1c, 0xe000 | (ds->index & 0x1f)); + if (ds->dst->pd->nr_chips > 1) + REG_WRITE(REG_GLOBAL, 0x1c, 0xf000 | (ds->index & 0x1f)); + else + REG_WRITE(REG_GLOBAL, 0x1c, 0xe000 | (ds->index & 0x1f)); /* * Send all frames with destination addresses matching diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 0a47b6c..56cf9b8 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -301,7 +301,6 @@ static const struct net_device_ops dsa_netdev_ops = { .ndo_start_xmit = dsa_xmit, .ndo_change_rx_flags = dsa_slave_change_rx_flags, .ndo_set_rx_mode = dsa_slave_set_rx_mode, - .ndo_set_multicast_list = dsa_slave_set_rx_mode, .ndo_set_mac_address = dsa_slave_set_mac_address, .ndo_do_ioctl = dsa_slave_ioctl, }; @@ -314,7 +313,6 @@ static const struct net_device_ops edsa_netdev_ops = { .ndo_start_xmit = edsa_xmit, .ndo_change_rx_flags = dsa_slave_change_rx_flags, .ndo_set_rx_mode = dsa_slave_set_rx_mode, - .ndo_set_multicast_list = dsa_slave_set_rx_mode, .ndo_set_mac_address = dsa_slave_set_mac_address, .ndo_do_ioctl = dsa_slave_ioctl, }; @@ -327,7 +325,6 @@ static const struct net_device_ops trailer_netdev_ops = { .ndo_start_xmit = trailer_xmit, .ndo_change_rx_flags = dsa_slave_change_rx_flags, .ndo_set_rx_mode = dsa_slave_set_rx_mode, - .ndo_set_multicast_list = dsa_slave_set_rx_mode, .ndo_set_mac_address = dsa_slave_set_mac_address, .ndo_do_ioctl = dsa_slave_ioctl, }; diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index a1d9f37..1c1f26c 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -9,6 +9,8 @@ * */ +#define pr_fmt(fmt) fmt + #include #include @@ -44,7 +46,7 @@ #include #include -#include +#include #include static const struct proto_ops econet_ops; @@ -63,9 +65,7 @@ static DEFINE_SPINLOCK(aun_queue_lock); static struct socket *udpsock; #define AUN_PORT 0x8000 - -struct aunhdr -{ +struct aunhdr { unsigned char code; /* AUN magic protocol byte */ unsigned char port; unsigned char cb; @@ -82,8 +82,7 @@ static struct timer_list ab_cleanup_timer; #endif /* CONFIG_ECONET_AUNUDP */ /* Per-packet information */ -struct ec_cb -{ +struct ec_cb { struct sockaddr_ec sec; unsigned long cookie; /* Supplied by user. */ #ifdef CONFIG_ECONET_AUNUDP @@ -137,7 +136,7 @@ static int econet_recvmsg(struct kiocb *iocb, struct socket *sock, * but then it will block. */ - skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err); + skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err); /* * An error occurred so return it. Because skb_recv_datagram() @@ -145,7 +144,7 @@ static int econet_recvmsg(struct kiocb *iocb, struct socket *sock, * retries. */ - if(skb==NULL) + if (skb == NULL) goto out; /* @@ -154,10 +153,9 @@ static int econet_recvmsg(struct kiocb *iocb, struct socket *sock, */ copied = skb->len; - if (copied > len) - { - copied=len; - msg->msg_flags|=MSG_TRUNC; + if (copied > len) { + copied = len; + msg->msg_flags |= MSG_TRUNC; } /* We can't use skb_copy_datagram here */ @@ -186,7 +184,8 @@ out: * Bind an Econet socket. */ -static int econet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +static int econet_bind(struct socket *sock, struct sockaddr *uaddr, + int addr_len) { struct sockaddr_ec *sec = (struct sockaddr_ec *)uaddr; struct sock *sk; @@ -226,9 +225,8 @@ static void tx_result(struct sock *sk, unsigned long cookie, int result) struct ec_cb *eb; struct sockaddr_ec *sec; - if (skb == NULL) - { - printk(KERN_DEBUG "ec: memory squeeze, transmit result dropped.\n"); + if (skb == NULL) { + pr_debug("econet: memory squeeze, transmit result dropped\n"); return; } @@ -265,7 +263,7 @@ static void ec_tx_done(struct sk_buff *skb, int result) static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len) { - struct sockaddr_ec *saddr=(struct sockaddr_ec *)msg->msg_name; + struct sockaddr_ec *saddr = (struct sockaddr_ec *)msg->msg_name; struct net_device *dev; struct ec_addr addr; int err; @@ -298,14 +296,14 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, mutex_lock(&econet_mutex); - if (saddr == NULL || msg->msg_namelen < sizeof(struct sockaddr_ec)) { - mutex_unlock(&econet_mutex); - return -EINVAL; - } - addr.station = saddr->addr.station; - addr.net = saddr->addr.net; - port = saddr->port; - cb = saddr->cb; + if (saddr == NULL || msg->msg_namelen < sizeof(struct sockaddr_ec)) { + mutex_unlock(&econet_mutex); + return -EINVAL; + } + addr.station = saddr->addr.station; + addr.net = saddr->addr.net; + port = saddr->port; + cb = saddr->cb; /* Look for a device with the right network number. */ dev = net2dev_map[addr.net]; @@ -333,9 +331,9 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, dev_hold(dev); - skb = sock_alloc_send_skb(sk, len+LL_ALLOCATED_SPACE(dev), + skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev), msg->msg_flags & MSG_DONTWAIT, &err); - if (skb==NULL) + if (skb == NULL) goto out_unlock; skb_reserve(skb, LL_RESERVED_SPACE(dev)); @@ -355,7 +353,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, struct ec_framehdr *fh; /* Poke in our control byte and port number. Hack, hack. */ - fh = (struct ec_framehdr *)(skb->data); + fh = (struct ec_framehdr *)skb->data; fh->cb = cb; fh->port = port; if (sock->type != SOCK_DGRAM) { @@ -365,7 +363,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, } /* Copy the data. Returns -EFAULT on error */ - err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len); + err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); skb->protocol = proto; skb->dev = dev; skb->priority = sk->sk_priority; @@ -385,9 +383,9 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, mutex_unlock(&econet_mutex); return len; - out_free: +out_free: kfree_skb(skb); - out_unlock: +out_unlock: if (dev) dev_put(dev); #else @@ -458,15 +456,14 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, goto error_free_buf; /* Get a skbuff (no data, just holds our cb information) */ - if ((skb = sock_alloc_send_skb(sk, 0, - msg->msg_flags & MSG_DONTWAIT, - &err)) == NULL) + skb = sock_alloc_send_skb(sk, 0, msg->msg_flags & MSG_DONTWAIT, &err); + if (skb == NULL) goto error_free_buf; eb = (struct ec_cb *)&skb->cb; eb->cookie = saddr->cookie; - eb->timeout = (5*HZ); + eb->timeout = 5 * HZ; eb->start = jiffies; ah.handle = aun_seq; eb->seq = (aun_seq++); @@ -480,9 +477,10 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, udpmsg.msg_iovlen = 2; udpmsg.msg_control = NULL; udpmsg.msg_controllen = 0; - udpmsg.msg_flags=0; + udpmsg.msg_flags = 0; - oldfs = get_fs(); set_fs(KERNEL_DS); /* More privs :-) */ + oldfs = get_fs(); + set_fs(KERNEL_DS); /* More privs :-) */ err = sock_sendmsg(udpsock, &udpmsg, size); set_fs(oldfs); @@ -530,7 +528,7 @@ static int econet_getname(struct socket *sock, struct sockaddr *uaddr, static void econet_destroy_timer(unsigned long data) { - struct sock *sk=(struct sock *)data; + struct sock *sk = (struct sock *)data; if (!sk_has_allocations(sk)) { sk_free(sk); @@ -539,7 +537,7 @@ static void econet_destroy_timer(unsigned long data) sk->sk_timer.expires = jiffies + 10 * HZ; add_timer(&sk->sk_timer); - printk(KERN_DEBUG "econet socket destroy delayed\n"); + pr_debug("econet: socket destroy delayed\n"); } /* @@ -651,7 +649,8 @@ static int ec_dev_ioctl(struct socket *sock, unsigned int cmd, void __user *arg) if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; - if ((dev = dev_get_by_name(&init_net, ifr.ifr_name)) == NULL) + dev = dev_get_by_name(&init_net, ifr.ifr_name); + if (dev == NULL) return -ENODEV; sec = (struct sockaddr_ec *)&ifr.ifr_addr; @@ -715,28 +714,26 @@ static int ec_dev_ioctl(struct socket *sock, unsigned int cmd, void __user *arg) * Handle generic ioctls */ -static int econet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +static int econet_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg) { struct sock *sk = sock->sk; void __user *argp = (void __user *)arg; - switch(cmd) { - case SIOCGSTAMP: - return sock_get_timestamp(sk, argp); + switch (cmd) { + case SIOCGSTAMP: + return sock_get_timestamp(sk, argp); - case SIOCGSTAMPNS: - return sock_get_timestampns(sk, argp); + case SIOCGSTAMPNS: + return sock_get_timestampns(sk, argp); - case SIOCSIFADDR: - case SIOCGIFADDR: - return ec_dev_ioctl(sock, cmd, argp); - break; + case SIOCSIFADDR: + case SIOCGIFADDR: + return ec_dev_ioctl(sock, cmd, argp); - default: - return -ENOIOCTLCMD; } - /*NOTREACHED*/ - return 0; + + return -ENOIOCTLCMD; } static const struct net_proto_family econet_family_ops = { @@ -836,7 +833,7 @@ static void aun_send_response(__u32 addr, unsigned long seq, int code, int cb) udpmsg.msg_namelen = sizeof(sin); udpmsg.msg_control = NULL; udpmsg.msg_controllen = 0; - udpmsg.msg_flags=0; + udpmsg.msg_flags = 0; kernel_sendmsg(udpsock, &udpmsg, &iov, 1, sizeof(ah)); } @@ -859,26 +856,25 @@ static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len) if (dst) edev = dst->dev->ec_ptr; - if (! edev) + if (!edev) goto bad; - if ((sk = ec_listening_socket(ah->port, stn, edev->net)) == NULL) + sk = ec_listening_socket(ah->port, stn, edev->net); + if (sk == NULL) goto bad; /* Nobody wants it */ newskb = alloc_skb((len - sizeof(struct aunhdr) + 15) & ~15, GFP_ATOMIC); - if (newskb == NULL) - { - printk(KERN_DEBUG "AUN: memory squeeze, dropping packet.\n"); + if (newskb == NULL) { + pr_debug("AUN: memory squeeze, dropping packet\n"); /* Send nack and hope sender tries again */ goto bad; } - memcpy(skb_put(newskb, len - sizeof(struct aunhdr)), (void *)(ah+1), + memcpy(skb_put(newskb, len - sizeof(struct aunhdr)), (void *)(ah + 1), len - sizeof(struct aunhdr)); - if (ec_queue_packet(sk, newskb, stn, edev->net, ah->cb, ah->port)) - { + if (ec_queue_packet(sk, newskb, stn, edev->net, ah->cb, ah->port)) { /* Socket is bankrupt. */ kfree_skb(newskb); goto bad; @@ -914,7 +910,7 @@ static void aun_tx_ack(unsigned long seq, int result) goto foundit; } spin_unlock_irqrestore(&aun_queue_lock, flags); - printk(KERN_DEBUG "AUN: unknown sequence %ld\n", seq); + pr_debug("AUN: unknown sequence %ld\n", seq); return; foundit: @@ -939,18 +935,17 @@ static void aun_data_available(struct sock *sk, int slen) while ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) { if (err == -EAGAIN) { - printk(KERN_ERR "AUN: no data available?!"); + pr_err("AUN: no data available?!\n"); return; } - printk(KERN_DEBUG "AUN: recvfrom() error %d\n", -err); + pr_debug("AUN: recvfrom() error %d\n", -err); } data = skb_transport_header(skb) + sizeof(struct udphdr); ah = (struct aunhdr *)data; len = skb->len - sizeof(struct udphdr); - switch (ah->code) - { + switch (ah->code) { case 2: aun_incoming(skb, ah, len); break; @@ -961,7 +956,7 @@ static void aun_data_available(struct sock *sk, int slen) aun_tx_ack(ah->handle, ECTYPE_TRANSMIT_NOT_LISTENING); break; default: - printk(KERN_DEBUG "unknown AUN packet (type %d)\n", data[0]); + pr_debug("AUN: unknown packet type: %d\n", data[0]); } skb_free_datagram(sk, skb); @@ -991,7 +986,7 @@ static void ab_cleanup(unsigned long h) } spin_unlock_irqrestore(&aun_queue_lock, flags); - mod_timer(&ab_cleanup_timer, jiffies + (HZ*2)); + mod_timer(&ab_cleanup_timer, jiffies + (HZ * 2)); } static int __init aun_udp_initialise(void) @@ -1001,7 +996,7 @@ static int __init aun_udp_initialise(void) skb_queue_head_init(&aun_queue); setup_timer(&ab_cleanup_timer, ab_cleanup, 0); - ab_cleanup_timer.expires = jiffies + (HZ*2); + ab_cleanup_timer.expires = jiffies + (HZ * 2); add_timer(&ab_cleanup_timer); memset(&sin, 0, sizeof(sin)); @@ -1009,9 +1004,9 @@ static int __init aun_udp_initialise(void) /* We can count ourselves lucky Acorn machines are too dim to speak IPv6. :-) */ - if ((error = sock_create_kern(PF_INET, SOCK_DGRAM, 0, &udpsock)) < 0) - { - printk("AUN: socket error %d\n", -error); + error = sock_create_kern(PF_INET, SOCK_DGRAM, 0, &udpsock); + if (error < 0) { + pr_err("AUN: socket error %d\n", -error); return error; } @@ -1020,10 +1015,9 @@ static int __init aun_udp_initialise(void) from interrupts */ error = udpsock->ops->bind(udpsock, (struct sockaddr *)&sin, - sizeof(sin)); - if (error < 0) - { - printk("AUN: bind error %d\n", -error); + sizeof(sin)); + if (error < 0) { + pr_err("AUN: bind error %d\n", -error); goto release; } @@ -1044,7 +1038,8 @@ release: * Receive an Econet frame from a device. */ -static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) +static int econet_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) { struct ec_framehdr *hdr; struct sock *sk = NULL; @@ -1059,13 +1054,14 @@ static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet if (!edev) goto drop; - if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) + skb = skb_share_check(skb, GFP_ATOMIC); + if (skb == NULL) return NET_RX_DROP; if (!pskb_may_pull(skb, sizeof(struct ec_framehdr))) goto drop; - hdr = (struct ec_framehdr *) skb->data; + hdr = (struct ec_framehdr *)skb->data; /* First check for encapsulated IP */ if (hdr->port == EC_PORT_IP) { @@ -1093,8 +1089,8 @@ drop: } static struct packet_type econet_packet_type __read_mostly = { - .type = cpu_to_be16(ETH_P_ECONET), - .func = econet_rcv, + .type = cpu_to_be16(ETH_P_ECONET), + .func = econet_rcv, }; static void econet_hw_initialise(void) @@ -1104,9 +1100,10 @@ static void econet_hw_initialise(void) #endif -static int econet_notifier(struct notifier_block *this, unsigned long msg, void *data) +static int econet_notifier(struct notifier_block *this, unsigned long msg, + void *data) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = data; struct ec_device *edev; if (!net_eq(dev_net(dev), &init_net)) @@ -1116,8 +1113,7 @@ static int econet_notifier(struct notifier_block *this, unsigned long msg, void case NETDEV_UNREGISTER: /* A device has gone down - kill any data we hold for it. */ edev = dev->ec_ptr; - if (edev) - { + if (edev) { if (net2dev_map[0] == dev) net2dev_map[0] = NULL; net2dev_map[edev->net] = NULL; @@ -1131,7 +1127,7 @@ static int econet_notifier(struct notifier_block *this, unsigned long msg, void } static struct notifier_block econet_netdev_notifier = { - .notifier_call =econet_notifier, + .notifier_call = econet_notifier, }; static void __exit econet_proto_exit(void) diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig index 1c1de97..7dee650 100644 --- a/net/ieee802154/Kconfig +++ b/net/ieee802154/Kconfig @@ -10,3 +10,9 @@ config IEEE802154 Say Y here to compile LR-WPAN support into the kernel or say M to compile it as modules. + +config IEEE802154_6LOWPAN + tristate "6lowpan support over IEEE 802.15.4" + depends on IEEE802154 && IPV6 + ---help--- + IPv6 compression over IEEE 802.15.4. diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile index 5761185..d7716d6 100644 --- a/net/ieee802154/Makefile +++ b/net/ieee802154/Makefile @@ -1,3 +1,5 @@ -obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o -ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o -af_802154-y := af_ieee802154.o raw.o dgram.o +obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o +obj-$(CONFIG_IEEE802154_6LOWPAN) += 6lowpan.o + +ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o +af_802154-y := af_ieee802154.o raw.o dgram.o diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c index 6df6ecf..40e606f 100644 --- a/net/ieee802154/af_ieee802154.c +++ b/net/ieee802154/af_ieee802154.c @@ -302,7 +302,7 @@ static int ieee802154_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { if (!netif_running(dev)) - return -ENODEV; + goto drop; pr_debug("got frame, type %d, dev %p\n", dev->type, dev); #ifdef DEBUG print_hex_dump_bytes("ieee802154_rcv ", DUMP_PREFIX_NONE, skb->data, skb->len); diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index 1a3334c..faecf64 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -1,5 +1,5 @@ /* - * ZigBee socket interface + * IEEE 802.15.4 dgram socket interface * * Copyright 2007, 2008 Siemens AG * diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 71ee110..adaf462 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index 02548b2..c64a38d 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -213,12 +214,37 @@ static int ieee802154_add_iface(struct sk_buff *skb, goto nla_put_failure; } + if (info->attrs[IEEE802154_ATTR_HW_ADDR] && + nla_len(info->attrs[IEEE802154_ATTR_HW_ADDR]) != + IEEE802154_ADDR_LEN) { + rc = -EINVAL; + goto nla_put_failure; + } + dev = phy->add_iface(phy, devname); if (IS_ERR(dev)) { rc = PTR_ERR(dev); goto nla_put_failure; } + if (info->attrs[IEEE802154_ATTR_HW_ADDR]) { + struct sockaddr addr; + + addr.sa_family = ARPHRD_IEEE802154; + nla_memcpy(&addr.sa_data, info->attrs[IEEE802154_ATTR_HW_ADDR], + IEEE802154_ADDR_LEN); + + /* + * strangely enough, some callbacks (inetdev_event) from + * dev_set_mac_address require RTNL_LOCK + */ + rtnl_lock(); + rc = dev_set_mac_address(dev, &addr); + rtnl_unlock(); + if (rc) + goto dev_unregister; + } + NLA_PUT_STRING(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)); NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); @@ -228,6 +254,11 @@ static int ieee802154_add_iface(struct sk_buff *skb, return ieee802154_nl_reply(msg, info); +dev_unregister: + rtnl_lock(); /* del_iface must be called with RTNL lock */ + phy->del_iface(phy, dev); + dev_put(dev); + rtnl_unlock(); nla_put_failure: nlmsg_free(msg); out_dev: diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index f5fdfcb..7d3b438 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -199,6 +199,19 @@ config MAC80211_VERBOSE_MPL_DEBUG Do not select this option. +config MAC80211_VERBOSE_MPATH_DEBUG + bool "Verbose mesh path debugging" + depends on MAC80211_DEBUG_MENU + depends on MAC80211_MESH + ---help--- + Selecting this option causes mac80211 to print out very + verbose mesh path selection debugging messages (when mac80211 + is taking part in a mesh network). + It should not be selected on production systems as those + messages are remotely triggerable. + + Do not select this option. + config MAC80211_VERBOSE_MHWMP_DEBUG bool "Verbose mesh HWMP routing debugging" depends on MAC80211_DEBUG_MENU @@ -212,6 +225,18 @@ config MAC80211_VERBOSE_MHWMP_DEBUG Do not select this option. +config MAC80211_VERBOSE_TDLS_DEBUG + bool "Verbose TDLS debugging" + depends on MAC80211_DEBUG_MENU + ---help--- + Selecting this option causes mac80211 to print out very + verbose TDLS selection debugging messages (when mac80211 + is a TDLS STA). + It should not be selected on production systems as those + messages are remotely triggerable. + + Do not select this option. + config MAC80211_DEBUG_COUNTERS bool "Extra statistics for TX/RX debugging" depends on MAC80211_DEBUG_MENU diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c index b9b595c..0785e95 100644 --- a/net/mac80211/aes_ccm.c +++ b/net/mac80211/aes_ccm.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "key.h" @@ -21,21 +22,21 @@ static void aes_ccm_prepare(struct crypto_cipher *tfm, u8 *scratch, u8 *a) int i; u8 *b_0, *aad, *b, *s_0; - b_0 = scratch + 3 * AES_BLOCK_LEN; - aad = scratch + 4 * AES_BLOCK_LEN; + b_0 = scratch + 3 * AES_BLOCK_SIZE; + aad = scratch + 4 * AES_BLOCK_SIZE; b = scratch; - s_0 = scratch + AES_BLOCK_LEN; + s_0 = scratch + AES_BLOCK_SIZE; crypto_cipher_encrypt_one(tfm, b, b_0); /* Extra Authenticate-only data (always two AES blocks) */ - for (i = 0; i < AES_BLOCK_LEN; i++) + for (i = 0; i < AES_BLOCK_SIZE; i++) aad[i] ^= b[i]; crypto_cipher_encrypt_one(tfm, b, aad); - aad += AES_BLOCK_LEN; + aad += AES_BLOCK_SIZE; - for (i = 0; i < AES_BLOCK_LEN; i++) + for (i = 0; i < AES_BLOCK_SIZE; i++) aad[i] ^= b[i]; crypto_cipher_encrypt_one(tfm, a, aad); @@ -57,12 +58,12 @@ void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch, u8 *pos, *cpos, *b, *s_0, *e, *b_0; b = scratch; - s_0 = scratch + AES_BLOCK_LEN; - e = scratch + 2 * AES_BLOCK_LEN; - b_0 = scratch + 3 * AES_BLOCK_LEN; + s_0 = scratch + AES_BLOCK_SIZE; + e = scratch + 2 * AES_BLOCK_SIZE; + b_0 = scratch + 3 * AES_BLOCK_SIZE; - num_blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); - last_len = data_len % AES_BLOCK_LEN; + num_blocks = DIV_ROUND_UP(data_len, AES_BLOCK_SIZE); + last_len = data_len % AES_BLOCK_SIZE; aes_ccm_prepare(tfm, scratch, b); /* Process payload blocks */ @@ -70,7 +71,7 @@ void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch, cpos = cdata; for (j = 1; j <= num_blocks; j++) { int blen = (j == num_blocks && last_len) ? - last_len : AES_BLOCK_LEN; + last_len : AES_BLOCK_SIZE; /* Authentication followed by encryption */ for (i = 0; i < blen; i++) @@ -96,12 +97,12 @@ int ieee80211_aes_ccm_decrypt(struct crypto_cipher *tfm, u8 *scratch, u8 *pos, *cpos, *b, *s_0, *a, *b_0; b = scratch; - s_0 = scratch + AES_BLOCK_LEN; - a = scratch + 2 * AES_BLOCK_LEN; - b_0 = scratch + 3 * AES_BLOCK_LEN; + s_0 = scratch + AES_BLOCK_SIZE; + a = scratch + 2 * AES_BLOCK_SIZE; + b_0 = scratch + 3 * AES_BLOCK_SIZE; - num_blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); - last_len = data_len % AES_BLOCK_LEN; + num_blocks = DIV_ROUND_UP(data_len, AES_BLOCK_SIZE); + last_len = data_len % AES_BLOCK_SIZE; aes_ccm_prepare(tfm, scratch, a); /* Process payload blocks */ @@ -109,7 +110,7 @@ int ieee80211_aes_ccm_decrypt(struct crypto_cipher *tfm, u8 *scratch, pos = data; for (j = 1; j <= num_blocks; j++) { int blen = (j == num_blocks && last_len) ? - last_len : AES_BLOCK_LEN; + last_len : AES_BLOCK_SIZE; /* Decryption followed by authentication */ b_0[14] = (j >> 8) & 0xff; diff --git a/net/mac80211/aes_ccm.h b/net/mac80211/aes_ccm.h index 6e7820e..5b7d744 100644 --- a/net/mac80211/aes_ccm.h +++ b/net/mac80211/aes_ccm.h @@ -12,8 +12,6 @@ #include -#define AES_BLOCK_LEN 16 - struct crypto_cipher *ieee80211_aes_key_setup_encrypt(const u8 key[]); void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch, u8 *data, size_t data_len, diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c index d502b26..8dfd70d 100644 --- a/net/mac80211/aes_cmac.c +++ b/net/mac80211/aes_cmac.c @@ -11,12 +11,12 @@ #include #include #include +#include #include #include "key.h" #include "aes_cmac.h" -#define AES_BLOCK_SIZE 16 #define AES_CMAC_KEY_LEN 16 #define CMAC_TLEN 8 /* CMAC TLen = 64 bits (8 octets) */ #define AAD_LEN 20 @@ -35,10 +35,10 @@ static void gf_mulx(u8 *pad) } -static void aes_128_cmac_vector(struct crypto_cipher *tfm, u8 *scratch, - size_t num_elem, +static void aes_128_cmac_vector(struct crypto_cipher *tfm, size_t num_elem, const u8 *addr[], const size_t *len, u8 *mac) { + u8 scratch[2 * AES_BLOCK_SIZE]; u8 *cbc, *pad; const u8 *pos, *end; size_t i, e, left, total_len; @@ -95,7 +95,7 @@ static void aes_128_cmac_vector(struct crypto_cipher *tfm, u8 *scratch, } -void ieee80211_aes_cmac(struct crypto_cipher *tfm, u8 *scratch, const u8 *aad, +void ieee80211_aes_cmac(struct crypto_cipher *tfm, const u8 *aad, const u8 *data, size_t data_len, u8 *mic) { const u8 *addr[3]; @@ -110,7 +110,7 @@ void ieee80211_aes_cmac(struct crypto_cipher *tfm, u8 *scratch, const u8 *aad, addr[2] = zero; len[2] = CMAC_TLEN; - aes_128_cmac_vector(tfm, scratch, 3, addr, len, mic); + aes_128_cmac_vector(tfm, 3, addr, len, mic); } diff --git a/net/mac80211/aes_cmac.h b/net/mac80211/aes_cmac.h index 0eb9a48..20785a6 100644 --- a/net/mac80211/aes_cmac.h +++ b/net/mac80211/aes_cmac.h @@ -12,7 +12,7 @@ #include struct crypto_cipher * ieee80211_aes_cmac_key_setup(const u8 key[]); -void ieee80211_aes_cmac(struct crypto_cipher *tfm, u8 *scratch, const u8 *aad, +void ieee80211_aes_cmac(struct crypto_cipher *tfm, const u8 *aad, const u8 *data, size_t data_len, u8 *mic); void ieee80211_aes_cmac_key_free(struct crypto_cipher *tfm); diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 1a41b14..6174785 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -38,6 +38,7 @@ #include #include +#include #include #include "ieee80211_i.h" #include "driver-ops.h" @@ -48,8 +49,6 @@ static void ieee80211_free_tid_rx(struct rcu_head *h) container_of(h, struct tid_ampdu_rx, rcu_head); int i; - del_timer_sync(&tid_rx->reorder_timer); - for (i = 0; i < tid_rx->buf_size; i++) dev_kfree_skb(tid_rx->reorder_buf[i]); kfree(tid_rx->reorder_buf); @@ -71,7 +70,7 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, if (!tid_rx) return; - rcu_assign_pointer(sta->ampdu_mlme.tid_rx[tid], NULL); + RCU_INIT_POINTER(sta->ampdu_mlme.tid_rx[tid], NULL); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Rx BA session stop requested for %pM tid %u\n", @@ -90,6 +89,12 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, del_timer_sync(&tid_rx->session_timer); + /* make sure ieee80211_sta_reorder_release() doesn't re-arm the timer */ + spin_lock_bh(&tid_rx->reorder_lock); + tid_rx->removed = true; + spin_unlock_bh(&tid_rx->reorder_lock); + del_timer_sync(&tid_rx->reorder_timer); + call_rcu(&tid_rx->rcu_head, ieee80211_free_tid_rx); } @@ -101,6 +106,29 @@ void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, mutex_unlock(&sta->ampdu_mlme.mtx); } +void ieee80211_stop_rx_ba_session(struct ieee80211_vif *vif, u16 ba_rx_bitmap, + const u8 *addr) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct sta_info *sta; + int i; + + rcu_read_lock(); + sta = sta_info_get(sdata, addr); + if (!sta) { + rcu_read_unlock(); + return; + } + + for (i = 0; i < STA_TID_NUM; i++) + if (ba_rx_bitmap & BIT(i)) + set_bit(i, sta->ampdu_mlme.tid_rx_stop_requested); + + ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work); + rcu_read_unlock(); +} +EXPORT_SYMBOL(ieee80211_stop_rx_ba_session); + /* * After accepting the AddBA Request we activated a timer, * resetting it after each frame that arrives from the originator. @@ -145,12 +173,8 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d u16 capab; skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); - - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer " - "for addba resp frame\n", sdata->name); + if (!skb) return; - } skb_reserve(skb, local->hw.extra_tx_headroom); mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); @@ -205,7 +229,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, status = WLAN_STATUS_REQUEST_DECLINED; - if (test_sta_flags(sta, WLAN_STA_BLOCK_BA)) { + if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Suspend in progress. " "Denying ADDBA request\n"); @@ -248,19 +272,17 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, "%pM on tid %u\n", mgmt->sa, tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - goto end; + + /* delete existing Rx BA session on the same tid */ + ___ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT, + WLAN_STATUS_UNSPECIFIED_QOS, + false); } /* prepare A-MPDU MLME for Rx aggregation */ tid_agg_rx = kmalloc(sizeof(struct tid_ampdu_rx), GFP_KERNEL); - if (!tid_agg_rx) { -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_ERR "allocate rx mlme to tid %d failed\n", - tid); -#endif + if (!tid_agg_rx) goto end; - } spin_lock_init(&tid_agg_rx->reorder_lock); @@ -280,11 +302,6 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, tid_agg_rx->reorder_time = kcalloc(buf_size, sizeof(unsigned long), GFP_KERNEL); if (!tid_agg_rx->reorder_buf || !tid_agg_rx->reorder_time) { -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_ERR "can not allocate reordering buffer " - "to tid %d\n", tid); -#endif kfree(tid_agg_rx->reorder_buf); kfree(tid_agg_rx->reorder_time); kfree(tid_agg_rx); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index b7f4f5c..2e4b961 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -15,6 +15,7 @@ #include #include +#include #include #include "ieee80211_i.h" #include "driver-ops.h" @@ -68,11 +69,9 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); - if (!skb) { - printk(KERN_ERR "%s: failed to allocate buffer " - "for addba request frame\n", sdata->name); + if (!skb) return; - } + skb_reserve(skb, local->hw.extra_tx_headroom); mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); memset(mgmt, 0, 24); @@ -106,19 +105,18 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, ieee80211_tx_skb(sdata, skb); } -void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn) +void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn) { + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_bar *bar; u16 bar_control = 0; skb = dev_alloc_skb(sizeof(*bar) + local->hw.extra_tx_headroom); - if (!skb) { - printk(KERN_ERR "%s: failed to allocate buffer for " - "bar frame\n", sdata->name); + if (!skb) return; - } + skb_reserve(skb, local->hw.extra_tx_headroom); bar = (struct ieee80211_bar *)skb_put(skb, sizeof(*bar)); memset(bar, 0, sizeof(*bar)); @@ -128,13 +126,14 @@ void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u1 memcpy(bar->ta, sdata->vif.addr, ETH_ALEN); bar_control |= (u16)IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL; bar_control |= (u16)IEEE80211_BAR_CTRL_CBMTID_COMPRESSED_BA; - bar_control |= (u16)(tid << 12); + bar_control |= (u16)(tid << IEEE80211_BAR_CTRL_TID_INFO_SHIFT); bar->control = cpu_to_le16(bar_control); bar->start_seq_num = cpu_to_le16(ssn); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; ieee80211_tx_skb(sdata, skb); } +EXPORT_SYMBOL(ieee80211_send_bar); void ieee80211_assign_tid_tx(struct sta_info *sta, int tid, struct tid_ampdu_tx *tid_tx) @@ -412,7 +411,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, return -EINVAL; if ((tid >= STA_TID_NUM) || - !(local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION)) + !(local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION) || + (local->hw.flags & IEEE80211_HW_TX_AMPDU_SETUP_IN_HW)) return -EINVAL; #ifdef CONFIG_MAC80211_HT_DEBUG @@ -431,7 +431,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, sdata->vif.type != NL80211_IFTYPE_AP) return -EINVAL; - if (test_sta_flags(sta, WLAN_STA_BLOCK_BA)) { + if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "BA sessions blocked. " "Denying BA session request\n"); @@ -461,11 +461,6 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, /* prepare A-MPDU MLME for Tx aggregation */ tid_tx = kzalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC); if (!tid_tx) { -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_ERR "allocate tx mlme to tid %d failed\n", - tid); -#endif ret = -ENOMEM; goto err_unlock_sta; } @@ -590,14 +585,9 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, struct ieee80211_ra_tid *ra_tid; struct sk_buff *skb = dev_alloc_skb(0); - if (unlikely(!skb)) { -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_WARNING "%s: Not enough memory, " - "dropping start BA session", sdata->name); -#endif + if (unlikely(!skb)) return; - } + ra_tid = (struct ieee80211_ra_tid *) &skb->cb; memcpy(&ra_tid->ra, ra, ETH_ALEN); ra_tid->tid = tid; @@ -743,14 +733,9 @@ void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, struct ieee80211_ra_tid *ra_tid; struct sk_buff *skb = dev_alloc_skb(0); - if (unlikely(!skb)) { -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_WARNING "%s: Not enough memory, " - "dropping stop BA session", sdata->name); -#endif + if (unlikely(!skb)) return; - } + ra_tid = (struct ieee80211_ra_tid *) &skb->cb; memcpy(&ra_tid->ra, ra, ETH_ALEN); ra_tid->tid = tid; @@ -809,17 +794,14 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, goto out; } + /* + * IEEE 802.11-2007 7.3.1.14: + * In an ADDBA Response frame, when the Status Code field + * is set to 0, the Buffer Size subfield is set to a value + * of at least 1. + */ if (le16_to_cpu(mgmt->u.action.u.addba_resp.status) - == WLAN_STATUS_SUCCESS) { - /* - * IEEE 802.11-2007 7.3.1.14: - * In an ADDBA Response frame, when the Status Code field - * is set to 0, the Buffer Size subfield is set to a value - * of at least 1. - */ - if (!buf_size) - goto out; - + == WLAN_STATUS_SUCCESS && buf_size) { if (test_and_set_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) { /* ignore duplicate response */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 143a006..11cee76 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include "ieee80211_i.h" #include "driver-ops.h" @@ -62,7 +63,7 @@ static int ieee80211_change_iface(struct wiphy *wiphy, if (type == NL80211_IFTYPE_AP_VLAN && params && params->use_4addr == 0) - rcu_assign_pointer(sdata->u.vlan.sta, NULL); + RCU_INIT_POINTER(sdata->u.vlan.sta, NULL); else if (type == NL80211_IFTYPE_STATION && params && params->use_4addr >= 0) sdata->u.mgd.use_4addr = params->use_4addr; @@ -209,6 +210,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, u8 seq[6] = {0}; struct key_params params; struct ieee80211_key *key = NULL; + u64 pn64; u32 iv32; u16 iv16; int err = -ENOENT; @@ -256,22 +258,24 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, params.seq_len = 6; break; case WLAN_CIPHER_SUITE_CCMP: - seq[0] = key->u.ccmp.tx_pn[5]; - seq[1] = key->u.ccmp.tx_pn[4]; - seq[2] = key->u.ccmp.tx_pn[3]; - seq[3] = key->u.ccmp.tx_pn[2]; - seq[4] = key->u.ccmp.tx_pn[1]; - seq[5] = key->u.ccmp.tx_pn[0]; + pn64 = atomic64_read(&key->u.ccmp.tx_pn); + seq[0] = pn64; + seq[1] = pn64 >> 8; + seq[2] = pn64 >> 16; + seq[3] = pn64 >> 24; + seq[4] = pn64 >> 32; + seq[5] = pn64 >> 40; params.seq = seq; params.seq_len = 6; break; case WLAN_CIPHER_SUITE_AES_CMAC: - seq[0] = key->u.aes_cmac.tx_pn[5]; - seq[1] = key->u.aes_cmac.tx_pn[4]; - seq[2] = key->u.aes_cmac.tx_pn[3]; - seq[3] = key->u.aes_cmac.tx_pn[2]; - seq[4] = key->u.aes_cmac.tx_pn[1]; - seq[5] = key->u.aes_cmac.tx_pn[0]; + pn64 = atomic64_read(&key->u.aes_cmac.tx_pn); + seq[0] = pn64; + seq[1] = pn64 >> 8; + seq[2] = pn64 >> 16; + seq[3] = pn64 >> 24; + seq[4] = pn64 >> 32; + seq[5] = pn64 >> 40; params.seq = seq; params.seq_len = 6; break; @@ -340,7 +344,8 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) STATION_INFO_RX_BITRATE | STATION_INFO_RX_DROP_MISC | STATION_INFO_BSS_PARAM | - STATION_INFO_CONNECTED_TIME; + STATION_INFO_CONNECTED_TIME | + STATION_INFO_STA_FLAGS; do_posix_clock_monotonic_gettime(&uptime); sinfo->connected_time = uptime.tv_sec - sta->last_connected; @@ -400,6 +405,23 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_SLOT_TIME; sinfo->bss_param.dtim_period = sdata->local->hw.conf.ps_dtim_period; sinfo->bss_param.beacon_interval = sdata->vif.bss_conf.beacon_int; + + sinfo->sta_flags.set = 0; + sinfo->sta_flags.mask = BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | + BIT(NL80211_STA_FLAG_WME) | + BIT(NL80211_STA_FLAG_MFP) | + BIT(NL80211_STA_FLAG_AUTHENTICATED); + if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHORIZED); + if (test_sta_flag(sta, WLAN_STA_SHORT_PREAMBLE)) + sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_SHORT_PREAMBLE); + if (test_sta_flag(sta, WLAN_STA_WME)) + sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_WME); + if (test_sta_flag(sta, WLAN_STA_MFP)) + sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_MFP); + if (test_sta_flag(sta, WLAN_STA_AUTH)) + sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHENTICATED); } @@ -452,6 +474,20 @@ static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, return ret; } +static void ieee80211_config_ap_ssid(struct ieee80211_sub_if_data *sdata, + struct beacon_parameters *params) +{ + struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; + + bss_conf->ssid_len = params->ssid_len; + + if (params->ssid_len) + memcpy(bss_conf->ssid, params->ssid, params->ssid_len); + + bss_conf->hidden_ssid = + (params->hidden_ssid != NL80211_HIDDEN_SSID_NOT_IN_USE); +} + /* * This handles both adding a beacon and setting new beacon info */ @@ -545,8 +581,11 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata, kfree(old); + ieee80211_config_ap_ssid(sdata, params); + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED | - BSS_CHANGED_BEACON); + BSS_CHANGED_BEACON | + BSS_CHANGED_SSID); return 0; } @@ -591,7 +630,7 @@ static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev) if (!old) return -ENOENT; - rcu_assign_pointer(sdata->u.ap.beacon, NULL); + RCU_INIT_POINTER(sdata->u.ap.beacon, NULL); synchronize_rcu(); kfree(old); @@ -647,7 +686,6 @@ static void sta_apply_parameters(struct ieee80211_local *local, struct sta_info *sta, struct station_parameters *params) { - unsigned long flags; u32 rates; int i, j; struct ieee80211_supported_band *sband; @@ -656,40 +694,58 @@ static void sta_apply_parameters(struct ieee80211_local *local, sband = local->hw.wiphy->bands[local->oper_channel->band]; - spin_lock_irqsave(&sta->flaglock, flags); mask = params->sta_flags_mask; set = params->sta_flags_set; if (mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { - sta->flags &= ~WLAN_STA_AUTHORIZED; if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) - sta->flags |= WLAN_STA_AUTHORIZED; + set_sta_flag(sta, WLAN_STA_AUTHORIZED); + else + clear_sta_flag(sta, WLAN_STA_AUTHORIZED); } if (mask & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) { - sta->flags &= ~WLAN_STA_SHORT_PREAMBLE; if (set & BIT(NL80211_STA_FLAG_SHORT_PREAMBLE)) - sta->flags |= WLAN_STA_SHORT_PREAMBLE; + set_sta_flag(sta, WLAN_STA_SHORT_PREAMBLE); + else + clear_sta_flag(sta, WLAN_STA_SHORT_PREAMBLE); } if (mask & BIT(NL80211_STA_FLAG_WME)) { - sta->flags &= ~WLAN_STA_WME; - if (set & BIT(NL80211_STA_FLAG_WME)) - sta->flags |= WLAN_STA_WME; + if (set & BIT(NL80211_STA_FLAG_WME)) { + set_sta_flag(sta, WLAN_STA_WME); + sta->sta.wme = true; + } else { + clear_sta_flag(sta, WLAN_STA_WME); + sta->sta.wme = false; + } } if (mask & BIT(NL80211_STA_FLAG_MFP)) { - sta->flags &= ~WLAN_STA_MFP; if (set & BIT(NL80211_STA_FLAG_MFP)) - sta->flags |= WLAN_STA_MFP; + set_sta_flag(sta, WLAN_STA_MFP); + else + clear_sta_flag(sta, WLAN_STA_MFP); } if (mask & BIT(NL80211_STA_FLAG_AUTHENTICATED)) { - sta->flags &= ~WLAN_STA_AUTH; if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) - sta->flags |= WLAN_STA_AUTH; + set_sta_flag(sta, WLAN_STA_AUTH); + else + clear_sta_flag(sta, WLAN_STA_AUTH); + } + + if (mask & BIT(NL80211_STA_FLAG_TDLS_PEER)) { + if (set & BIT(NL80211_STA_FLAG_TDLS_PEER)) + set_sta_flag(sta, WLAN_STA_TDLS_PEER); + else + clear_sta_flag(sta, WLAN_STA_TDLS_PEER); + } + + if (params->sta_modify_mask & STATION_PARAM_APPLY_UAPSD) { + sta->sta.uapsd_queues = params->uapsd_queues; + sta->sta.max_sp = params->max_sp; } - spin_unlock_irqrestore(&sta->flaglock, flags); /* * cfg80211 validates this (1-2007) and allows setting the AID @@ -776,11 +832,18 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (is_multicast_ether_addr(mac)) return -EINVAL; + /* Only TDLS-supporting stations can add TDLS peers */ + if ((params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) && + !((wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS) && + sdata->vif.type == NL80211_IFTYPE_STATION)) + return -ENOTSUPP; + sta = sta_info_alloc(sdata, mac, GFP_KERNEL); if (!sta) return -ENOMEM; - sta->flags = WLAN_STA_AUTH | WLAN_STA_ASSOC; + set_sta_flag(sta, WLAN_STA_AUTH); + set_sta_flag(sta, WLAN_STA_ASSOC); sta_apply_parameters(local, sta, params); @@ -836,6 +899,14 @@ static int ieee80211_change_station(struct wiphy *wiphy, return -ENOENT; } + /* The TDLS bit cannot be toggled after the STA was added */ + if ((params->sta_flags_mask & BIT(NL80211_STA_FLAG_TDLS_PEER)) && + !!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) != + !!test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { + rcu_read_unlock(); + return -EINVAL; + } + if (params->vlan && params->vlan != sta->sdata->dev) { vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); @@ -912,7 +983,7 @@ static int ieee80211_del_mpath(struct wiphy *wiphy, struct net_device *dev, if (dst) return mesh_path_del(dst, sdata); - mesh_path_flush(sdata); + mesh_path_flush_by_iface(sdata); return 0; } @@ -1131,6 +1202,22 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy, conf->dot11MeshHWMPRootMode = nconf->dot11MeshHWMPRootMode; ieee80211_mesh_root_setup(ifmsh); } + if (_chg_mesh_attr(NL80211_MESHCONF_GATE_ANNOUNCEMENTS, mask)) { + /* our current gate announcement implementation rides on root + * announcements, so require this ifmsh to also be a root node + * */ + if (nconf->dot11MeshGateAnnouncementProtocol && + !conf->dot11MeshHWMPRootMode) { + conf->dot11MeshHWMPRootMode = 1; + ieee80211_mesh_root_setup(ifmsh); + } + conf->dot11MeshGateAnnouncementProtocol = + nconf->dot11MeshGateAnnouncementProtocol; + } + if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_RANN_INTERVAL, mask)) { + conf->dot11MeshHWMPRannInterval = + nconf->dot11MeshHWMPRannInterval; + } return 0; } @@ -1229,9 +1316,11 @@ static int ieee80211_change_bss(struct wiphy *wiphy, } static int ieee80211_set_txq_params(struct wiphy *wiphy, + struct net_device *dev, struct ieee80211_txq_params *params) { struct ieee80211_local *local = wiphy_priv(wiphy); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_tx_queue_params p; if (!local->ops->conf_tx) @@ -1249,7 +1338,11 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy, */ p.uapsd = false; - if (drv_conf_tx(local, params->queue, &p)) { + if (params->queue >= local->hw.queues) + return -EINVAL; + + sdata->tx_conf[params->queue] = p; + if (drv_conf_tx(local, sdata, params->queue, &p)) { wiphy_debug(local->hw.wiphy, "failed to set TX queue parameters for queue %d\n", params->queue); @@ -1554,6 +1647,19 @@ static int ieee80211_testmode_cmd(struct wiphy *wiphy, void *data, int len) return local->ops->testmode_cmd(&local->hw, data, len); } + +static int ieee80211_testmode_dump(struct wiphy *wiphy, + struct sk_buff *skb, + struct netlink_callback *cb, + void *data, int len) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + + if (!local->ops->testmode_dump) + return -EOPNOTSUPP; + + return local->ops->testmode_dump(&local->hw, skb, cb, data, len); +} #endif int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata, @@ -1810,7 +1916,8 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, bool channel_type_valid, unsigned int wait, - const u8 *buf, size_t len, u64 *cookie) + const u8 *buf, size_t len, bool no_cck, + u64 *cookie) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; @@ -1837,6 +1944,9 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, flags |= IEEE80211_TX_CTL_TX_OFFCHAN; } + if (no_cck) + flags |= IEEE80211_TX_CTL_NO_CCK_RATE; + if (is_offchan && !offchan) return -EBUSY; @@ -1875,33 +1985,6 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, *cookie = (unsigned long) skb; - if (is_offchan && local->ops->offchannel_tx) { - int ret; - - IEEE80211_SKB_CB(skb)->band = chan->band; - - mutex_lock(&local->mtx); - - if (local->hw_offchan_tx_cookie) { - mutex_unlock(&local->mtx); - return -EBUSY; - } - - /* TODO: bitrate control, TX processing? */ - ret = drv_offchannel_tx(local, skb, chan, channel_type, wait); - - if (ret == 0) - local->hw_offchan_tx_cookie = *cookie; - mutex_unlock(&local->mtx); - - /* - * Allow driver to return 1 to indicate it wants to have the - * frame transmitted with a remain_on_channel + regular TX. - */ - if (ret != 1) - return ret; - } - if (is_offchan && local->ops->remain_on_channel) { unsigned int duration; int ret; @@ -1988,18 +2071,6 @@ static int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy, mutex_lock(&local->mtx); - if (local->ops->offchannel_tx_cancel_wait && - local->hw_offchan_tx_cookie == cookie) { - ret = drv_offchannel_tx_cancel_wait(local); - - if (!ret) - local->hw_offchan_tx_cookie = 0; - - mutex_unlock(&local->mtx); - - return ret; - } - if (local->ops->cancel_remain_on_channel) { cookie ^= 2; ret = ieee80211_cancel_remain_on_channel_hw(local, cookie); @@ -2085,6 +2156,338 @@ static void ieee80211_get_ringparam(struct wiphy *wiphy, drv_get_ringparam(local, tx, tx_max, rx, rx_max); } +static int ieee80211_set_rekey_data(struct wiphy *wiphy, + struct net_device *dev, + struct cfg80211_gtk_rekey_data *data) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (!local->ops->set_rekey_data) + return -EOPNOTSUPP; + + drv_set_rekey_data(local, sdata, data); + + return 0; +} + +static void ieee80211_tdls_add_ext_capab(struct sk_buff *skb) +{ + u8 *pos = (void *)skb_put(skb, 7); + + *pos++ = WLAN_EID_EXT_CAPABILITY; + *pos++ = 5; /* len */ + *pos++ = 0x0; + *pos++ = 0x0; + *pos++ = 0x0; + *pos++ = 0x0; + *pos++ = WLAN_EXT_CAPA5_TDLS_ENABLED; +} + +static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + u16 capab; + + capab = 0; + if (local->oper_channel->band != IEEE80211_BAND_2GHZ) + return capab; + + if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE)) + capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME; + if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE)) + capab |= WLAN_CAPABILITY_SHORT_PREAMBLE; + + return capab; +} + +static void ieee80211_tdls_add_link_ie(struct sk_buff *skb, u8 *src_addr, + u8 *peer, u8 *bssid) +{ + struct ieee80211_tdls_lnkie *lnkid; + + lnkid = (void *)skb_put(skb, sizeof(struct ieee80211_tdls_lnkie)); + + lnkid->ie_type = WLAN_EID_LINK_ID; + lnkid->ie_len = sizeof(struct ieee80211_tdls_lnkie) - 2; + + memcpy(lnkid->bssid, bssid, ETH_ALEN); + memcpy(lnkid->init_sta, src_addr, ETH_ALEN); + memcpy(lnkid->resp_sta, peer, ETH_ALEN); +} + +static int +ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev, + u8 *peer, u8 action_code, u8 dialog_token, + u16 status_code, struct sk_buff *skb) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_tdls_data *tf; + + tf = (void *)skb_put(skb, offsetof(struct ieee80211_tdls_data, u)); + + memcpy(tf->da, peer, ETH_ALEN); + memcpy(tf->sa, sdata->vif.addr, ETH_ALEN); + tf->ether_type = cpu_to_be16(ETH_P_TDLS); + tf->payload_type = WLAN_TDLS_SNAP_RFTYPE; + + switch (action_code) { + case WLAN_TDLS_SETUP_REQUEST: + tf->category = WLAN_CATEGORY_TDLS; + tf->action_code = WLAN_TDLS_SETUP_REQUEST; + + skb_put(skb, sizeof(tf->u.setup_req)); + tf->u.setup_req.dialog_token = dialog_token; + tf->u.setup_req.capability = + cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); + + ieee80211_add_srates_ie(&sdata->vif, skb); + ieee80211_add_ext_srates_ie(&sdata->vif, skb); + ieee80211_tdls_add_ext_capab(skb); + break; + case WLAN_TDLS_SETUP_RESPONSE: + tf->category = WLAN_CATEGORY_TDLS; + tf->action_code = WLAN_TDLS_SETUP_RESPONSE; + + skb_put(skb, sizeof(tf->u.setup_resp)); + tf->u.setup_resp.status_code = cpu_to_le16(status_code); + tf->u.setup_resp.dialog_token = dialog_token; + tf->u.setup_resp.capability = + cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); + + ieee80211_add_srates_ie(&sdata->vif, skb); + ieee80211_add_ext_srates_ie(&sdata->vif, skb); + ieee80211_tdls_add_ext_capab(skb); + break; + case WLAN_TDLS_SETUP_CONFIRM: + tf->category = WLAN_CATEGORY_TDLS; + tf->action_code = WLAN_TDLS_SETUP_CONFIRM; + + skb_put(skb, sizeof(tf->u.setup_cfm)); + tf->u.setup_cfm.status_code = cpu_to_le16(status_code); + tf->u.setup_cfm.dialog_token = dialog_token; + break; + case WLAN_TDLS_TEARDOWN: + tf->category = WLAN_CATEGORY_TDLS; + tf->action_code = WLAN_TDLS_TEARDOWN; + + skb_put(skb, sizeof(tf->u.teardown)); + tf->u.teardown.reason_code = cpu_to_le16(status_code); + break; + case WLAN_TDLS_DISCOVERY_REQUEST: + tf->category = WLAN_CATEGORY_TDLS; + tf->action_code = WLAN_TDLS_DISCOVERY_REQUEST; + + skb_put(skb, sizeof(tf->u.discover_req)); + tf->u.discover_req.dialog_token = dialog_token; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int +ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev, + u8 *peer, u8 action_code, u8 dialog_token, + u16 status_code, struct sk_buff *skb) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_mgmt *mgmt; + + mgmt = (void *)skb_put(skb, 24); + memset(mgmt, 0, 24); + memcpy(mgmt->da, peer, ETH_ALEN); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); + memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN); + + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); + + switch (action_code) { + case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: + skb_put(skb, 1 + sizeof(mgmt->u.action.u.tdls_discover_resp)); + mgmt->u.action.category = WLAN_CATEGORY_PUBLIC; + mgmt->u.action.u.tdls_discover_resp.action_code = + WLAN_PUB_ACTION_TDLS_DISCOVER_RES; + mgmt->u.action.u.tdls_discover_resp.dialog_token = + dialog_token; + mgmt->u.action.u.tdls_discover_resp.capability = + cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); + + ieee80211_add_srates_ie(&sdata->vif, skb); + ieee80211_add_ext_srates_ie(&sdata->vif, skb); + ieee80211_tdls_add_ext_capab(skb); + break; + default: + return -EINVAL; + } + + return 0; +} + +static int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, + u8 *peer, u8 action_code, u8 dialog_token, + u16 status_code, const u8 *extra_ies, + size_t extra_ies_len) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + struct ieee80211_tx_info *info; + struct sk_buff *skb = NULL; + bool send_direct; + int ret; + + if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS)) + return -ENOTSUPP; + + /* make sure we are in managed mode, and associated */ + if (sdata->vif.type != NL80211_IFTYPE_STATION || + !sdata->u.mgd.associated) + return -EINVAL; + +#ifdef CONFIG_MAC80211_VERBOSE_TDLS_DEBUG + printk(KERN_DEBUG "TDLS mgmt action %d peer %pM\n", action_code, peer); +#endif + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + + max(sizeof(struct ieee80211_mgmt), + sizeof(struct ieee80211_tdls_data)) + + 50 + /* supported rates */ + 7 + /* ext capab */ + extra_ies_len + + sizeof(struct ieee80211_tdls_lnkie)); + if (!skb) + return -ENOMEM; + + info = IEEE80211_SKB_CB(skb); + skb_reserve(skb, local->hw.extra_tx_headroom); + + switch (action_code) { + case WLAN_TDLS_SETUP_REQUEST: + case WLAN_TDLS_SETUP_RESPONSE: + case WLAN_TDLS_SETUP_CONFIRM: + case WLAN_TDLS_TEARDOWN: + case WLAN_TDLS_DISCOVERY_REQUEST: + ret = ieee80211_prep_tdls_encap_data(wiphy, dev, peer, + action_code, dialog_token, + status_code, skb); + send_direct = false; + break; + case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: + ret = ieee80211_prep_tdls_direct(wiphy, dev, peer, action_code, + dialog_token, status_code, + skb); + send_direct = true; + break; + default: + ret = -ENOTSUPP; + break; + } + + if (ret < 0) + goto fail; + + if (extra_ies_len) + memcpy(skb_put(skb, extra_ies_len), extra_ies, extra_ies_len); + + /* the TDLS link IE is always added last */ + switch (action_code) { + case WLAN_TDLS_SETUP_REQUEST: + case WLAN_TDLS_SETUP_CONFIRM: + case WLAN_TDLS_TEARDOWN: + case WLAN_TDLS_DISCOVERY_REQUEST: + /* we are the initiator */ + ieee80211_tdls_add_link_ie(skb, sdata->vif.addr, peer, + sdata->u.mgd.bssid); + break; + case WLAN_TDLS_SETUP_RESPONSE: + case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: + /* we are the responder */ + ieee80211_tdls_add_link_ie(skb, peer, sdata->vif.addr, + sdata->u.mgd.bssid); + break; + default: + ret = -ENOTSUPP; + goto fail; + } + + if (send_direct) { + ieee80211_tx_skb(sdata, skb); + return 0; + } + + /* + * According to 802.11z: Setup req/resp are sent in AC_BK, otherwise + * we should default to AC_VI. + */ + switch (action_code) { + case WLAN_TDLS_SETUP_REQUEST: + case WLAN_TDLS_SETUP_RESPONSE: + skb_set_queue_mapping(skb, IEEE80211_AC_BK); + skb->priority = 2; + break; + default: + skb_set_queue_mapping(skb, IEEE80211_AC_VI); + skb->priority = 5; + break; + } + + /* disable bottom halves when entering the Tx path */ + local_bh_disable(); + ret = ieee80211_subif_start_xmit(skb, dev); + local_bh_enable(); + + return ret; + +fail: + dev_kfree_skb(skb); + return ret; +} + +static int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, + u8 *peer, enum nl80211_tdls_operation oper) +{ + struct sta_info *sta; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS)) + return -ENOTSUPP; + + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return -EINVAL; + +#ifdef CONFIG_MAC80211_VERBOSE_TDLS_DEBUG + printk(KERN_DEBUG "TDLS oper %d peer %pM\n", oper, peer); +#endif + + switch (oper) { + case NL80211_TDLS_ENABLE_LINK: + rcu_read_lock(); + sta = sta_info_get(sdata, peer); + if (!sta) { + rcu_read_unlock(); + return -ENOLINK; + } + + set_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH); + rcu_read_unlock(); + break; + case NL80211_TDLS_DISABLE_LINK: + return sta_info_destroy_addr(sdata, peer); + case NL80211_TDLS_TEARDOWN: + case NL80211_TDLS_SETUP: + case NL80211_TDLS_DISCOVERY_REQ: + /* We don't support in-driver setup/teardown/discovery */ + return -ENOTSUPP; + default: + return -ENOTSUPP; + } + + return 0; +} + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -2134,6 +2537,7 @@ struct cfg80211_ops mac80211_config_ops = { .set_wds_peer = ieee80211_set_wds_peer, .rfkill_poll = ieee80211_rfkill_poll, CFG80211_TESTMODE_CMD(ieee80211_testmode_cmd) + CFG80211_TESTMODE_DUMP(ieee80211_testmode_dump) .set_power_mgmt = ieee80211_set_power_mgmt, .set_bitrate_mask = ieee80211_set_bitrate_mask, .remain_on_channel = ieee80211_remain_on_channel, @@ -2146,4 +2550,7 @@ struct cfg80211_ops mac80211_config_ops = { .get_antenna = ieee80211_get_antenna, .set_ringparam = ieee80211_set_ringparam, .get_ringparam = ieee80211_get_ringparam, + .set_rekey_data = ieee80211_set_rekey_data, + .tdls_oper = ieee80211_tdls_oper, + .tdls_mgmt = ieee80211_tdls_mgmt, }; diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 186e02f..883996b 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -78,57 +78,6 @@ DEBUGFS_READONLY_FILE(wep_iv, "%#08x", DEBUGFS_READONLY_FILE(rate_ctrl_alg, "%s", local->rate_ctrl ? local->rate_ctrl->ops->name : "hw/driver"); -static ssize_t tsf_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct ieee80211_local *local = file->private_data; - u64 tsf; - - tsf = drv_get_tsf(local); - - return mac80211_format_buffer(user_buf, count, ppos, "0x%016llx\n", - (unsigned long long) tsf); -} - -static ssize_t tsf_write(struct file *file, - const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct ieee80211_local *local = file->private_data; - unsigned long long tsf; - char buf[100]; - size_t len; - - len = min(count, sizeof(buf) - 1); - if (copy_from_user(buf, user_buf, len)) - return -EFAULT; - buf[len] = '\0'; - - if (strncmp(buf, "reset", 5) == 0) { - if (local->ops->reset_tsf) { - drv_reset_tsf(local); - wiphy_info(local->hw.wiphy, "debugfs reset TSF\n"); - } - } else { - tsf = simple_strtoul(buf, NULL, 0); - if (local->ops->set_tsf) { - drv_set_tsf(local, tsf); - wiphy_info(local->hw.wiphy, - "debugfs set TSF to %#018llx\n", tsf); - - } - } - - return count; -} - -static const struct file_operations tsf_ops = { - .read = tsf_read, - .write = tsf_write, - .open = mac80211_open_file_generic, - .llseek = default_llseek, -}; - static ssize_t reset_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { @@ -195,20 +144,12 @@ static ssize_t uapsd_queues_write(struct file *file, size_t count, loff_t *ppos) { struct ieee80211_local *local = file->private_data; - unsigned long val; - char buf[10]; - size_t len; + u8 val; int ret; - len = min(count, sizeof(buf) - 1); - if (copy_from_user(buf, user_buf, len)) - return -EFAULT; - buf[len] = '\0'; - - ret = strict_strtoul(buf, 0, &val); - + ret = kstrtou8_from_user(user_buf, count, 0, &val); if (ret) - return -EINVAL; + return ret; if (val & ~IEEE80211_WMM_IE_STA_QOSINFO_AC_MASK) return -ERANGE; @@ -305,6 +246,9 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf, char *buf = kzalloc(mxln, GFP_KERNEL); int sf = 0; /* how many written so far */ + if (!buf) + return 0; + sf += snprintf(buf, mxln - sf, "0x%x\n", local->hw.flags); if (local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) sf += snprintf(buf + sf, mxln - sf, "HAS_RATE_CONTROL\n"); @@ -355,6 +299,8 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf, sf += snprintf(buf + sf, mxln - sf, "SUPPORTS_PER_STA_GTK\n"); if (local->hw.flags & IEEE80211_HW_AP_LINK_PS) sf += snprintf(buf + sf, mxln - sf, "AP_LINK_PS\n"); + if (local->hw.flags & IEEE80211_HW_TX_AMPDU_SETUP_IN_HW) + sf += snprintf(buf + sf, mxln - sf, "TX_AMPDU_SETUP_IN_HW\n"); rv = simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf)); kfree(buf); @@ -450,7 +396,6 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_ADD(frequency); DEBUGFS_ADD(total_ps_buffered); DEBUGFS_ADD(wep_iv); - DEBUGFS_ADD(tsf); DEBUGFS_ADD(queues); DEBUGFS_ADD_MODE(reset, 0200); DEBUGFS_ADD(noack); diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index 33c58b8..38e6101 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -78,7 +78,7 @@ KEY_OPS(algorithm); static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { - const u8 *tpn; + u64 pn; char buf[20]; int len; struct ieee80211_key *key = file->private_data; @@ -94,15 +94,16 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf, key->u.tkip.tx.iv16); break; case WLAN_CIPHER_SUITE_CCMP: - tpn = key->u.ccmp.tx_pn; + pn = atomic64_read(&key->u.ccmp.tx_pn); len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", - tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], tpn[5]); + (u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24), + (u8)(pn >> 16), (u8)(pn >> 8), (u8)pn); break; case WLAN_CIPHER_SUITE_AES_CMAC: - tpn = key->u.aes_cmac.tx_pn; + pn = atomic64_read(&key->u.aes_cmac.tx_pn); len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", - tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], - tpn[5]); + (u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24), + (u8)(pn >> 16), (u8)(pn >> 8), (u8)pn); break; default: return 0; diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 9ea7c0d..0228ecb 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -21,6 +21,7 @@ #include "rate.h" #include "debugfs.h" #include "debugfs_netdev.h" +#include "driver-ops.h" static ssize_t ieee80211_if_read( struct ieee80211_sub_if_data *sdata, @@ -32,8 +33,7 @@ static ssize_t ieee80211_if_read( ssize_t ret = -EINVAL; read_lock(&dev_base_lock); - if (sdata->dev->reg_state == NETREG_REGISTERED) - ret = (*format)(sdata, buf, sizeof(buf)); + ret = (*format)(sdata, buf, sizeof(buf)); read_unlock(&dev_base_lock); if (ret >= 0) @@ -61,8 +61,7 @@ static ssize_t ieee80211_if_write( ret = -ENODEV; rtnl_lock(); - if (sdata->dev->reg_state == NETREG_REGISTERED) - ret = (*write)(sdata, buf, count); + ret = (*write)(sdata, buf, count); rtnl_unlock(); freebuf: @@ -331,6 +330,46 @@ static ssize_t ieee80211_if_fmt_num_buffered_multicast( } __IEEE80211_IF_FILE(num_buffered_multicast, NULL); +/* IBSS attributes */ +static ssize_t ieee80211_if_fmt_tsf( + const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) +{ + struct ieee80211_local *local = sdata->local; + u64 tsf; + + tsf = drv_get_tsf(local, (struct ieee80211_sub_if_data *)sdata); + + return scnprintf(buf, buflen, "0x%016llx\n", (unsigned long long) tsf); +} + +static ssize_t ieee80211_if_parse_tsf( + struct ieee80211_sub_if_data *sdata, const char *buf, int buflen) +{ + struct ieee80211_local *local = sdata->local; + unsigned long long tsf; + int ret; + + if (strncmp(buf, "reset", 5) == 0) { + if (local->ops->reset_tsf) { + drv_reset_tsf(local, sdata); + wiphy_info(local->hw.wiphy, "debugfs reset TSF\n"); + } + } else { + ret = kstrtoull(buf, 10, &tsf); + if (ret < 0) + return -EINVAL; + if (local->ops->set_tsf) { + drv_set_tsf(local, sdata, tsf); + wiphy_info(local->hw.wiphy, + "debugfs set TSF to %#018llx\n", tsf); + } + } + + return buflen; +} +__IEEE80211_IF_FILE_W(tsf); + + /* WDS attributes */ IEEE80211_IF_FILE(peer, u.wds.remote_addr, MAC); @@ -340,6 +379,8 @@ IEEE80211_IF_FILE(fwded_mcast, u.mesh.mshstats.fwded_mcast, DEC); IEEE80211_IF_FILE(fwded_unicast, u.mesh.mshstats.fwded_unicast, DEC); IEEE80211_IF_FILE(fwded_frames, u.mesh.mshstats.fwded_frames, DEC); IEEE80211_IF_FILE(dropped_frames_ttl, u.mesh.mshstats.dropped_frames_ttl, DEC); +IEEE80211_IF_FILE(dropped_frames_congestion, + u.mesh.mshstats.dropped_frames_congestion, DEC); IEEE80211_IF_FILE(dropped_frames_no_route, u.mesh.mshstats.dropped_frames_no_route, DEC); IEEE80211_IF_FILE(estab_plinks, u.mesh.mshstats.estab_plinks, ATOMIC); @@ -372,6 +413,10 @@ IEEE80211_IF_FILE(min_discovery_timeout, u.mesh.mshcfg.min_discovery_timeout, DEC); IEEE80211_IF_FILE(dot11MeshHWMPRootMode, u.mesh.mshcfg.dot11MeshHWMPRootMode, DEC); +IEEE80211_IF_FILE(dot11MeshGateAnnouncementProtocol, + u.mesh.mshcfg.dot11MeshGateAnnouncementProtocol, DEC); +IEEE80211_IF_FILE(dot11MeshHWMPRannInterval, + u.mesh.mshcfg.dot11MeshHWMPRannInterval, DEC); #endif @@ -415,6 +460,11 @@ static void add_ap_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD_MODE(tkip_mic_test, 0200); } +static void add_ibss_files(struct ieee80211_sub_if_data *sdata) +{ + DEBUGFS_ADD_MODE(tsf, 0600); +} + static void add_wds_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(drop_unencrypted); @@ -459,6 +509,7 @@ static void add_mesh_stats(struct ieee80211_sub_if_data *sdata) MESHSTATS_ADD(fwded_frames); MESHSTATS_ADD(dropped_frames_ttl); MESHSTATS_ADD(dropped_frames_no_route); + MESHSTATS_ADD(dropped_frames_congestion); MESHSTATS_ADD(estab_plinks); #undef MESHSTATS_ADD } @@ -485,7 +536,9 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata) MESHPARAMS_ADD(dot11MeshHWMPmaxPREQretries); MESHPARAMS_ADD(path_refresh_time); MESHPARAMS_ADD(min_discovery_timeout); - + MESHPARAMS_ADD(dot11MeshHWMPRootMode); + MESHPARAMS_ADD(dot11MeshHWMPRannInterval); + MESHPARAMS_ADD(dot11MeshGateAnnouncementProtocol); #undef MESHPARAMS_ADD } #endif @@ -506,7 +559,7 @@ static void add_files(struct ieee80211_sub_if_data *sdata) add_sta_files(sdata); break; case NL80211_IFTYPE_ADHOC: - /* XXX */ + add_ibss_files(sdata); break; case NL80211_IFTYPE_AP: add_ap_files(sdata); @@ -545,6 +598,7 @@ void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata) debugfs_remove_recursive(sdata->debugfs.dir); sdata->debugfs.dir = NULL; + sdata->debugfs.subdir_stations = NULL; } void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index a01d213..3110cbd 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -56,19 +56,22 @@ STA_FILE(last_signal, last_signal, D); static ssize_t sta_flags_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { - char buf[100]; + char buf[121]; struct sta_info *sta = file->private_data; - u32 staflags = get_sta_flags(sta); - int res = scnprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s", - staflags & WLAN_STA_AUTH ? "AUTH\n" : "", - staflags & WLAN_STA_ASSOC ? "ASSOC\n" : "", - staflags & WLAN_STA_PS_STA ? "PS (sta)\n" : "", - staflags & WLAN_STA_PS_DRIVER ? "PS (driver)\n" : "", - staflags & WLAN_STA_AUTHORIZED ? "AUTHORIZED\n" : "", - staflags & WLAN_STA_SHORT_PREAMBLE ? "SHORT PREAMBLE\n" : "", - staflags & WLAN_STA_WME ? "WME\n" : "", - staflags & WLAN_STA_WDS ? "WDS\n" : "", - staflags & WLAN_STA_MFP ? "MFP\n" : ""); + +#define TEST(flg) \ + test_sta_flag(sta, WLAN_STA_##flg) ? #flg "\n" : "" + + int res = scnprintf(buf, sizeof(buf), + "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + TEST(AUTH), TEST(ASSOC), TEST(PS_STA), + TEST(PS_DRIVER), TEST(AUTHORIZED), + TEST(SHORT_PREAMBLE), TEST(ASSOC_AP), + TEST(WME), TEST(WDS), TEST(CLEAR_PS_FILT), + TEST(MFP), TEST(BLOCK_BA), TEST(PSPOLL), + TEST(UAPSD), TEST(SP), TEST(TDLS_PEER), + TEST(TDLS_PEER_AUTH)); +#undef TEST return simple_read_from_buffer(userbuf, count, ppos, buf, res); } STA_OPS(flags); @@ -78,8 +81,14 @@ static ssize_t sta_num_ps_buf_frames_read(struct file *file, size_t count, loff_t *ppos) { struct sta_info *sta = file->private_data; - return mac80211_format_buffer(userbuf, count, ppos, "%u\n", - skb_queue_len(&sta->ps_tx_buf)); + char buf[17*IEEE80211_NUM_ACS], *p = buf; + int ac; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) + p += scnprintf(p, sizeof(buf)+buf-p, "AC%d: %d\n", ac, + skb_queue_len(&sta->ps_tx_buf[ac]) + + skb_queue_len(&sta->tx_filtered[ac])); + return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); } STA_OPS(num_ps_buf_frames); @@ -265,9 +274,9 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf, PRINT_HT_CAP((htc->cap & BIT(10)), "HT Delayed Block Ack"); - PRINT_HT_CAP((htc->cap & BIT(11)), "Max AMSDU length: " - "3839 bytes"); PRINT_HT_CAP(!(htc->cap & BIT(11)), "Max AMSDU length: " + "3839 bytes"); + PRINT_HT_CAP((htc->cap & BIT(11)), "Max AMSDU length: " "7935 bytes"); /* diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index eebf7a6..5f165d7 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -130,6 +130,37 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local, trace_drv_return_void(local); } +static inline int drv_tx_sync(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + const u8 *bssid, + enum ieee80211_tx_sync_type type) +{ + int ret = 0; + + might_sleep(); + + trace_drv_tx_sync(local, sdata, bssid, type); + if (local->ops->tx_sync) + ret = local->ops->tx_sync(&local->hw, &sdata->vif, + bssid, type); + trace_drv_return_int(local, ret); + return ret; +} + +static inline void drv_finish_tx_sync(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + const u8 *bssid, + enum ieee80211_tx_sync_type type) +{ + might_sleep(); + + trace_drv_finish_tx_sync(local, sdata, bssid, type); + if (local->ops->finish_tx_sync) + local->ops->finish_tx_sync(&local->hw, &sdata->vif, + bssid, type); + trace_drv_return_void(local); +} + static inline u64 drv_prepare_multicast(struct ieee80211_local *local, struct netdev_hw_addr_list *mc_list) { @@ -218,6 +249,16 @@ static inline int drv_hw_scan(struct ieee80211_local *local, return ret; } +static inline void drv_cancel_hw_scan(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + might_sleep(); + + trace_drv_cancel_hw_scan(local, sdata); + local->ops->cancel_hw_scan(&local->hw, &sdata->vif); + trace_drv_return_void(local); +} + static inline int drv_sched_scan_start(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, @@ -372,50 +413,56 @@ static inline void drv_sta_remove(struct ieee80211_local *local, trace_drv_return_void(local); } -static inline int drv_conf_tx(struct ieee80211_local *local, u16 queue, +static inline int drv_conf_tx(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, u16 queue, const struct ieee80211_tx_queue_params *params) { int ret = -EOPNOTSUPP; might_sleep(); - trace_drv_conf_tx(local, queue, params); + trace_drv_conf_tx(local, sdata, queue, params); if (local->ops->conf_tx) - ret = local->ops->conf_tx(&local->hw, queue, params); + ret = local->ops->conf_tx(&local->hw, &sdata->vif, + queue, params); trace_drv_return_int(local, ret); return ret; } -static inline u64 drv_get_tsf(struct ieee80211_local *local) +static inline u64 drv_get_tsf(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) { u64 ret = -1ULL; might_sleep(); - trace_drv_get_tsf(local); + trace_drv_get_tsf(local, sdata); if (local->ops->get_tsf) - ret = local->ops->get_tsf(&local->hw); + ret = local->ops->get_tsf(&local->hw, &sdata->vif); trace_drv_return_u64(local, ret); return ret; } -static inline void drv_set_tsf(struct ieee80211_local *local, u64 tsf) +static inline void drv_set_tsf(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + u64 tsf) { might_sleep(); - trace_drv_set_tsf(local, tsf); + trace_drv_set_tsf(local, sdata, tsf); if (local->ops->set_tsf) - local->ops->set_tsf(&local->hw, tsf); + local->ops->set_tsf(&local->hw, &sdata->vif, tsf); trace_drv_return_void(local); } -static inline void drv_reset_tsf(struct ieee80211_local *local) +static inline void drv_reset_tsf(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) { might_sleep(); - trace_drv_reset_tsf(local); + trace_drv_reset_tsf(local, sdata); if (local->ops->reset_tsf) - local->ops->reset_tsf(&local->hw); + local->ops->reset_tsf(&local->hw, &sdata->vif); trace_drv_return_void(local); } @@ -549,37 +596,6 @@ static inline int drv_cancel_remain_on_channel(struct ieee80211_local *local) return ret; } -static inline int drv_offchannel_tx(struct ieee80211_local *local, - struct sk_buff *skb, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, - unsigned int wait) -{ - int ret; - - might_sleep(); - - trace_drv_offchannel_tx(local, skb, chan, channel_type, wait); - ret = local->ops->offchannel_tx(&local->hw, skb, chan, - channel_type, wait); - trace_drv_return_int(local, ret); - - return ret; -} - -static inline int drv_offchannel_tx_cancel_wait(struct ieee80211_local *local) -{ - int ret; - - might_sleep(); - - trace_drv_offchannel_tx_cancel_wait(local); - ret = local->ops->offchannel_tx_cancel_wait(&local->hw); - trace_drv_return_int(local, ret); - - return ret; -} - static inline int drv_set_ringparam(struct ieee80211_local *local, u32 tx, u32 rx) { @@ -637,4 +653,52 @@ static inline int drv_set_bitrate_mask(struct ieee80211_local *local, return ret; } +static inline void drv_set_rekey_data(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct cfg80211_gtk_rekey_data *data) +{ + trace_drv_set_rekey_data(local, sdata, data); + if (local->ops->set_rekey_data) + local->ops->set_rekey_data(&local->hw, &sdata->vif, data); + trace_drv_return_void(local); +} + +static inline void drv_rssi_callback(struct ieee80211_local *local, + const enum ieee80211_rssi_event event) +{ + trace_drv_rssi_callback(local, event); + if (local->ops->rssi_callback) + local->ops->rssi_callback(&local->hw, event); + trace_drv_return_void(local); +} + +static inline void +drv_release_buffered_frames(struct ieee80211_local *local, + struct sta_info *sta, u16 tids, int num_frames, + enum ieee80211_frame_release_type reason, + bool more_data) +{ + trace_drv_release_buffered_frames(local, &sta->sta, tids, num_frames, + reason, more_data); + if (local->ops->release_buffered_frames) + local->ops->release_buffered_frames(&local->hw, &sta->sta, tids, + num_frames, reason, + more_data); + trace_drv_return_void(local); +} + +static inline void +drv_allow_buffered_frames(struct ieee80211_local *local, + struct sta_info *sta, u16 tids, int num_frames, + enum ieee80211_frame_release_type reason, + bool more_data) +{ + trace_drv_allow_buffered_frames(local, &sta->sta, tids, num_frames, + reason, more_data); + if (local->ops->allow_buffered_frames) + local->ops->allow_buffered_frames(&local->hw, &sta->sta, + tids, num_frames, reason, + more_data); + trace_drv_return_void(local); +} #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index ed9edcb..2af4fca 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -319,6 +319,49 @@ TRACE_EVENT(drv_bss_info_changed, ) ); +DECLARE_EVENT_CLASS(tx_sync_evt, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + const u8 *bssid, + enum ieee80211_tx_sync_type type), + TP_ARGS(local, sdata, bssid, type), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __array(char, bssid, ETH_ALEN) + __field(u32, sync_type) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + memcpy(__entry->bssid, bssid, ETH_ALEN); + __entry->sync_type = type; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT " bssid:%pM type:%d", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->bssid, __entry->sync_type + ) +); + +DEFINE_EVENT(tx_sync_evt, drv_tx_sync, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + const u8 *bssid, + enum ieee80211_tx_sync_type type), + TP_ARGS(local, sdata, bssid, type) +); + +DEFINE_EVENT(tx_sync_evt, drv_finish_tx_sync, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + const u8 *bssid, + enum ieee80211_tx_sync_type type), + TP_ARGS(local, sdata, bssid, type) +); + TRACE_EVENT(drv_prepare_multicast, TP_PROTO(struct ieee80211_local *local, int mc_count), @@ -460,6 +503,12 @@ DEFINE_EVENT(local_sdata_evt, drv_hw_scan, TP_ARGS(local, sdata) ); +DEFINE_EVENT(local_sdata_evt, drv_cancel_hw_scan, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata), + TP_ARGS(local, sdata) +); + DEFINE_EVENT(local_sdata_evt, drv_sched_scan_start, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), @@ -648,64 +697,76 @@ TRACE_EVENT(drv_sta_remove, ); TRACE_EVENT(drv_conf_tx, - TP_PROTO(struct ieee80211_local *local, u16 queue, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + u16 queue, const struct ieee80211_tx_queue_params *params), - TP_ARGS(local, queue, params), + TP_ARGS(local, sdata, queue, params), TP_STRUCT__entry( LOCAL_ENTRY + VIF_ENTRY __field(u16, queue) __field(u16, txop) __field(u16, cw_min) __field(u16, cw_max) __field(u8, aifs) + __field(bool, uapsd) ), TP_fast_assign( LOCAL_ASSIGN; + VIF_ASSIGN; __entry->queue = queue; __entry->txop = params->txop; __entry->cw_max = params->cw_max; __entry->cw_min = params->cw_min; __entry->aifs = params->aifs; + __entry->uapsd = params->uapsd; ), TP_printk( - LOCAL_PR_FMT " queue:%d", - LOCAL_PR_ARG, __entry->queue + LOCAL_PR_FMT VIF_PR_FMT " queue:%d", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->queue ) ); -DEFINE_EVENT(local_only_evt, drv_get_tsf, - TP_PROTO(struct ieee80211_local *local), - TP_ARGS(local) +DEFINE_EVENT(local_sdata_evt, drv_get_tsf, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata), + TP_ARGS(local, sdata) ); TRACE_EVENT(drv_set_tsf, - TP_PROTO(struct ieee80211_local *local, u64 tsf), + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + u64 tsf), - TP_ARGS(local, tsf), + TP_ARGS(local, sdata, tsf), TP_STRUCT__entry( LOCAL_ENTRY + VIF_ENTRY __field(u64, tsf) ), TP_fast_assign( LOCAL_ASSIGN; + VIF_ASSIGN; __entry->tsf = tsf; ), TP_printk( - LOCAL_PR_FMT " tsf:%llu", - LOCAL_PR_ARG, (unsigned long long)__entry->tsf + LOCAL_PR_FMT VIF_PR_FMT " tsf:%llu", + LOCAL_PR_ARG, VIF_PR_ARG, (unsigned long long)__entry->tsf ) ); -DEFINE_EVENT(local_only_evt, drv_reset_tsf, - TP_PROTO(struct ieee80211_local *local), - TP_ARGS(local) +DEFINE_EVENT(local_sdata_evt, drv_reset_tsf, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata), + TP_ARGS(local, sdata) ); DEFINE_EVENT(local_only_evt, drv_tx_last_beacon, @@ -1018,6 +1079,111 @@ TRACE_EVENT(drv_set_bitrate_mask, ) ); +TRACE_EVENT(drv_set_rekey_data, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct cfg80211_gtk_rekey_data *data), + + TP_ARGS(local, sdata, data), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __array(u8, kek, NL80211_KEK_LEN) + __array(u8, kck, NL80211_KCK_LEN) + __array(u8, replay_ctr, NL80211_REPLAY_CTR_LEN) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + memcpy(__entry->kek, data->kek, NL80211_KEK_LEN); + memcpy(__entry->kck, data->kck, NL80211_KCK_LEN); + memcpy(__entry->replay_ctr, data->replay_ctr, + NL80211_REPLAY_CTR_LEN); + ), + + TP_printk(LOCAL_PR_FMT VIF_PR_FMT, + LOCAL_PR_ARG, VIF_PR_ARG) +); + +TRACE_EVENT(drv_rssi_callback, + TP_PROTO(struct ieee80211_local *local, + enum ieee80211_rssi_event rssi_event), + + TP_ARGS(local, rssi_event), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(u32, rssi_event) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->rssi_event = rssi_event; + ), + + TP_printk( + LOCAL_PR_FMT " rssi_event:%d", + LOCAL_PR_ARG, __entry->rssi_event + ) +); + +DECLARE_EVENT_CLASS(release_evt, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sta *sta, + u16 tids, int num_frames, + enum ieee80211_frame_release_type reason, + bool more_data), + + TP_ARGS(local, sta, tids, num_frames, reason, more_data), + + TP_STRUCT__entry( + LOCAL_ENTRY + STA_ENTRY + __field(u16, tids) + __field(int, num_frames) + __field(int, reason) + __field(bool, more_data) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + STA_ASSIGN; + __entry->tids = tids; + __entry->num_frames = num_frames; + __entry->reason = reason; + __entry->more_data = more_data; + ), + + TP_printk( + LOCAL_PR_FMT STA_PR_FMT + " TIDs:0x%.4x frames:%d reason:%d more:%d", + LOCAL_PR_ARG, STA_PR_ARG, __entry->tids, __entry->num_frames, + __entry->reason, __entry->more_data + ) +); + +DEFINE_EVENT(release_evt, drv_release_buffered_frames, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sta *sta, + u16 tids, int num_frames, + enum ieee80211_frame_release_type reason, + bool more_data), + + TP_ARGS(local, sta, tids, num_frames, reason, more_data) +); + +DEFINE_EVENT(release_evt, drv_allow_buffered_frames, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sta *sta, + u16 tids, int num_frames, + enum ieee80211_frame_release_type reason, + bool more_data), + + TP_ARGS(local, sta, tids, num_frames, reason, more_data) +); + /* * Tracing for API calls that drivers call. */ @@ -1287,6 +1453,73 @@ DEFINE_EVENT(local_only_evt, api_remain_on_channel_expired, TP_ARGS(local) ); +TRACE_EVENT(api_gtk_rekey_notify, + TP_PROTO(struct ieee80211_sub_if_data *sdata, + const u8 *bssid, const u8 *replay_ctr), + + TP_ARGS(sdata, bssid, replay_ctr), + + TP_STRUCT__entry( + VIF_ENTRY + __array(u8, bssid, ETH_ALEN) + __array(u8, replay_ctr, NL80211_REPLAY_CTR_LEN) + ), + + TP_fast_assign( + VIF_ASSIGN; + memcpy(__entry->bssid, bssid, ETH_ALEN); + memcpy(__entry->replay_ctr, replay_ctr, NL80211_REPLAY_CTR_LEN); + ), + + TP_printk(VIF_PR_FMT, VIF_PR_ARG) +); + +TRACE_EVENT(api_enable_rssi_reports, + TP_PROTO(struct ieee80211_sub_if_data *sdata, + int rssi_min_thold, int rssi_max_thold), + + TP_ARGS(sdata, rssi_min_thold, rssi_max_thold), + + TP_STRUCT__entry( + VIF_ENTRY + __field(int, rssi_min_thold) + __field(int, rssi_max_thold) + ), + + TP_fast_assign( + VIF_ASSIGN; + __entry->rssi_min_thold = rssi_min_thold; + __entry->rssi_max_thold = rssi_max_thold; + ), + + TP_printk( + VIF_PR_FMT " rssi_min_thold =%d, rssi_max_thold = %d", + VIF_PR_ARG, __entry->rssi_min_thold, __entry->rssi_max_thold + ) +); + +TRACE_EVENT(api_eosp, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sta *sta), + + TP_ARGS(local, sta), + + TP_STRUCT__entry( + LOCAL_ENTRY + STA_ENTRY + ), + + TP_fast_assign( + LOCAL_ASSIGN; + STA_ASSIGN; + ), + + TP_printk( + LOCAL_PR_FMT STA_PR_FMT, + LOCAL_PR_ARG, STA_PR_FMT + ) +); + /* * Tracing for internal functions * (which may also be called in response to driver calls) diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 591add2..f0fb737 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -14,6 +14,7 @@ */ #include +#include #include #include "ieee80211_i.h" #include "rate.h" @@ -130,7 +131,7 @@ void ieee80211_ba_session_work(struct work_struct *work) * down by the code that set the flag, so this * need not run. */ - if (test_sta_flags(sta, WLAN_STA_BLOCK_BA)) + if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) return; mutex_lock(&sta->ampdu_mlme.mtx); @@ -140,6 +141,12 @@ void ieee80211_ba_session_work(struct work_struct *work) sta, tid, WLAN_BACK_RECIPIENT, WLAN_REASON_QSTA_TIMEOUT, true); + if (test_and_clear_bit(tid, + sta->ampdu_mlme.tid_rx_stop_requested)) + ___ieee80211_stop_rx_ba_session( + sta, tid, WLAN_BACK_RECIPIENT, + WLAN_REASON_UNSPECIFIED, true); + tid_tx = sta->ampdu_mlme.tid_start_tx[tid]; if (tid_tx) { /* @@ -180,12 +187,8 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, u16 params; skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); - - if (!skb) { - printk(KERN_ERR "%s: failed to allocate buffer " - "for delba frame\n", sdata->name); + if (!skb) return; - } skb_reserve(skb, local->hw.extra_tx_headroom); mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index c6399f6..1c018d1 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -81,10 +81,10 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, lockdep_assert_held(&ifibss->mtx); /* Reset own TSF to allow time synchronization work. */ - drv_reset_tsf(local); + drv_reset_tsf(local, sdata); skb = ifibss->skb; - rcu_assign_pointer(ifibss->presp, NULL); + RCU_INIT_POINTER(ifibss->presp, NULL); synchronize_rcu(); skb->data = skb->head; skb->len = 0; @@ -314,7 +314,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, } if (sta && elems->wmm_info) - set_sta_flags(sta, WLAN_STA_WME); + set_sta_flag(sta, WLAN_STA_WME); rcu_read_unlock(); } @@ -382,7 +382,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, * second best option: get current TSF * (will return -1 if not supported) */ - rx_timestamp = drv_get_tsf(local); + rx_timestamp = drv_get_tsf(local, sdata); } #ifdef CONFIG_MAC80211_IBSS_DEBUG @@ -417,7 +417,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, * must be callable in atomic context. */ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, - u8 *bssid,u8 *addr, u32 supp_rates, + u8 *bssid, u8 *addr, u32 supp_rates, gfp_t gfp) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; @@ -452,7 +452,7 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, return NULL; sta->last_rx = jiffies; - set_sta_flags(sta, WLAN_STA_AUTHORIZED); + set_sta_flag(sta, WLAN_STA_AUTHORIZED); /* make sure mandatory rates are always added */ sta->sta.supp_rates[band] = supp_rates | @@ -914,6 +914,7 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, sdata->u.ibss.privacy = params->privacy; sdata->u.ibss.basic_rates = params->basic_rates; + sdata->u.ibss.last_scan_completed = jiffies; memcpy(sdata->vif.bss_conf.mcast_rate, params->mcast_rate, sizeof(params->mcast_rate)); @@ -995,7 +996,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) kfree(sdata->u.ibss.ie); skb = rcu_dereference_protected(sdata->u.ibss.presp, lockdep_is_held(&sdata->u.ibss.mtx)); - rcu_assign_pointer(sdata->u.ibss.presp, NULL); + RCU_INIT_POINTER(sdata->u.ibss.presp, NULL); sdata->vif.bss_conf.ibss_joined = false; ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_IBSS); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 62b86f0..8da371c5 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -53,11 +53,25 @@ struct ieee80211_local; #define TU_TO_EXP_TIME(x) (jiffies + usecs_to_jiffies((x) * 1024)) -#define IEEE80211_DEFAULT_UAPSD_QUEUES \ - (IEEE80211_WMM_IE_STA_QOSINFO_AC_BK | \ - IEEE80211_WMM_IE_STA_QOSINFO_AC_BE | \ - IEEE80211_WMM_IE_STA_QOSINFO_AC_VI | \ - IEEE80211_WMM_IE_STA_QOSINFO_AC_VO) +/* + * Some APs experience problems when working with U-APSD. Decreasing the + * probability of that happening by using legacy mode for all ACs but VO isn't + * enough. + * + * Cisco 4410N originally forced us to enable VO by default only because it + * treated non-VO ACs as legacy. + * + * However some APs (notably Netgear R7000) silently reclassify packets to + * different ACs. Since u-APSD ACs require trigger frames for frame retrieval + * clients would never see some frames (e.g. ARP responses) or would fetch them + * accidentally after a long time. + * + * It makes little sense to enable u-APSD queues by default because it needs + * userspace applications to be aware of it to actually take advantage of the + * possible additional powersavings. Implicitly depending on driver autotrigger + * frame support doesn't make much sense. + */ +#define IEEE80211_DEFAULT_UAPSD_QUEUES 0 #define IEEE80211_DEFAULT_MAX_SP_LEN \ IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL @@ -136,7 +150,6 @@ typedef unsigned __bitwise__ ieee80211_tx_result; #define TX_DROP ((__force ieee80211_tx_result) 1u) #define TX_QUEUED ((__force ieee80211_tx_result) 2u) -#define IEEE80211_TX_FRAGMENTED BIT(0) #define IEEE80211_TX_UNICAST BIT(1) #define IEEE80211_TX_PS_BUFFERED BIT(2) @@ -149,7 +162,6 @@ struct ieee80211_tx_data { struct ieee80211_channel *channel; - u16 ethertype; unsigned int flags; }; @@ -202,7 +214,22 @@ struct ieee80211_rx_data { struct ieee80211_key *key; unsigned int flags; - int queue; + + /* + * Index into sequence numbers array, 0..16 + * since the last (16) is used for non-QoS, + * will be 16 on non-QoS frames. + */ + int seqno_idx; + + /* + * Index into the security IV/PN arrays, 0..16 + * since the last (16) is used for CCMP-encrypted + * management frames, will be set to 16 on mgmt + * frames and 0 on non-QoS frames. + */ + int security_idx; + u32 tkip_iv32; u16 tkip_iv16; }; @@ -246,6 +273,7 @@ struct mesh_stats { __u32 fwded_frames; /* Mesh total forwarded frames */ __u32 dropped_frames_ttl; /* Not transmitted since mesh_ttl == 0*/ __u32 dropped_frames_no_route; /* Not transmitted, no route found */ + __u32 dropped_frames_congestion;/* Not forwarded due to congestion */ atomic_t estab_plinks; }; @@ -308,6 +336,7 @@ struct ieee80211_work { u8 key[WLAN_KEY_LEN_WEP104]; u8 key_len, key_idx; bool privacy; + bool synced; } probe_auth; struct { struct cfg80211_bss *bss; @@ -321,6 +350,7 @@ struct ieee80211_work { u8 ssid_len; u8 supp_rates_len; bool wmm_used, use_11n, uapsd_used; + bool synced; } assoc; struct { u32 duration; @@ -419,6 +449,14 @@ struct ieee80211_if_managed { * generated for the current association. */ int last_cqm_event_signal; + + /* + * State variables for keeping track of RSSI of the AP currently + * connected to and informing driver when RSSI has gone + * below/above a certain threshold. + */ + int rssi_min_thold, rssi_max_thold; + int last_ave_beacon_signal; }; struct ieee80211_if_ibss { @@ -491,6 +529,7 @@ struct ieee80211_if_mesh { struct mesh_config mshcfg; u32 mesh_seqnum; bool accepting_plinks; + int num_gates; const u8 *ie; u8 ie_len; enum { @@ -517,12 +556,14 @@ struct ieee80211_if_mesh { * @IEEE80211_SDATA_DONT_BRIDGE_PACKETS: bridge packets between * associated stations and deliver multicast frames both * back to wireless media and to the local net stack. + * @IEEE80211_SDATA_DISCONNECT_RESUME: Disconnect after resume. */ enum ieee80211_sub_if_data_flags { IEEE80211_SDATA_ALLMULTI = BIT(0), IEEE80211_SDATA_PROMISC = BIT(1), IEEE80211_SDATA_OPERATING_GMODE = BIT(2), IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(3), + IEEE80211_SDATA_DISCONNECT_RESUME = BIT(4), }; /** @@ -546,6 +587,9 @@ struct ieee80211_sub_if_data { /* keys */ struct list_head key_list; + /* count for keys needing tailroom space allocation */ + int crypto_tx_tailroom_needed_cnt; + struct net_device *dev; struct ieee80211_local *local; @@ -579,6 +623,8 @@ struct ieee80211_sub_if_data { __be16 control_port_protocol; bool control_port_no_encrypt; + struct ieee80211_tx_queue_params tx_conf[IEEE80211_MAX_QUEUES]; + struct work_struct work; struct sk_buff_head skb_queue; @@ -632,6 +678,11 @@ enum sdata_queue_type { enum { IEEE80211_RX_MSG = 1, IEEE80211_TX_STATUS_MSG = 2, + IEEE80211_EOSP_MSG = 3, +}; + +struct skb_eosp_msg_data { + u8 sta[ETH_ALEN], iface[ETH_ALEN]; }; enum queue_stop_reason { @@ -641,6 +692,7 @@ enum queue_stop_reason { IEEE80211_QUEUE_STOP_REASON_AGGREGATION, IEEE80211_QUEUE_STOP_REASON_SUSPEND, IEEE80211_QUEUE_STOP_REASON_SKB_ADD, + IEEE80211_QUEUE_STOP_REASON_CHTYPE_CHANGE, }; #ifdef CONFIG_MAC80211_LEDS @@ -664,16 +716,22 @@ struct tpt_led_trigger { * well be on the operating channel * @SCAN_HW_SCANNING: The hardware is scanning for us, we have no way to * determine if we are on the operating channel or not + * @SCAN_OFF_CHANNEL: We're off our operating channel for scanning, + * gets only set in conjunction with SCAN_SW_SCANNING * @SCAN_COMPLETED: Set for our scan work function when the driver reported * that the scan completed. * @SCAN_ABORTED: Set for our scan work function when the driver reported * a scan complete for an aborted scan. + * @SCAN_HW_CANCELLED: Set for our scan work function when the scan is being + * cancelled. */ enum { SCAN_SW_SCANNING, SCAN_HW_SCANNING, + SCAN_OFF_CHANNEL, SCAN_COMPLETED, SCAN_ABORTED, + SCAN_HW_CANCELLED, }; /** @@ -973,7 +1031,6 @@ struct ieee80211_local { unsigned int hw_roc_duration; u32 hw_roc_cookie; bool hw_roc_for_tx; - unsigned long hw_offchan_tx_cookie; /* dummy netdev for use w/ NAPI */ struct net_device napi_dev; @@ -993,69 +1050,6 @@ struct ieee80211_ra_tid { u16 tid; }; -/* Parsed Information Elements */ -struct ieee802_11_elems { - u8 *ie_start; - size_t total_len; - - /* pointers to IEs */ - u8 *ssid; - u8 *supp_rates; - u8 *fh_params; - u8 *ds_params; - u8 *cf_params; - struct ieee80211_tim_ie *tim; - u8 *ibss_params; - u8 *challenge; - u8 *wpa; - u8 *rsn; - u8 *erp_info; - u8 *ext_supp_rates; - u8 *wmm_info; - u8 *wmm_param; - struct ieee80211_ht_cap *ht_cap_elem; - struct ieee80211_ht_info *ht_info_elem; - struct ieee80211_meshconf_ie *mesh_config; - u8 *mesh_id; - u8 *peer_link; - u8 *preq; - u8 *prep; - u8 *perr; - struct ieee80211_rann_ie *rann; - u8 *ch_switch_elem; - u8 *country_elem; - u8 *pwr_constr_elem; - u8 *quiet_elem; /* first quite element */ - u8 *timeout_int; - - /* length of them, respectively */ - u8 ssid_len; - u8 supp_rates_len; - u8 fh_params_len; - u8 ds_params_len; - u8 cf_params_len; - u8 tim_len; - u8 ibss_params_len; - u8 challenge_len; - u8 wpa_len; - u8 rsn_len; - u8 erp_info_len; - u8 ext_supp_rates_len; - u8 wmm_info_len; - u8 wmm_param_len; - u8 mesh_id_len; - u8 peer_link_len; - u8 preq_len; - u8 prep_len; - u8 perr_len; - u8 ch_switch_elem_len; - u8 country_elem_len; - u8 pwr_constr_elem_len; - u8 quiet_elem_len; - u8 num_of_quiet_elem; /* can be more the one */ - u8 timeout_int_len; -}; - static inline struct ieee80211_local *hw_to_local( struct ieee80211_hw *hw) { @@ -1166,10 +1160,8 @@ int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata); void ieee80211_sched_scan_stopped_work(struct work_struct *work); /* off-channel helpers */ -bool ieee80211_cfg_on_oper_channel(struct ieee80211_local *local); -void ieee80211_offchannel_enable_all_ps(struct ieee80211_local *local, - bool tell_ap); -void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local); +void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local); +void ieee80211_offchannel_stop_station(struct ieee80211_local *local); void ieee80211_offchannel_return(struct ieee80211_local *local, bool enable_beaconing); void ieee80211_hw_roc_setup(struct ieee80211_local *local); @@ -1202,23 +1194,10 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev); -/* - * radiotap header for status frames - */ -struct ieee80211_tx_status_rtap_hdr { - struct ieee80211_radiotap_header hdr; - u8 rate; - u8 padding_for_rate; - __le16 tx_flags; - u8 data_retries; -} __packed; - - /* HT */ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband, struct ieee80211_ht_cap *ht_cap_ie, struct ieee80211_sta_ht_cap *ht_cap); -void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn); void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, const u8 *da, u16 tid, u16 initiator, u16 reason_code); @@ -1302,6 +1281,7 @@ void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int ke struct ieee80211_hdr *hdr, const u8 *tsc, gfp_t gfp); void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata); +void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); void ieee802_11_parse_elems(u8 *start, size_t len, struct ieee802_11_elems *elems); @@ -1333,11 +1313,11 @@ void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue, enum queue_stop_reason reason); void ieee80211_add_pending_skb(struct ieee80211_local *local, struct sk_buff *skb); -int ieee80211_add_pending_skbs(struct ieee80211_local *local, - struct sk_buff_head *skbs); -int ieee80211_add_pending_skbs_fn(struct ieee80211_local *local, - struct sk_buff_head *skbs, - void (*fn)(void *data), void *data); +void ieee80211_add_pending_skbs(struct ieee80211_local *local, + struct sk_buff_head *skbs); +void ieee80211_add_pending_skbs_fn(struct ieee80211_local *local, + struct sk_buff_head *skbs, + void (*fn)(void *data), void *data); void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, @@ -1348,12 +1328,14 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, enum ieee80211_band band, u32 rate_mask, u8 channel); struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, - u8 *dst, + u8 *dst, u32 ratemask, const u8 *ssid, size_t ssid_len, - const u8 *ie, size_t ie_len); + const u8 *ie, size_t ie_len, + bool directed); void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, const u8 *ssid, size_t ssid_len, - const u8 *ie, size_t ie_len); + const u8 *ie, size_t ie_len, + u32 ratemask, bool directed, bool no_cck); void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, const size_t supp_rates_len, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index bd1ef84..24ec86f 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -299,8 +299,8 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) goto err_del_interface; } - /* no locking required since STA is not live yet */ - sta->flags |= WLAN_STA_AUTHORIZED; + /* no atomic bitop required since STA is not live yet */ + set_sta_flag(sta, WLAN_STA_AUTHORIZED); res = sta_info_insert(sta); if (res) { @@ -363,8 +363,7 @@ static int ieee80211_open(struct net_device *dev) int err; /* fail early if user set an invalid address */ - if (!is_zero_ether_addr(dev->dev_addr) && - !is_valid_ether_addr(dev->dev_addr)) + if (!is_valid_ether_addr(dev->dev_addr)) return -EADDRNOTAVAIL; err = ieee80211_check_concurrent_iface(sdata, sdata->vif.type); @@ -383,10 +382,12 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, u32 hw_reconf_flags = 0; int i; enum nl80211_channel_type orig_ct; + bool cancel_scan; clear_bit(SDATA_STATE_RUNNING, &sdata->state); - if (local->scan_sdata == sdata) + cancel_scan = local->scan_sdata == sdata; + if (cancel_scan) ieee80211_scan_cancel(local); /* @@ -457,21 +458,19 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, BSS_CHANGED_BEACON_ENABLED); /* remove beacon */ - rcu_assign_pointer(sdata->u.ap.beacon, NULL); + RCU_INIT_POINTER(sdata->u.ap.beacon, NULL); synchronize_rcu(); kfree(old_beacon); - /* free all potentially still buffered bcast frames */ - while ((skb = skb_dequeue(&sdata->u.ap.ps_bc_buf))) { - local->total_ps_buffered--; - dev_kfree_skb(skb); - } - /* down all dependent devices, that is VLANs */ list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans, u.vlan.list) dev_close(vlan->dev); WARN_ON(!list_empty(&sdata->u.ap.vlans)); + + /* free all potentially still buffered bcast frames */ + local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps_bc_buf); + skb_queue_purge(&sdata->u.ap.ps_bc_buf); } if (going_down) @@ -546,6 +545,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_ps(local, -1); + if (cancel_scan) + flush_delayed_work(&local->scan_work); + if (local->open_count == 0) { if (local->ops->napi_poll) napi_disable(&local->napi); @@ -658,7 +660,7 @@ static const struct net_device_ops ieee80211_dataif_ops = { .ndo_stop = ieee80211_stop, .ndo_uninit = ieee80211_teardown_sdata, .ndo_start_xmit = ieee80211_subif_start_xmit, - .ndo_set_multicast_list = ieee80211_set_multicast_list, + .ndo_set_rx_mode = ieee80211_set_multicast_list, .ndo_change_mtu = ieee80211_change_mtu, .ndo_set_mac_address = ieee80211_change_mac, .ndo_select_queue = ieee80211_netdev_select_queue, @@ -702,7 +704,7 @@ static const struct net_device_ops ieee80211_monitorif_ops = { .ndo_stop = ieee80211_stop, .ndo_uninit = ieee80211_teardown_sdata, .ndo_start_xmit = ieee80211_monitor_start_xmit, - .ndo_set_multicast_list = ieee80211_set_multicast_list, + .ndo_set_rx_mode = ieee80211_set_multicast_list, .ndo_change_mtu = ieee80211_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_select_queue = ieee80211_monitor_select_queue, @@ -1143,8 +1145,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ASSERT_RTNL(); - ndev = alloc_netdev_mq(sizeof(*sdata) + local->hw.vif_data_size, - name, ieee80211_if_setup, local->hw.queues); + ndev = alloc_netdev_mqs(sizeof(*sdata) + local->hw.vif_data_size, + name, ieee80211_if_setup, local->hw.queues, 1); if (!ndev) return -ENOMEM; dev_net_set(ndev, wiphy_net(local->hw.wiphy)); @@ -1227,6 +1229,9 @@ void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata) list_del_rcu(&sdata->list); mutex_unlock(&sdata->local->iflist_mtx); + if (ieee80211_vif_is_mesh(&sdata->vif)) + mesh_path_flush_by_iface(sdata); + synchronize_rcu(); unregister_netdevice(sdata->dev); } @@ -1255,6 +1260,9 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) { list_del(&sdata->list); + if (ieee80211_vif_is_mesh(&sdata->vif)) + mesh_path_flush_by_iface(sdata); + unregister_netdevice_queue(sdata->dev, &unreg_list); } mutex_unlock(&local->iflist_mtx); diff --git a/net/mac80211/key.c b/net/mac80211/key.c index f825e2f..fb02ea5 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "ieee80211_i.h" #include "driver-ops.h" @@ -61,6 +62,36 @@ static struct ieee80211_sta *get_sta_for_key(struct ieee80211_key *key) return NULL; } +static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata) +{ + /* + * When this count is zero, SKB resizing for allocating tailroom + * for IV or MMIC is skipped. But, this check has created two race + * cases in xmit path while transiting from zero count to one: + * + * 1. SKB resize was skipped because no key was added but just before + * the xmit key is added and SW encryption kicks off. + * + * 2. SKB resize was skipped because all the keys were hw planted but + * just before xmit one of the key is deleted and SW encryption kicks + * off. + * + * In both the above case SW encryption will find not enough space for + * tailroom and exits with WARN_ON. (See WARN_ONs at wpa.c) + * + * Solution has been explained at + * http://mid.gmane.org/1308590980.4322.19.camel@jlt3.sipsolutions.net + */ + + if (!sdata->crypto_tx_tailroom_needed_cnt++) { + /* + * Flush all XMIT packets currently using HW encryption or no + * encryption at all if the count transition is from 0 -> 1. + */ + synchronize_net(); + } +} + static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) { struct ieee80211_sub_if_data *sdata; @@ -101,6 +132,11 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) if (!ret) { key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; + + if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) || + (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV))) + sdata->crypto_tx_tailroom_needed_cnt--; + return 0; } @@ -142,6 +178,10 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) sta = get_sta_for_key(key); sdata = key->sdata; + if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) || + (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV))) + increment_tailroom_need_count(sdata); + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, struct ieee80211_sub_if_data, @@ -239,7 +279,7 @@ static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, bool defunikey, defmultikey, defmgmtkey; if (new) - list_add(&new->list, &sdata->key_list); + list_add_tail(&new->list, &sdata->key_list); if (sta && pairwise) { rcu_assign_pointer(sta->ptk, new); @@ -330,6 +370,7 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, get_unaligned_le16(seq); } } + spin_lock_init(&key->u.tkip.txlock); break; case WLAN_CIPHER_SUITE_CCMP: key->conf.iv_len = CCMP_HDR_LEN; @@ -394,8 +435,10 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key) ieee80211_aes_key_free(key->u.ccmp.tfm); if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC) ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); - if (key->local) + if (key->local) { ieee80211_debugfs_key_remove(key); + key->sdata->crypto_tx_tailroom_needed_cnt--; + } kfree(key); } @@ -422,7 +465,7 @@ int ieee80211_key_link(struct ieee80211_key *key, * some hardware cannot handle TKIP with QoS, so * we indicate whether QoS could be in use. */ - if (test_sta_flags(sta, WLAN_STA_WME)) + if (test_sta_flag(sta, WLAN_STA_WME)) key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA; } else { if (sdata->vif.type == NL80211_IFTYPE_STATION) { @@ -436,7 +479,7 @@ int ieee80211_key_link(struct ieee80211_key *key, /* same here, the AP could be using QoS */ ap = sta_info_get(key->sdata, key->sdata->u.mgd.bssid); if (ap) { - if (test_sta_flags(ap, WLAN_STA_WME)) + if (test_sta_flag(ap, WLAN_STA_WME)) key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA; } @@ -452,6 +495,8 @@ int ieee80211_key_link(struct ieee80211_key *key, else old_key = key_mtx_dereference(sdata->local, sdata->keys[idx]); + increment_tailroom_need_count(sdata); + __ieee80211_key_replace(sdata, sta, pairwise, old_key, key); __ieee80211_key_destroy(old_key); @@ -498,12 +543,49 @@ void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata) mutex_lock(&sdata->local->key_mtx); - list_for_each_entry(key, &sdata->key_list, list) + sdata->crypto_tx_tailroom_needed_cnt = 0; + + list_for_each_entry(key, &sdata->key_list, list) { + increment_tailroom_need_count(sdata); ieee80211_key_enable_hw_accel(key); + } mutex_unlock(&sdata->local->key_mtx); } +void ieee80211_iter_keys(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + void (*iter)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct ieee80211_key_conf *key, + void *data), + void *iter_data) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_key *key; + struct ieee80211_sub_if_data *sdata; + + ASSERT_RTNL(); + + mutex_lock(&local->key_mtx); + if (vif) { + sdata = vif_to_sdata(vif); + list_for_each_entry(key, &sdata->key_list, list) + iter(hw, &sdata->vif, + key->sta ? &key->sta->sta : NULL, + &key->conf, iter_data); + } else { + list_for_each_entry(sdata, &local->interfaces, list) + list_for_each_entry(key, &sdata->key_list, list) + iter(hw, &sdata->vif, + key->sta ? &key->sta->sta : NULL, + &key->conf, iter_data); + } + mutex_unlock(&local->key_mtx); +} +EXPORT_SYMBOL(ieee80211_iter_keys); + void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata) { struct ieee80211_key *key; @@ -533,3 +615,89 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata) mutex_unlock(&sdata->local->key_mtx); } + + +void ieee80211_gtk_rekey_notify(struct ieee80211_vif *vif, const u8 *bssid, + const u8 *replay_ctr, gfp_t gfp) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + trace_api_gtk_rekey_notify(sdata, bssid, replay_ctr); + + cfg80211_gtk_rekey_notify(sdata->dev, bssid, replay_ctr, gfp); +} +EXPORT_SYMBOL_GPL(ieee80211_gtk_rekey_notify); + +void ieee80211_get_key_tx_seq(struct ieee80211_key_conf *keyconf, + struct ieee80211_key_seq *seq) +{ + struct ieee80211_key *key; + u64 pn64; + + if (WARN_ON(!(keyconf->flags & IEEE80211_KEY_FLAG_GENERATE_IV))) + return; + + key = container_of(keyconf, struct ieee80211_key, conf); + + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_TKIP: + seq->tkip.iv32 = key->u.tkip.tx.iv32; + seq->tkip.iv16 = key->u.tkip.tx.iv16; + break; + case WLAN_CIPHER_SUITE_CCMP: + pn64 = atomic64_read(&key->u.ccmp.tx_pn); + seq->ccmp.pn[5] = pn64; + seq->ccmp.pn[4] = pn64 >> 8; + seq->ccmp.pn[3] = pn64 >> 16; + seq->ccmp.pn[2] = pn64 >> 24; + seq->ccmp.pn[1] = pn64 >> 32; + seq->ccmp.pn[0] = pn64 >> 40; + break; + case WLAN_CIPHER_SUITE_AES_CMAC: + pn64 = atomic64_read(&key->u.aes_cmac.tx_pn); + seq->ccmp.pn[5] = pn64; + seq->ccmp.pn[4] = pn64 >> 8; + seq->ccmp.pn[3] = pn64 >> 16; + seq->ccmp.pn[2] = pn64 >> 24; + seq->ccmp.pn[1] = pn64 >> 32; + seq->ccmp.pn[0] = pn64 >> 40; + break; + default: + WARN_ON(1); + } +} +EXPORT_SYMBOL(ieee80211_get_key_tx_seq); + +void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf, + int tid, struct ieee80211_key_seq *seq) +{ + struct ieee80211_key *key; + const u8 *pn; + + key = container_of(keyconf, struct ieee80211_key, conf); + + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_TKIP: + if (WARN_ON(tid < 0 || tid >= NUM_RX_DATA_QUEUES)) + return; + seq->tkip.iv32 = key->u.tkip.rx[tid].iv32; + seq->tkip.iv16 = key->u.tkip.rx[tid].iv16; + break; + case WLAN_CIPHER_SUITE_CCMP: + if (WARN_ON(tid < -1 || tid >= NUM_RX_DATA_QUEUES)) + return; + if (tid < 0) + pn = key->u.ccmp.rx_pn[NUM_RX_DATA_QUEUES]; + else + pn = key->u.ccmp.rx_pn[tid]; + memcpy(seq->ccmp.pn, pn, CCMP_PN_LEN); + break; + case WLAN_CIPHER_SUITE_AES_CMAC: + if (WARN_ON(tid != 0)) + return; + pn = key->u.aes_cmac.rx_pn; + memcpy(seq->aes_cmac.pn, pn, CMAC_PN_LEN); + break; + } +} +EXPORT_SYMBOL(ieee80211_get_key_rx_seq); diff --git a/net/mac80211/key.h b/net/mac80211/key.h index d801d53..7d4e31f 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -28,8 +28,9 @@ #define CCMP_PN_LEN 6 #define TKIP_IV_LEN 8 #define TKIP_ICV_LEN 4 +#define CMAC_PN_LEN 6 -#define NUM_RX_DATA_QUEUES 17 +#define NUM_RX_DATA_QUEUES 16 struct ieee80211_local; struct ieee80211_sub_if_data; @@ -40,9 +41,11 @@ struct sta_info; * * @KEY_FLAG_UPLOADED_TO_HARDWARE: Indicates that this key is present * in the hardware for TX crypto hardware acceleration. + * @KEY_FLAG_TAINTED: Key is tainted and packets should be dropped. */ enum ieee80211_internal_key_flags { KEY_FLAG_UPLOADED_TO_HARDWARE = BIT(0), + KEY_FLAG_TAINTED = BIT(1), }; enum ieee80211_internal_tkip_state { @@ -52,9 +55,10 @@ enum ieee80211_internal_tkip_state { }; struct tkip_ctx { - u32 iv32; - u16 iv16; - u16 p1k[5]; + u32 iv32; /* current iv32 */ + u16 iv16; /* current iv16 */ + u16 p1k[5]; /* p1k cache */ + u32 p1k_iv32; /* iv32 for which p1k computed */ enum ieee80211_internal_tkip_state state; }; @@ -71,6 +75,9 @@ struct ieee80211_key { union { struct { + /* protects tx context */ + spinlock_t txlock; + /* last used TSC */ struct tkip_ctx tx; @@ -78,32 +85,23 @@ struct ieee80211_key { struct tkip_ctx rx[NUM_RX_DATA_QUEUES]; } tkip; struct { - u8 tx_pn[6]; + atomic64_t tx_pn; /* * Last received packet number. The first * NUM_RX_DATA_QUEUES counters are used with Data * frames and the last counter is used with Robust * Management frames. */ - u8 rx_pn[NUM_RX_DATA_QUEUES + 1][6]; + u8 rx_pn[NUM_RX_DATA_QUEUES + 1][CCMP_PN_LEN]; struct crypto_cipher *tfm; u32 replays; /* dot11RSNAStatsCCMPReplays */ - /* scratch buffers for virt_to_page() (crypto API) */ -#ifndef AES_BLOCK_LEN -#define AES_BLOCK_LEN 16 -#endif - u8 tx_crypto_buf[6 * AES_BLOCK_LEN]; - u8 rx_crypto_buf[6 * AES_BLOCK_LEN]; } ccmp; struct { - u8 tx_pn[6]; - u8 rx_pn[6]; + atomic64_t tx_pn; + u8 rx_pn[CMAC_PN_LEN]; struct crypto_cipher *tfm; u32 replays; /* dot11RSNAStatsCMACReplays */ u32 icverrors; /* dot11RSNAStatsCMACICVErrors */ - /* scratch buffers for virt_to_page() (crypto API) */ - u8 tx_crypto_buf[2 * AES_BLOCK_LEN]; - u8 rx_crypto_buf[2 * AES_BLOCK_LEN]; } aes_cmac; } u; diff --git a/net/mac80211/led.c b/net/mac80211/led.c index 1459033..1bf7903 100644 --- a/net/mac80211/led.c +++ b/net/mac80211/led.c @@ -9,6 +9,7 @@ /* just for IFNAMSIZ */ #include #include +#include #include "led.h" void ieee80211_led_rx(struct ieee80211_local *local) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 1e36fb3..7d9b21d 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include @@ -92,47 +92,6 @@ static void ieee80211_reconfig_filter(struct work_struct *work) ieee80211_configure_filter(local); } -/* - * Returns true if we are logically configured to be on - * the operating channel AND the hardware-conf is currently - * configured on the operating channel. Compares channel-type - * as well. - */ -bool ieee80211_cfg_on_oper_channel(struct ieee80211_local *local) -{ - struct ieee80211_channel *chan, *scan_chan; - enum nl80211_channel_type channel_type; - - /* This logic needs to match logic in ieee80211_hw_config */ - if (local->scan_channel) { - chan = local->scan_channel; - /* If scanning on oper channel, use whatever channel-type - * is currently in use. - */ - if (chan == local->oper_channel) - channel_type = local->_oper_channel_type; - else - channel_type = NL80211_CHAN_NO_HT; - } else if (local->tmp_channel) { - chan = scan_chan = local->tmp_channel; - channel_type = local->tmp_channel_type; - } else { - chan = local->oper_channel; - channel_type = local->_oper_channel_type; - } - - if (chan != local->oper_channel || - channel_type != local->_oper_channel_type) - return false; - - /* Check current hardware-config against oper_channel. */ - if ((local->oper_channel != local->hw.conf.channel) || - (local->_oper_channel_type != local->hw.conf.channel_type)) - return false; - - return true; -} - int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) { struct ieee80211_channel *chan, *scan_chan; @@ -145,9 +104,6 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) scan_chan = local->scan_channel; - /* If this off-channel logic ever changes, ieee80211_on_oper_channel - * may need to change as well. - */ offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL; if (scan_chan) { chan = scan_chan; @@ -158,19 +114,17 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) channel_type = local->_oper_channel_type; else channel_type = NL80211_CHAN_NO_HT; - } else if (local->tmp_channel) { + local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL; + } else if (local->tmp_channel && + local->oper_channel != local->tmp_channel) { chan = scan_chan = local->tmp_channel; channel_type = local->tmp_channel_type; + local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL; } else { chan = local->oper_channel; channel_type = local->_oper_channel_type; - } - - if (chan != local->oper_channel || - channel_type != local->_oper_channel_type) - local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL; - else local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL; + } offchannel_flag ^= local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL; @@ -279,7 +233,7 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, if (changed & BSS_CHANGED_BEACON_ENABLED) { if (local->quiescing || !ieee80211_sdata_running(sdata) || - test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state)) { + test_bit(SCAN_SW_SCANNING, &local->scanning)) { sdata->vif.bss_conf.enable_beacon = false; } else { /* @@ -325,6 +279,8 @@ u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata) static void ieee80211_tasklet_handler(unsigned long data) { struct ieee80211_local *local = (struct ieee80211_local *) data; + struct sta_info *sta, *tmp; + struct skb_eosp_msg_data *eosp_data; struct sk_buff *skb; while ((skb = skb_dequeue(&local->skb_queue)) || @@ -340,6 +296,18 @@ static void ieee80211_tasklet_handler(unsigned long data) skb->pkt_type = 0; ieee80211_tx_status(local_to_hw(local), skb); break; + case IEEE80211_EOSP_MSG: + eosp_data = (void *)skb->cb; + for_each_sta_info(local, eosp_data->sta, sta, tmp) { + /* skip wrong virtual interface */ + if (memcmp(eosp_data->iface, + sta->sdata->vif.addr, ETH_ALEN)) + continue; + clear_sta_flag(sta, WLAN_STA_SP); + break; + } + dev_kfree_skb(skb); + break; default: WARN(1, "mac80211: Packet is of unknown type %d\n", skb->pkt_type); @@ -608,6 +576,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, local->hw.max_rates = 1; local->hw.max_report_rates = 0; local->hw.max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF; + local->hw.max_tx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF; local->hw.conf.long_frame_max_tx_count = wiphy->retry_long; local->hw.conf.short_frame_max_tx_count = wiphy->retry_short; local->user_power_level = -1; @@ -742,6 +711,12 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (!local->int_scan_req) return -ENOMEM; + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + if (!local->hw.wiphy->bands[band]) + continue; + local->int_scan_req->rates[band] = (u32) -1; + } + /* if low-level driver supports AP, we also support VLAN */ if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_AP)) { hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_AP_VLAN); @@ -862,6 +837,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (local->ops->sched_scan_start) local->hw.wiphy->flags |= WIPHY_FLAG_SUPPORTS_SCHED_SCAN; + /* mac80211 based drivers don't support internal TDLS setup */ + if (local->hw.wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS) + local->hw.wiphy->flags |= WIPHY_FLAG_TDLS_EXTERNAL_SETUP; + result = wiphy_register(local->hw.wiphy); if (result < 0) goto fail_wiphy_register; @@ -885,12 +864,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) * and we need some headroom for passing the frame to monitor * interfaces, but never both at the same time. */ -#ifndef __CHECKER__ - BUILD_BUG_ON(IEEE80211_TX_STATUS_HEADROOM != - sizeof(struct ieee80211_tx_status_rtap_hdr)); -#endif local->tx_headroom = max_t(unsigned int , local->hw.extra_tx_headroom, - sizeof(struct ieee80211_tx_status_rtap_hdr)); + IEEE80211_TX_STATUS_HEADROOM); debugfs_hw_add(local); @@ -1012,7 +987,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) cancel_work_sync(&local->reconfig_filter); ieee80211_clear_tx_pending(local); - sta_info_stop(local); rate_control_deinitialize(local); if (skb_queue_len(&local->skb_queue) || @@ -1024,6 +998,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) destroy_workqueue(local->workqueue); wiphy_unregister(local->hw.wiphy); + sta_info_stop(local); ieee80211_wep_free(local); ieee80211_led_exit(local); kfree(local->int_scan_req); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 370aa94..f85de8e 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -13,10 +13,6 @@ #include "ieee80211_i.h" #include "mesh.h" -#define IEEE80211_MESH_PEER_INACTIVITY_LIMIT (1800 * HZ) -#define IEEE80211_MESH_HOUSEKEEPING_INTERVAL (60 * HZ) -#define IEEE80211_MESH_RANN_INTERVAL (1 * HZ) - #define MESHCONF_CAPAB_ACCEPT_PLINKS 0x01 #define MESHCONF_CAPAB_FORWARDING 0x08 @@ -27,6 +23,17 @@ int mesh_allocated; static struct kmem_cache *rm_cache; +#ifdef CONFIG_MAC80211_MESH +bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt) +{ + return (mgmt->u.action.u.mesh_action.action_code == + WLAN_MESH_ACTION_HWMP_PATH_SELECTION); +} +#else +bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt) +{ return false; } +#endif + void ieee80211s_init(void) { mesh_pathtbl_init(); @@ -193,10 +200,9 @@ int mesh_rmc_check(u8 *sa, struct ieee80211s_hdr *mesh_hdr, } p = kmem_cache_alloc(rm_cache, GFP_ATOMIC); - if (!p) { - printk(KERN_DEBUG "o11s: could not allocate RMC entry\n"); + if (!p) return 0; - } + p->seqnum = seqnum; p->exp_time = jiffies + RMC_TIMEOUT; memcpy(p->sa, sa, ETH_ALEN); @@ -204,89 +210,136 @@ int mesh_rmc_check(u8 *sa, struct ieee80211s_hdr *mesh_hdr, return 0; } -void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) +int +mesh_add_meshconf_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) { - struct ieee80211_local *local = sdata->local; - struct ieee80211_supported_band *sband; - u8 *pos; - int len, i, rate; - u8 neighbors; - - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - len = sband->n_bitrates; - if (len > 8) - len = 8; - pos = skb_put(skb, len + 2); - *pos++ = WLAN_EID_SUPP_RATES; - *pos++ = len; - for (i = 0; i < len; i++) { - rate = sband->bitrates[i].bitrate; - *pos++ = (u8) (rate / 5); - } - - if (sband->n_bitrates > len) { - pos = skb_put(skb, sband->n_bitrates - len + 2); - *pos++ = WLAN_EID_EXT_SUPP_RATES; - *pos++ = sband->n_bitrates - len; - for (i = len; i < sband->n_bitrates; i++) { - rate = sband->bitrates[i].bitrate; - *pos++ = (u8) (rate / 5); - } - } - - if (sband->band == IEEE80211_BAND_2GHZ) { - pos = skb_put(skb, 2 + 1); - *pos++ = WLAN_EID_DS_PARAMS; - *pos++ = 1; - *pos++ = ieee80211_frequency_to_channel(local->hw.conf.channel->center_freq); - } + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + u8 *pos, neighbors; + u8 meshconf_len = sizeof(struct ieee80211_meshconf_ie); - pos = skb_put(skb, 2 + sdata->u.mesh.mesh_id_len); - *pos++ = WLAN_EID_MESH_ID; - *pos++ = sdata->u.mesh.mesh_id_len; - if (sdata->u.mesh.mesh_id_len) - memcpy(pos, sdata->u.mesh.mesh_id, sdata->u.mesh.mesh_id_len); + if (skb_tailroom(skb) < 2 + meshconf_len) + return -ENOMEM; - pos = skb_put(skb, 2 + sizeof(struct ieee80211_meshconf_ie)); + pos = skb_put(skb, 2 + meshconf_len); *pos++ = WLAN_EID_MESH_CONFIG; - *pos++ = sizeof(struct ieee80211_meshconf_ie); + *pos++ = meshconf_len; /* Active path selection protocol ID */ - *pos++ = sdata->u.mesh.mesh_pp_id; - + *pos++ = ifmsh->mesh_pp_id; /* Active path selection metric ID */ - *pos++ = sdata->u.mesh.mesh_pm_id; - + *pos++ = ifmsh->mesh_pm_id; /* Congestion control mode identifier */ - *pos++ = sdata->u.mesh.mesh_cc_id; - + *pos++ = ifmsh->mesh_cc_id; /* Synchronization protocol identifier */ - *pos++ = sdata->u.mesh.mesh_sp_id; - + *pos++ = ifmsh->mesh_sp_id; /* Authentication Protocol identifier */ - *pos++ = sdata->u.mesh.mesh_auth_id; - + *pos++ = ifmsh->mesh_auth_id; /* Mesh Formation Info - number of neighbors */ - neighbors = atomic_read(&sdata->u.mesh.mshstats.estab_plinks); + neighbors = atomic_read(&ifmsh->mshstats.estab_plinks); /* Number of neighbor mesh STAs or 15 whichever is smaller */ neighbors = (neighbors > 15) ? 15 : neighbors; *pos++ = neighbors << 1; - /* Mesh capability */ - sdata->u.mesh.accepting_plinks = mesh_plink_availables(sdata); + ifmsh->accepting_plinks = mesh_plink_availables(sdata); *pos = MESHCONF_CAPAB_FORWARDING; - *pos++ |= sdata->u.mesh.accepting_plinks ? + *pos++ |= ifmsh->accepting_plinks ? MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00; *pos++ = 0x00; - if (sdata->u.mesh.ie) { - int len = sdata->u.mesh.ie_len; - const u8 *data = sdata->u.mesh.ie; - if (skb_tailroom(skb) > len) - memcpy(skb_put(skb, len), data, len); + return 0; +} + +int +mesh_add_meshid_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + u8 *pos; + + if (skb_tailroom(skb) < 2 + ifmsh->mesh_id_len) + return -ENOMEM; + + pos = skb_put(skb, 2 + ifmsh->mesh_id_len); + *pos++ = WLAN_EID_MESH_ID; + *pos++ = ifmsh->mesh_id_len; + if (ifmsh->mesh_id_len) + memcpy(pos, ifmsh->mesh_id, ifmsh->mesh_id_len); + + return 0; +} + +int +mesh_add_vendor_ies(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + u8 offset, len; + const u8 *data; + + if (!ifmsh->ie || !ifmsh->ie_len) + return 0; + + /* fast-forward to vendor IEs */ + offset = ieee80211_ie_split_vendor(ifmsh->ie, ifmsh->ie_len, 0); + + if (offset) { + len = ifmsh->ie_len - offset; + data = ifmsh->ie + offset; + if (skb_tailroom(skb) < len) + return -ENOMEM; + memcpy(skb_put(skb, len), data, len); } + + return 0; } +int +mesh_add_rsn_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + u8 len = 0; + const u8 *data; + + if (!ifmsh->ie || !ifmsh->ie_len) + return 0; + + /* find RSN IE */ + data = ifmsh->ie; + while (data < ifmsh->ie + ifmsh->ie_len) { + if (*data == WLAN_EID_RSN) { + len = data[1] + 2; + break; + } + data++; + } + + if (len) { + if (skb_tailroom(skb) < len) + return -ENOMEM; + memcpy(skb_put(skb, len), data, len); + } + + return 0; +} + +int mesh_add_ds_params_ie(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_supported_band *sband; + u8 *pos; + + if (skb_tailroom(skb) < 3) + return -ENOMEM; + + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + if (sband->band == IEEE80211_BAND_2GHZ) { + pos = skb_put(skb, 2 + 1); + *pos++ = WLAN_EID_DS_PARAMS; + *pos++ = 1; + *pos++ = ieee80211_frequency_to_channel(local->hw.conf.channel->center_freq); + } + + return 0; +} static void ieee80211_mesh_path_timer(unsigned long data) { @@ -352,8 +405,7 @@ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, memcpy(hdr->addr3, meshsa, ETH_ALEN); return 24; } else { - *fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | - IEEE80211_FCTL_TODS); + *fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); /* RA TA DA SA */ memset(hdr->addr1, 0, ETH_ALEN); /* RA is resolved later */ memcpy(hdr->addr2, meshsa, ETH_ALEN); @@ -425,7 +477,8 @@ static void ieee80211_mesh_rootpath(struct ieee80211_sub_if_data *sdata) mesh_path_tx_root_frame(sdata); mod_timer(&ifmsh->mesh_path_root_timer, - round_jiffies(jiffies + IEEE80211_MESH_RANN_INTERVAL)); + round_jiffies(TU_TO_EXP_TIME( + ifmsh->mshcfg.dot11MeshHWMPRannInterval))); } #ifdef CONFIG_PM @@ -433,7 +486,7 @@ void ieee80211_mesh_quiesce(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - /* use atomic bitops in case both timers fire at the same time */ + /* use atomic bitops in case all timers fire at the same time */ if (del_timer_sync(&ifmsh->housekeeping_timer)) set_bit(TMR_RUNNING_HK, &ifmsh->timers_running); @@ -558,11 +611,18 @@ static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata, struct ieee80211_rx_status *rx_status) { switch (mgmt->u.action.category) { - case WLAN_CATEGORY_MESH_ACTION: - mesh_rx_plink_frame(sdata, mgmt, len, rx_status); + case WLAN_CATEGORY_SELF_PROTECTED: + switch (mgmt->u.action.u.self_prot.action_code) { + case WLAN_SP_MESH_PEERING_OPEN: + case WLAN_SP_MESH_PEERING_CLOSE: + case WLAN_SP_MESH_PEERING_CONFIRM: + mesh_rx_plink_frame(sdata, mgmt, len, rx_status); + break; + } break; - case WLAN_CATEGORY_MESH_PATH_SEL: - mesh_rx_path_sel_frame(sdata, mgmt, len); + case WLAN_CATEGORY_MESH_ACTION: + if (mesh_action_is_path_sel(mgmt)) + mesh_rx_path_sel_frame(sdata, mgmt, len); break; } } @@ -634,6 +694,7 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) ifmsh->accepting_plinks = true; ifmsh->preq_id = 0; ifmsh->sn = 0; + ifmsh->num_gates = 0; atomic_set(&ifmsh->mpaths, 0); mesh_rmc_init(sdata); ifmsh->last_preq = jiffies; diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 249e733..8c00e2d 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -80,7 +80,10 @@ enum mesh_deferred_task_flags { * retry * @discovery_retries: number of discovery retries * @flags: mesh path flags, as specified on &enum mesh_path_flags - * @state_lock: mesh path state lock + * @state_lock: mesh path state lock used to protect changes to the + * mpath itself. No need to take this lock when adding or removing + * an mpath to a hash bucket on a path table. + * @is_gate: the destination station of this path is a mesh gate * * * The combination of dst and sdata is unique in the mesh path table. Since the @@ -104,6 +107,7 @@ struct mesh_path { u8 discovery_retries; enum mesh_path_flags flags; spinlock_t state_lock; + bool is_gate; }; /** @@ -120,6 +124,9 @@ struct mesh_path { * buckets * @mean_chain_len: maximum average length for the hash buckets' list, if it is * reached, the table will grow + * @known_gates: list of known mesh gates and their mpaths by the station. The + * gate's mpath may or may not be resolved and active. + * * rcu_head: RCU head to free the table */ struct mesh_table { @@ -133,6 +140,8 @@ struct mesh_table { int (*copy_node) (struct hlist_node *p, struct mesh_table *newtbl); int size_order; int mean_chain_len; + struct hlist_head *known_gates; + spinlock_t gates_lock; struct rcu_head rcu_head; }; @@ -166,6 +175,8 @@ struct mesh_rmc { u32 idx_mask; }; +#define IEEE80211_MESH_PEER_INACTIVITY_LIMIT (1800 * HZ) +#define IEEE80211_MESH_HOUSEKEEPING_INTERVAL (60 * HZ) #define MESH_DEFAULT_BEACON_INTERVAL 1000 /* in 1024 us units */ @@ -177,14 +188,6 @@ struct mesh_rmc { /* Maximum number of paths per interface */ #define MESH_MAX_MPATHS 1024 -/* Pending ANA approval */ -#define MESH_PATH_SEL_ACTION 0 - -/* PERR reason codes */ -#define PEER_RCODE_UNSPECIFIED 11 -#define PERR_RCODE_NO_ROUTE 12 -#define PERR_RCODE_DEST_UNREACH 13 - /* Public interfaces */ /* Various */ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, @@ -199,6 +202,16 @@ bool mesh_matches_local(struct ieee802_11_elems *ie, void mesh_ids_set_default(struct ieee80211_if_mesh *mesh); void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata); +int mesh_add_meshconf_ie(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata); +int mesh_add_meshid_ie(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata); +int mesh_add_rsn_ie(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata); +int mesh_add_vendor_ies(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata); +int mesh_add_ds_params_ie(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata); void mesh_rmc_free(struct ieee80211_sub_if_data *sdata); int mesh_rmc_init(struct ieee80211_sub_if_data *sdata); void ieee80211s_init(void); @@ -223,10 +236,13 @@ struct mesh_path *mesh_path_lookup_by_idx(int idx, struct ieee80211_sub_if_data *sdata); void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop); void mesh_path_expire(struct ieee80211_sub_if_data *sdata); -void mesh_path_flush(struct ieee80211_sub_if_data *sdata); void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len); int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata); + +int mesh_path_add_gate(struct mesh_path *mpath); +int mesh_path_send_to_gates(struct mesh_path *mpath); +int mesh_gate_num(struct ieee80211_sub_if_data *sdata); /* Mesh plinks */ void mesh_neighbour_update(u8 *hw_addr, u32 rates, struct ieee80211_sub_if_data *sdata, @@ -256,12 +272,14 @@ void mesh_pathtbl_unregister(void); int mesh_path_del(u8 *addr, struct ieee80211_sub_if_data *sdata); void mesh_path_timer(unsigned long data); void mesh_path_flush_by_nexthop(struct sta_info *sta); +void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata); void mesh_path_discard_frame(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata); void mesh_path_quiesce(struct ieee80211_sub_if_data *sdata); void mesh_path_restart(struct ieee80211_sub_if_data *sdata); void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata); +bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt); extern int mesh_paths_generation; #ifdef CONFIG_MAC80211_MESH diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 2b18053..174040a 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -8,10 +8,12 @@ */ #include +#include "wme.h" #include "mesh.h" #ifdef CONFIG_MAC80211_VERBOSE_MHWMP_DEBUG -#define mhwmp_dbg(fmt, args...) printk(KERN_DEBUG "Mesh HWMP: " fmt, ##args) +#define mhwmp_dbg(fmt, args...) \ + printk(KERN_DEBUG "Mesh HWMP (%s): " fmt "\n", sdata->name, ##args) #else #define mhwmp_dbg(fmt, args...) do { (void)(0); } while (0) #endif @@ -57,29 +59,29 @@ static inline u32 u16_field_get(u8 *preq_elem, int offset, bool ae) #define PREQ_IE_TTL(x) (*(x + 2)) #define PREQ_IE_PREQ_ID(x) u32_field_get(x, 3, 0) #define PREQ_IE_ORIG_ADDR(x) (x + 7) -#define PREQ_IE_ORIG_SN(x) u32_field_get(x, 13, 0); -#define PREQ_IE_LIFETIME(x) u32_field_get(x, 17, AE_F_SET(x)); -#define PREQ_IE_METRIC(x) u32_field_get(x, 21, AE_F_SET(x)); +#define PREQ_IE_ORIG_SN(x) u32_field_get(x, 13, 0) +#define PREQ_IE_LIFETIME(x) u32_field_get(x, 17, AE_F_SET(x)) +#define PREQ_IE_METRIC(x) u32_field_get(x, 21, AE_F_SET(x)) #define PREQ_IE_TARGET_F(x) (*(AE_F_SET(x) ? x + 32 : x + 26)) #define PREQ_IE_TARGET_ADDR(x) (AE_F_SET(x) ? x + 33 : x + 27) -#define PREQ_IE_TARGET_SN(x) u32_field_get(x, 33, AE_F_SET(x)); +#define PREQ_IE_TARGET_SN(x) u32_field_get(x, 33, AE_F_SET(x)) #define PREP_IE_FLAGS(x) PREQ_IE_FLAGS(x) #define PREP_IE_HOPCOUNT(x) PREQ_IE_HOPCOUNT(x) #define PREP_IE_TTL(x) PREQ_IE_TTL(x) -#define PREP_IE_ORIG_ADDR(x) (x + 3) -#define PREP_IE_ORIG_SN(x) u32_field_get(x, 9, 0); -#define PREP_IE_LIFETIME(x) u32_field_get(x, 13, AE_F_SET(x)); -#define PREP_IE_METRIC(x) u32_field_get(x, 17, AE_F_SET(x)); -#define PREP_IE_TARGET_ADDR(x) (AE_F_SET(x) ? x + 27 : x + 21) -#define PREP_IE_TARGET_SN(x) u32_field_get(x, 27, AE_F_SET(x)); +#define PREP_IE_ORIG_ADDR(x) (AE_F_SET(x) ? x + 27 : x + 21) +#define PREP_IE_ORIG_SN(x) u32_field_get(x, 27, AE_F_SET(x)) +#define PREP_IE_LIFETIME(x) u32_field_get(x, 13, AE_F_SET(x)) +#define PREP_IE_METRIC(x) u32_field_get(x, 17, AE_F_SET(x)) +#define PREP_IE_TARGET_ADDR(x) (x + 3) +#define PREP_IE_TARGET_SN(x) u32_field_get(x, 9, 0) #define PERR_IE_TTL(x) (*(x)) #define PERR_IE_TARGET_FLAGS(x) (*(x + 2)) #define PERR_IE_TARGET_ADDR(x) (x + 3) -#define PERR_IE_TARGET_SN(x) u32_field_get(x, 9, 0); -#define PERR_IE_TARGET_RCODE(x) u16_field_get(x, 13, 0); +#define PERR_IE_TARGET_SN(x) u32_field_get(x, 9, 0) +#define PERR_IE_TARGET_RCODE(x) u16_field_get(x, 13, 0) #define MSEC_TO_TU(x) (x*1000/1024) #define SN_GT(x, y) ((long) (y) - (long) (x) < 0) @@ -132,24 +134,25 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); /* BSSID == SA */ memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); - mgmt->u.action.category = WLAN_CATEGORY_MESH_PATH_SEL; - mgmt->u.action.u.mesh_action.action_code = MESH_PATH_SEL_ACTION; + mgmt->u.action.category = WLAN_CATEGORY_MESH_ACTION; + mgmt->u.action.u.mesh_action.action_code = + WLAN_MESH_ACTION_HWMP_PATH_SELECTION; switch (action) { case MPATH_PREQ: - mhwmp_dbg("sending PREQ to %pM\n", target); + mhwmp_dbg("sending PREQ to %pM", target); ie_len = 37; pos = skb_put(skb, 2 + ie_len); *pos++ = WLAN_EID_PREQ; break; case MPATH_PREP: - mhwmp_dbg("sending PREP to %pM\n", target); + mhwmp_dbg("sending PREP to %pM", target); ie_len = 31; pos = skb_put(skb, 2 + ie_len); *pos++ = WLAN_EID_PREP; break; case MPATH_RANN: - mhwmp_dbg("sending RANN from %pM\n", orig_addr); + mhwmp_dbg("sending RANN from %pM", orig_addr); ie_len = sizeof(struct ieee80211_rann_ie); pos = skb_put(skb, 2 + ie_len); *pos++ = WLAN_EID_RANN; @@ -163,35 +166,63 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, *pos++ = flags; *pos++ = hop_count; *pos++ = ttl; - if (action == MPATH_PREQ) { - memcpy(pos, &preq_id, 4); + if (action == MPATH_PREP) { + memcpy(pos, target, ETH_ALEN); + pos += ETH_ALEN; + memcpy(pos, &target_sn, 4); pos += 4; - } - memcpy(pos, orig_addr, ETH_ALEN); - pos += ETH_ALEN; - memcpy(pos, &orig_sn, 4); - pos += 4; - if (action != MPATH_RANN) { - memcpy(pos, &lifetime, 4); + } else { + if (action == MPATH_PREQ) { + memcpy(pos, &preq_id, 4); + pos += 4; + } + memcpy(pos, orig_addr, ETH_ALEN); + pos += ETH_ALEN; + memcpy(pos, &orig_sn, 4); pos += 4; } + memcpy(pos, &lifetime, 4); /* interval for RANN */ + pos += 4; memcpy(pos, &metric, 4); pos += 4; if (action == MPATH_PREQ) { - /* destination count */ - *pos++ = 1; + *pos++ = 1; /* destination count */ *pos++ = target_flags; - } - if (action != MPATH_RANN) { memcpy(pos, target, ETH_ALEN); pos += ETH_ALEN; memcpy(pos, &target_sn, 4); + pos += 4; + } else if (action == MPATH_PREP) { + memcpy(pos, orig_addr, ETH_ALEN); + pos += ETH_ALEN; + memcpy(pos, &orig_sn, 4); + pos += 4; } ieee80211_tx_skb(sdata, skb); return 0; } + +/* Headroom is not adjusted. Caller should ensure that skb has sufficient + * headroom in case the frame is encrypted. */ +static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + + skb_set_mac_header(skb, 0); + skb_set_network_header(skb, 0); + skb_set_transport_header(skb, 0); + + /* Send all internal mgmt frames on VO. Accordingly set TID to 7. */ + skb_set_queue_mapping(skb, IEEE80211_AC_VO); + skb->priority = 7; + + info->control.vif = &sdata->vif; + ieee80211_set_qos_hdr(sdata, skb); +} + /** * mesh_send_path error - Sends a PERR mesh management frame * @@ -199,6 +230,10 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, * @target_sn: SN of the broken destination * @target_rcode: reason code for this PERR * @ra: node this frame is addressed to + * + * Note: This function may be called with driver locks taken that the driver + * also acquires in the TX path. To avoid a deadlock we don't transmit the + * frame directly but add it to the pending queue instead. */ int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, __le16 target_rcode, const u8 *ra, @@ -212,7 +247,7 @@ int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, if (!skb) return -1; - skb_reserve(skb, local->hw.extra_tx_headroom); + skb_reserve(skb, local->tx_headroom + local->hw.extra_tx_headroom); /* 25 is the size of the common mgmt part (24) plus the size of the * common action part (1) */ @@ -224,9 +259,11 @@ int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, memcpy(mgmt->da, ra, ETH_ALEN); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); - /* BSSID is left zeroed, wildcard value */ - mgmt->u.action.category = WLAN_CATEGORY_MESH_PATH_SEL; - mgmt->u.action.u.mesh_action.action_code = MESH_PATH_SEL_ACTION; + /* BSSID == SA */ + memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); + mgmt->u.action.category = WLAN_CATEGORY_MESH_ACTION; + mgmt->u.action.u.mesh_action.action_code = + WLAN_MESH_ACTION_HWMP_PATH_SELECTION; ie_len = 15; pos = skb_put(skb, 2 + ie_len); *pos++ = WLAN_EID_PERR; @@ -251,7 +288,9 @@ int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, pos += 4; memcpy(pos, &target_rcode, 2); - ieee80211_tx_skb(sdata, skb); + /* see note in function header */ + prepare_frame_for_deferred_tx(sdata, skb); + ieee80211_add_pending_skb(local, skb); return 0; } @@ -449,7 +488,6 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, if (fresh_info) { mesh_path_assign_nexthop(mpath, sta); - mpath->flags &= ~MESH_PATH_SN_VALID; mpath->metric = last_hop_metric; mpath->exp_time = time_after(mpath->exp_time, exp_time) ? mpath->exp_time : exp_time; @@ -484,10 +522,10 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, orig_sn = PREQ_IE_ORIG_SN(preq_elem); target_flags = PREQ_IE_TARGET_F(preq_elem); - mhwmp_dbg("received PREQ from %pM\n", orig_addr); + mhwmp_dbg("received PREQ from %pM", orig_addr); if (memcmp(target_addr, sdata->vif.addr, ETH_ALEN) == 0) { - mhwmp_dbg("PREQ is for us\n"); + mhwmp_dbg("PREQ is for us"); forward = false; reply = true; metric = 0; @@ -523,7 +561,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, lifetime = PREQ_IE_LIFETIME(preq_elem); ttl = ifmsh->mshcfg.element_ttl; if (ttl != 0) { - mhwmp_dbg("replying to the PREQ\n"); + mhwmp_dbg("replying to the PREQ"); mesh_path_sel_frame_tx(MPATH_PREP, 0, target_addr, cpu_to_le32(target_sn), 0, orig_addr, cpu_to_le32(orig_sn), mgmt->sa, 0, ttl, @@ -543,7 +581,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, ifmsh->mshstats.dropped_frames_ttl++; return; } - mhwmp_dbg("forwarding the PREQ from %pM\n", orig_addr); + mhwmp_dbg("forwarding the PREQ from %pM", orig_addr); --ttl; flags = PREQ_IE_FLAGS(preq_elem); preq_id = PREQ_IE_PREQ_ID(preq_elem); @@ -578,7 +616,7 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, u8 next_hop[ETH_ALEN]; u32 target_sn, orig_sn, lifetime; - mhwmp_dbg("received PREP from %pM\n", PREP_IE_ORIG_ADDR(prep_elem)); + mhwmp_dbg("received PREP from %pM", PREP_IE_ORIG_ADDR(prep_elem)); /* Note that we divert from the draft nomenclature and denominate * destination to what the draft refers to as origininator. So in this @@ -684,6 +722,8 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, u8 ttl, flags, hopcount; u8 *orig_addr; u32 orig_sn, metric; + u32 interval = ifmsh->mshcfg.dot11MeshHWMPRannInterval; + bool root_is_gate; ttl = rann->rann_ttl; if (ttl <= 1) { @@ -692,12 +732,19 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, } ttl--; flags = rann->rann_flags; + root_is_gate = !!(flags & RANN_FLAG_IS_GATE); orig_addr = rann->rann_addr; orig_sn = rann->rann_seq; hopcount = rann->rann_hopcount; hopcount++; metric = rann->rann_metric; - mhwmp_dbg("received RANN from %pM\n", orig_addr); + + /* Ignore our own RANNs */ + if (memcmp(orig_addr, sdata->vif.addr, ETH_ALEN) == 0) + return; + + mhwmp_dbg("received RANN from %pM (is_gate=%d)", orig_addr, + root_is_gate); rcu_read_lock(); mpath = mesh_path_lookup(orig_addr, sdata); @@ -709,18 +756,28 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, sdata->u.mesh.mshstats.dropped_frames_no_route++; return; } - mesh_queue_preq(mpath, - PREQ_Q_F_START | PREQ_Q_F_REFRESH); } + + if ((!(mpath->flags & (MESH_PATH_ACTIVE | MESH_PATH_RESOLVING)) || + time_after(jiffies, mpath->exp_time - 1*HZ)) && + !(mpath->flags & MESH_PATH_FIXED)) { + mhwmp_dbg("%s time to refresh root mpath %pM", sdata->name, + orig_addr); + mesh_queue_preq(mpath, PREQ_Q_F_START | PREQ_Q_F_REFRESH); + } + if (mpath->sn < orig_sn) { mesh_path_sel_frame_tx(MPATH_RANN, flags, orig_addr, cpu_to_le32(orig_sn), 0, NULL, 0, broadcast_addr, - hopcount, ttl, 0, + hopcount, ttl, cpu_to_le32(interval), cpu_to_le32(metric + mpath->metric), 0, sdata); mpath->sn = orig_sn; } + if (root_is_gate) + mesh_path_add_gate(mpath); + rcu_read_unlock(); } @@ -732,11 +789,20 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems elems; size_t baselen; u32 last_hop_metric; + struct sta_info *sta; /* need action_code */ if (len < IEEE80211_MIN_ACTION_SIZE + 1) return; + rcu_read_lock(); + sta = sta_info_get(sdata, mgmt->sa); + if (!sta || sta->plink_state != NL80211_PLINK_ESTAB) { + rcu_read_unlock(); + return; + } + rcu_read_unlock(); + baselen = (u8 *) mgmt->u.action.u.mesh_action.variable - (u8 *) mgmt; ieee802_11_parse_elems(mgmt->u.action.u.mesh_action.variable, len - baselen, &elems); @@ -788,16 +854,16 @@ static void mesh_queue_preq(struct mesh_path *mpath, u8 flags) preq_node = kmalloc(sizeof(struct mesh_preq_queue), GFP_ATOMIC); if (!preq_node) { - mhwmp_dbg("could not allocate PREQ node\n"); + mhwmp_dbg("could not allocate PREQ node"); return; } - spin_lock(&ifmsh->mesh_preq_queue_lock); + spin_lock_bh(&ifmsh->mesh_preq_queue_lock); if (ifmsh->preq_queue_len == MAX_PREQ_QUEUE_LEN) { - spin_unlock(&ifmsh->mesh_preq_queue_lock); + spin_unlock_bh(&ifmsh->mesh_preq_queue_lock); kfree(preq_node); if (printk_ratelimit()) - mhwmp_dbg("PREQ node queue full\n"); + mhwmp_dbg("PREQ node queue full"); return; } @@ -806,7 +872,7 @@ static void mesh_queue_preq(struct mesh_path *mpath, u8 flags) list_add_tail(&preq_node->list, &ifmsh->preq_queue.list); ++ifmsh->preq_queue_len; - spin_unlock(&ifmsh->mesh_preq_queue_lock); + spin_unlock_bh(&ifmsh->mesh_preq_queue_lock); if (time_after(jiffies, ifmsh->last_preq + min_preq_int_jiff(sdata))) ieee80211_queue_work(&sdata->local->hw, &sdata->work); @@ -982,35 +1048,46 @@ void mesh_path_timer(unsigned long data) { struct mesh_path *mpath = (void *) data; struct ieee80211_sub_if_data *sdata = mpath->sdata; + int ret; if (sdata->local->quiescing) return; spin_lock_bh(&mpath->state_lock); if (mpath->flags & MESH_PATH_RESOLVED || - (!(mpath->flags & MESH_PATH_RESOLVING))) + (!(mpath->flags & MESH_PATH_RESOLVING))) { mpath->flags &= ~(MESH_PATH_RESOLVING | MESH_PATH_RESOLVED); - else if (mpath->discovery_retries < max_preq_retries(sdata)) { + spin_unlock_bh(&mpath->state_lock); + } else if (mpath->discovery_retries < max_preq_retries(sdata)) { ++mpath->discovery_retries; mpath->discovery_timeout *= 2; + spin_unlock_bh(&mpath->state_lock); mesh_queue_preq(mpath, 0); } else { mpath->flags = 0; mpath->exp_time = jiffies; - mesh_path_flush_pending(mpath); + spin_unlock_bh(&mpath->state_lock); + if (!mpath->is_gate && mesh_gate_num(sdata) > 0) { + ret = mesh_path_send_to_gates(mpath); + if (ret) + mhwmp_dbg("no gate was reachable"); + } else + mesh_path_flush_pending(mpath); } - - spin_unlock_bh(&mpath->state_lock); } void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + u32 interval = ifmsh->mshcfg.dot11MeshHWMPRannInterval; + u8 flags; - mesh_path_sel_frame_tx(MPATH_RANN, 0, sdata->vif.addr, + flags = (ifmsh->mshcfg.dot11MeshGateAnnouncementProtocol) + ? RANN_FLAG_IS_GATE : 0; + mesh_path_sel_frame_tx(MPATH_RANN, flags, sdata->vif.addr, cpu_to_le32(++ifmsh->sn), 0, NULL, 0, broadcast_addr, 0, sdata->u.mesh.mshcfg.element_ttl, - 0, 0, 0, sdata); + cpu_to_le32(interval), 0, 0, sdata); } diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 0d2faac..7f54c50 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -14,9 +14,16 @@ #include #include #include +#include "wme.h" #include "ieee80211_i.h" #include "mesh.h" +#ifdef CONFIG_MAC80211_VERBOSE_MPATH_DEBUG +#define mpath_dbg(fmt, args...) printk(KERN_DEBUG fmt, ##args) +#else +#define mpath_dbg(fmt, args...) do { (void)(0); } while (0) +#endif + /* There will be initially 2^INIT_PATHS_SIZE_ORDER buckets */ #define INIT_PATHS_SIZE_ORDER 2 @@ -42,8 +49,10 @@ static struct mesh_table __rcu *mpp_paths; /* Store paths for MPP&MAP */ int mesh_paths_generation; /* This lock will have the grow table function as writer and add / delete nodes - * as readers. When reading the table (i.e. doing lookups) we are well protected - * by RCU + * as readers. RCU provides sufficient protection only when reading the table + * (i.e. doing lookups). Adding or adding or removing nodes requires we take + * the read lock or we risk operating on an old table. The write lock is only + * needed when modifying the number of buckets a table. */ static DEFINE_RWLOCK(pathtbl_resize_lock); @@ -60,6 +69,8 @@ static inline struct mesh_table *resize_dereference_mpp_paths(void) lockdep_is_held(&pathtbl_resize_lock)); } +static int mesh_gate_add(struct mesh_table *tbl, struct mesh_path *mpath); + /* * CAREFUL -- "tbl" must not be an expression, * in particular not an rcu_dereference(), since @@ -103,6 +114,7 @@ static struct mesh_table *mesh_table_alloc(int size_order) sizeof(newtbl->hash_rnd)); for (i = 0; i <= newtbl->hash_mask; i++) spin_lock_init(&newtbl->hashwlock[i]); + spin_lock_init(&newtbl->gates_lock); return newtbl; } @@ -118,6 +130,7 @@ static void mesh_table_free(struct mesh_table *tbl, bool free_leafs) { struct hlist_head *mesh_hash; struct hlist_node *p, *q; + struct mpath_node *gate; int i; mesh_hash = tbl->hash_buckets; @@ -129,6 +142,17 @@ static void mesh_table_free(struct mesh_table *tbl, bool free_leafs) } spin_unlock_bh(&tbl->hashwlock[i]); } + if (free_leafs) { + spin_lock_bh(&tbl->gates_lock); + hlist_for_each_entry_safe(gate, p, q, + tbl->known_gates, list) { + hlist_del(&gate->list); + kfree(gate); + } + kfree(tbl->known_gates); + spin_unlock_bh(&tbl->gates_lock); + } + __mesh_table_free(tbl); } @@ -146,6 +170,7 @@ static int mesh_table_grow(struct mesh_table *oldtbl, newtbl->free_node = oldtbl->free_node; newtbl->mean_chain_len = oldtbl->mean_chain_len; newtbl->copy_node = oldtbl->copy_node; + newtbl->known_gates = oldtbl->known_gates; atomic_set(&newtbl->entries, atomic_read(&oldtbl->entries)); oldhash = oldtbl->hash_buckets; @@ -188,6 +213,7 @@ void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta) struct ieee80211_hdr *hdr; struct sk_buff_head tmpq; unsigned long flags; + struct ieee80211_sub_if_data *sdata = mpath->sdata; rcu_assign_pointer(mpath->next_hop, sta); @@ -198,6 +224,8 @@ void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta) while ((skb = __skb_dequeue(&mpath->frame_queue)) != NULL) { hdr = (struct ieee80211_hdr *) skb->data; memcpy(hdr->addr1, sta->sta.addr, ETH_ALEN); + skb_set_queue_mapping(skb, ieee80211_select_queue(sdata, skb)); + ieee80211_set_qos_hdr(sdata, skb); __skb_queue_tail(&tmpq, skb); } @@ -205,62 +233,128 @@ void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta) spin_unlock_irqrestore(&mpath->frame_queue.lock, flags); } +static void prepare_for_gate(struct sk_buff *skb, char *dst_addr, + struct mesh_path *gate_mpath) +{ + struct ieee80211_hdr *hdr; + struct ieee80211s_hdr *mshdr; + int mesh_hdrlen, hdrlen; + char *next_hop; + + hdr = (struct ieee80211_hdr *) skb->data; + hdrlen = ieee80211_hdrlen(hdr->frame_control); + mshdr = (struct ieee80211s_hdr *) (skb->data + hdrlen); + + if (!(mshdr->flags & MESH_FLAGS_AE)) { + /* size of the fixed part of the mesh header */ + mesh_hdrlen = 6; + + /* make room for the two extended addresses */ + skb_push(skb, 2 * ETH_ALEN); + memmove(skb->data, hdr, hdrlen + mesh_hdrlen); + + hdr = (struct ieee80211_hdr *) skb->data; + + /* we preserve the previous mesh header and only add + * the new addreses */ + mshdr = (struct ieee80211s_hdr *) (skb->data + hdrlen); + mshdr->flags = MESH_FLAGS_AE_A5_A6; + memcpy(mshdr->eaddr1, hdr->addr3, ETH_ALEN); + memcpy(mshdr->eaddr2, hdr->addr4, ETH_ALEN); + } + + /* update next hop */ + hdr = (struct ieee80211_hdr *) skb->data; + rcu_read_lock(); + next_hop = rcu_dereference(gate_mpath->next_hop)->sta.addr; + memcpy(hdr->addr1, next_hop, ETH_ALEN); + rcu_read_unlock(); + memcpy(hdr->addr3, dst_addr, ETH_ALEN); +} /** - * mesh_path_lookup - look up a path in the mesh path table - * @dst: hardware address (ETH_ALEN length) of destination - * @sdata: local subif * - * Returns: pointer to the mesh path structure, or NULL if not found + * mesh_path_move_to_queue - Move or copy frames from one mpath queue to another * - * Locking: must be called within a read rcu section. + * This function is used to transfer or copy frames from an unresolved mpath to + * a gate mpath. The function also adds the Address Extension field and + * updates the next hop. + * + * If a frame already has an Address Extension field, only the next hop and + * destination addresses are updated. + * + * The gate mpath must be an active mpath with a valid mpath->next_hop. + * + * @mpath: An active mpath the frames will be sent to (i.e. the gate) + * @from_mpath: The failed mpath + * @copy: When true, copy all the frames to the new mpath queue. When false, + * move them. */ -struct mesh_path *mesh_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata) +static void mesh_path_move_to_queue(struct mesh_path *gate_mpath, + struct mesh_path *from_mpath, + bool copy) { - struct mesh_path *mpath; - struct hlist_node *n; - struct hlist_head *bucket; - struct mesh_table *tbl; - struct mpath_node *node; + struct sk_buff *skb, *cp_skb = NULL; + struct sk_buff_head gateq, failq; + unsigned long flags; + int num_skbs; - tbl = rcu_dereference(mesh_paths); + BUG_ON(gate_mpath == from_mpath); + BUG_ON(!gate_mpath->next_hop); - bucket = &tbl->hash_buckets[mesh_table_hash(dst, sdata, tbl)]; - hlist_for_each_entry_rcu(node, n, bucket, list) { - mpath = node->mpath; - if (mpath->sdata == sdata && - memcmp(dst, mpath->dst, ETH_ALEN) == 0) { - if (MPATH_EXPIRED(mpath)) { - spin_lock_bh(&mpath->state_lock); - if (MPATH_EXPIRED(mpath)) - mpath->flags &= ~MESH_PATH_ACTIVE; - spin_unlock_bh(&mpath->state_lock); - } - return mpath; + __skb_queue_head_init(&gateq); + __skb_queue_head_init(&failq); + + spin_lock_irqsave(&from_mpath->frame_queue.lock, flags); + skb_queue_splice_init(&from_mpath->frame_queue, &failq); + spin_unlock_irqrestore(&from_mpath->frame_queue.lock, flags); + + num_skbs = skb_queue_len(&failq); + + while (num_skbs--) { + skb = __skb_dequeue(&failq); + if (copy) { + cp_skb = skb_copy(skb, GFP_ATOMIC); + if (cp_skb) + __skb_queue_tail(&failq, cp_skb); } + + prepare_for_gate(skb, gate_mpath->dst, gate_mpath); + __skb_queue_tail(&gateq, skb); } - return NULL; + + spin_lock_irqsave(&gate_mpath->frame_queue.lock, flags); + skb_queue_splice(&gateq, &gate_mpath->frame_queue); + mpath_dbg("Mpath queue for gate %pM has %d frames\n", + gate_mpath->dst, + skb_queue_len(&gate_mpath->frame_queue)); + spin_unlock_irqrestore(&gate_mpath->frame_queue.lock, flags); + + if (!copy) + return; + + spin_lock_irqsave(&from_mpath->frame_queue.lock, flags); + skb_queue_splice(&failq, &from_mpath->frame_queue); + spin_unlock_irqrestore(&from_mpath->frame_queue.lock, flags); } -struct mesh_path *mpp_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata) + +static struct mesh_path *path_lookup(struct mesh_table *tbl, u8 *dst, + struct ieee80211_sub_if_data *sdata) { struct mesh_path *mpath; struct hlist_node *n; struct hlist_head *bucket; - struct mesh_table *tbl; struct mpath_node *node; - tbl = rcu_dereference(mpp_paths); - bucket = &tbl->hash_buckets[mesh_table_hash(dst, sdata, tbl)]; hlist_for_each_entry_rcu(node, n, bucket, list) { mpath = node->mpath; if (mpath->sdata == sdata && - memcmp(dst, mpath->dst, ETH_ALEN) == 0) { + memcmp(dst, mpath->dst, ETH_ALEN) == 0) { if (MPATH_EXPIRED(mpath)) { spin_lock_bh(&mpath->state_lock); - if (MPATH_EXPIRED(mpath)) - mpath->flags &= ~MESH_PATH_ACTIVE; + mpath->flags &= ~MESH_PATH_ACTIVE; spin_unlock_bh(&mpath->state_lock); } return mpath; @@ -269,6 +363,25 @@ struct mesh_path *mpp_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata) return NULL; } +/** + * mesh_path_lookup - look up a path in the mesh path table + * @dst: hardware address (ETH_ALEN length) of destination + * @sdata: local subif + * + * Returns: pointer to the mesh path structure, or NULL if not found + * + * Locking: must be called within a read rcu section. + */ +struct mesh_path *mesh_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata) +{ + return path_lookup(rcu_dereference(mesh_paths), dst, sdata); +} + +struct mesh_path *mpp_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata) +{ + return path_lookup(rcu_dereference(mpp_paths), dst, sdata); +} + /** * mesh_path_lookup_by_idx - look up a path in the mesh path table by its index @@ -293,8 +406,7 @@ struct mesh_path *mesh_path_lookup_by_idx(int idx, struct ieee80211_sub_if_data if (j++ == idx) { if (MPATH_EXPIRED(node->mpath)) { spin_lock_bh(&node->mpath->state_lock); - if (MPATH_EXPIRED(node->mpath)) - node->mpath->flags &= ~MESH_PATH_ACTIVE; + node->mpath->flags &= ~MESH_PATH_ACTIVE; spin_unlock_bh(&node->mpath->state_lock); } return node->mpath; @@ -304,6 +416,109 @@ struct mesh_path *mesh_path_lookup_by_idx(int idx, struct ieee80211_sub_if_data return NULL; } +static void mesh_gate_node_reclaim(struct rcu_head *rp) +{ + struct mpath_node *node = container_of(rp, struct mpath_node, rcu); + kfree(node); +} + +/** + * mesh_gate_add - mark mpath as path to a mesh gate and add to known_gates + * @mesh_tbl: table which contains known_gates list + * @mpath: mpath to known mesh gate + * + * Returns: 0 on success + * + */ +static int mesh_gate_add(struct mesh_table *tbl, struct mesh_path *mpath) +{ + struct mpath_node *gate, *new_gate; + struct hlist_node *n; + int err; + + rcu_read_lock(); + tbl = rcu_dereference(tbl); + + hlist_for_each_entry_rcu(gate, n, tbl->known_gates, list) + if (gate->mpath == mpath) { + err = -EEXIST; + goto err_rcu; + } + + new_gate = kzalloc(sizeof(struct mpath_node), GFP_ATOMIC); + if (!new_gate) { + err = -ENOMEM; + goto err_rcu; + } + + mpath->is_gate = true; + mpath->sdata->u.mesh.num_gates++; + new_gate->mpath = mpath; + spin_lock_bh(&tbl->gates_lock); + hlist_add_head_rcu(&new_gate->list, tbl->known_gates); + spin_unlock_bh(&tbl->gates_lock); + rcu_read_unlock(); + mpath_dbg("Mesh path (%s): Recorded new gate: %pM. %d known gates\n", + mpath->sdata->name, mpath->dst, + mpath->sdata->u.mesh.num_gates); + return 0; +err_rcu: + rcu_read_unlock(); + return err; +} + +/** + * mesh_gate_del - remove a mesh gate from the list of known gates + * @tbl: table which holds our list of known gates + * @mpath: gate mpath + * + * Returns: 0 on success + * + * Locking: must be called inside rcu_read_lock() section + */ +static int mesh_gate_del(struct mesh_table *tbl, struct mesh_path *mpath) +{ + struct mpath_node *gate; + struct hlist_node *p, *q; + + tbl = rcu_dereference(tbl); + + hlist_for_each_entry_safe(gate, p, q, tbl->known_gates, list) + if (gate->mpath == mpath) { + spin_lock_bh(&tbl->gates_lock); + hlist_del_rcu(&gate->list); + call_rcu(&gate->rcu, mesh_gate_node_reclaim); + spin_unlock_bh(&tbl->gates_lock); + mpath->sdata->u.mesh.num_gates--; + mpath->is_gate = false; + mpath_dbg("Mesh path (%s): Deleted gate: %pM. " + "%d known gates\n", mpath->sdata->name, + mpath->dst, mpath->sdata->u.mesh.num_gates); + break; + } + + return 0; +} + +/** + * + * mesh_path_add_gate - add the given mpath to a mesh gate to our path table + * @mpath: gate path to add to table + */ +int mesh_path_add_gate(struct mesh_path *mpath) +{ + return mesh_gate_add(mesh_paths, mpath); +} + +/** + * mesh_gate_num - number of gates known to this interface + * @sdata: subif data + */ +int mesh_gate_num(struct ieee80211_sub_if_data *sdata) +{ + return sdata->u.mesh.num_gates; +} + /** * mesh_path_add - allocate and add a new path to the mesh path table * @addr: destination address of the path (ETH_ALEN length) @@ -481,6 +696,7 @@ int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata) new_mpath->flags = 0; skb_queue_head_init(&new_mpath->frame_queue); new_node->mpath = new_mpath; + init_timer(&new_mpath->timer); new_mpath->exp_time = jiffies; spin_lock_init(&new_mpath->state_lock); @@ -539,28 +755,53 @@ void mesh_plink_broken(struct sta_info *sta) struct hlist_node *p; struct ieee80211_sub_if_data *sdata = sta->sdata; int i; + __le16 reason = cpu_to_le16(WLAN_REASON_MESH_PATH_DEST_UNREACHABLE); rcu_read_lock(); tbl = rcu_dereference(mesh_paths); for_each_mesh_entry(tbl, p, node, i) { mpath = node->mpath; - spin_lock_bh(&mpath->state_lock); if (rcu_dereference(mpath->next_hop) == sta && mpath->flags & MESH_PATH_ACTIVE && !(mpath->flags & MESH_PATH_FIXED)) { + spin_lock_bh(&mpath->state_lock); mpath->flags &= ~MESH_PATH_ACTIVE; ++mpath->sn; spin_unlock_bh(&mpath->state_lock); mesh_path_error_tx(sdata->u.mesh.mshcfg.element_ttl, mpath->dst, cpu_to_le32(mpath->sn), - cpu_to_le16(PERR_RCODE_DEST_UNREACH), - bcast, sdata); - } else - spin_unlock_bh(&mpath->state_lock); + reason, bcast, sdata); + } } rcu_read_unlock(); } +static void mesh_path_node_reclaim(struct rcu_head *rp) +{ + struct mpath_node *node = container_of(rp, struct mpath_node, rcu); + struct ieee80211_sub_if_data *sdata = node->mpath->sdata; + + del_timer_sync(&node->mpath->timer); + atomic_dec(&sdata->u.mesh.mpaths); + kfree(node->mpath); + kfree(node); +} + +/* needs to be called with the corresponding hashwlock taken */ +static void __mesh_path_del(struct mesh_table *tbl, struct mpath_node *node) +{ + struct mesh_path *mpath; + mpath = node->mpath; + spin_lock(&mpath->state_lock); + mpath->flags |= MESH_PATH_RESOLVING; + if (mpath->is_gate) + mesh_gate_del(tbl, mpath); + hlist_del_rcu(&node->list); + call_rcu(&node->rcu, mesh_path_node_reclaim); + spin_unlock(&mpath->state_lock); + atomic_dec(&tbl->entries); +} + /** * mesh_path_flush_by_nexthop - Deletes mesh paths if their next hop matches * @@ -581,42 +822,59 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta) int i; rcu_read_lock(); - tbl = rcu_dereference(mesh_paths); + read_lock_bh(&pathtbl_resize_lock); + tbl = resize_dereference_mesh_paths(); for_each_mesh_entry(tbl, p, node, i) { mpath = node->mpath; - if (rcu_dereference(mpath->next_hop) == sta) - mesh_path_del(mpath->dst, mpath->sdata); + if (rcu_dereference(mpath->next_hop) == sta) { + spin_lock_bh(&tbl->hashwlock[i]); + __mesh_path_del(tbl, node); + spin_unlock_bh(&tbl->hashwlock[i]); + } } + read_unlock_bh(&pathtbl_resize_lock); rcu_read_unlock(); } -void mesh_path_flush(struct ieee80211_sub_if_data *sdata) +static void table_flush_by_iface(struct mesh_table *tbl, + struct ieee80211_sub_if_data *sdata) { - struct mesh_table *tbl; struct mesh_path *mpath; struct mpath_node *node; struct hlist_node *p; int i; - rcu_read_lock(); - tbl = rcu_dereference(mesh_paths); + WARN_ON(!rcu_read_lock_held()); for_each_mesh_entry(tbl, p, node, i) { mpath = node->mpath; - if (mpath->sdata == sdata) - mesh_path_del(mpath->dst, mpath->sdata); + if (mpath->sdata != sdata) + continue; + spin_lock_bh(&tbl->hashwlock[i]); + __mesh_path_del(tbl, node); + spin_unlock_bh(&tbl->hashwlock[i]); } - rcu_read_unlock(); } -static void mesh_path_node_reclaim(struct rcu_head *rp) +/** + * mesh_path_flush_by_iface - Deletes all mesh paths associated with a given iface + * + * This function deletes both mesh paths as well as mesh portal paths. + * + * @sdata - interface data to match + * + */ +void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata) { - struct mpath_node *node = container_of(rp, struct mpath_node, rcu); - struct ieee80211_sub_if_data *sdata = node->mpath->sdata; + struct mesh_table *tbl; - del_timer_sync(&node->mpath->timer); - atomic_dec(&sdata->u.mesh.mpaths); - kfree(node->mpath); - kfree(node); + rcu_read_lock(); + read_lock_bh(&pathtbl_resize_lock); + tbl = resize_dereference_mesh_paths(); + table_flush_by_iface(tbl, sdata); + tbl = resize_dereference_mpp_paths(); + table_flush_by_iface(tbl, sdata); + read_unlock_bh(&pathtbl_resize_lock); + rcu_read_unlock(); } /** @@ -647,12 +905,7 @@ int mesh_path_del(u8 *addr, struct ieee80211_sub_if_data *sdata) mpath = node->mpath; if (mpath->sdata == sdata && memcmp(addr, mpath->dst, ETH_ALEN) == 0) { - spin_lock_bh(&mpath->state_lock); - mpath->flags |= MESH_PATH_RESOLVING; - hlist_del_rcu(&node->list); - call_rcu(&node->rcu, mesh_path_node_reclaim); - atomic_dec(&tbl->entries); - spin_unlock_bh(&mpath->state_lock); + __mesh_path_del(tbl, node); goto enddel; } } @@ -681,6 +934,58 @@ void mesh_path_tx_pending(struct mesh_path *mpath) } /** + * mesh_path_send_to_gates - sends pending frames to all known mesh gates + * + * @mpath: mesh path whose queue will be emptied + * + * If there is only one gate, the frames are transferred from the failed mpath + * queue to that gate's queue. If there are more than one gates, the frames + * are copied from each gate to the next. After frames are copied, the + * mpath queues are emptied onto the transmission queue. + */ +int mesh_path_send_to_gates(struct mesh_path *mpath) +{ + struct ieee80211_sub_if_data *sdata = mpath->sdata; + struct hlist_node *n; + struct mesh_table *tbl; + struct mesh_path *from_mpath = mpath; + struct mpath_node *gate = NULL; + bool copy = false; + struct hlist_head *known_gates; + + rcu_read_lock(); + tbl = rcu_dereference(mesh_paths); + known_gates = tbl->known_gates; + rcu_read_unlock(); + + if (!known_gates) + return -EHOSTUNREACH; + + hlist_for_each_entry_rcu(gate, n, known_gates, list) { + if (gate->mpath->sdata != sdata) + continue; + + if (gate->mpath->flags & MESH_PATH_ACTIVE) { + mpath_dbg("Forwarding to %pM\n", gate->mpath->dst); + mesh_path_move_to_queue(gate->mpath, from_mpath, copy); + from_mpath = gate->mpath; + copy = true; + } else { + mpath_dbg("Not forwarding %p\n", gate->mpath); + mpath_dbg("flags %x\n", gate->mpath->flags); + } + } + + hlist_for_each_entry_rcu(gate, n, known_gates, list) + if (gate->mpath->sdata == sdata) { + mpath_dbg("Sending to %pM\n", gate->mpath->dst); + mesh_path_tx_pending(gate->mpath); + } + + return (from_mpath == mpath) ? -EHOSTUNREACH : 0; +} + +/** * mesh_path_discard_frame - discard a frame whose path could not be resolved * * @skb: frame to discard @@ -699,18 +1004,23 @@ void mesh_path_discard_frame(struct sk_buff *skb, struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct mesh_path *mpath; u32 sn = 0; + __le16 reason = cpu_to_le16(WLAN_REASON_MESH_PATH_NOFORWARD); if (memcmp(hdr->addr4, sdata->vif.addr, ETH_ALEN) != 0) { u8 *ra, *da; da = hdr->addr3; ra = hdr->addr1; + rcu_read_lock(); mpath = mesh_path_lookup(da, sdata); - if (mpath) + if (mpath) { + spin_lock_bh(&mpath->state_lock); sn = ++mpath->sn; + spin_unlock_bh(&mpath->state_lock); + } + rcu_read_unlock(); mesh_path_error_tx(sdata->u.mesh.mshcfg.element_ttl, skb->data, - cpu_to_le32(sn), - cpu_to_le16(PERR_RCODE_NO_ROUTE), ra, sdata); + cpu_to_le32(sn), reason, ra, sdata); } kfree_skb(skb); @@ -728,8 +1038,7 @@ void mesh_path_flush_pending(struct mesh_path *mpath) { struct sk_buff *skb; - while ((skb = skb_dequeue(&mpath->frame_queue)) && - (mpath->flags & MESH_PATH_ACTIVE)) + while ((skb = skb_dequeue(&mpath->frame_queue)) != NULL) mesh_path_discard_frame(skb, mpath->sdata); } @@ -790,6 +1099,7 @@ static int mesh_path_node_copy(struct hlist_node *p, struct mesh_table *newtbl) int mesh_pathtbl_init(void) { struct mesh_table *tbl_path, *tbl_mpp; + int ret; tbl_path = mesh_table_alloc(INIT_PATHS_SIZE_ORDER); if (!tbl_path) @@ -797,21 +1107,40 @@ int mesh_pathtbl_init(void) tbl_path->free_node = &mesh_path_node_free; tbl_path->copy_node = &mesh_path_node_copy; tbl_path->mean_chain_len = MEAN_CHAIN_LEN; + tbl_path->known_gates = kzalloc(sizeof(struct hlist_head), GFP_ATOMIC); + if (!tbl_path->known_gates) { + ret = -ENOMEM; + goto free_path; + } + INIT_HLIST_HEAD(tbl_path->known_gates); + tbl_mpp = mesh_table_alloc(INIT_PATHS_SIZE_ORDER); if (!tbl_mpp) { - mesh_table_free(tbl_path, true); - return -ENOMEM; + ret = -ENOMEM; + goto free_path; } tbl_mpp->free_node = &mesh_path_node_free; tbl_mpp->copy_node = &mesh_path_node_copy; tbl_mpp->mean_chain_len = MEAN_CHAIN_LEN; + tbl_mpp->known_gates = kzalloc(sizeof(struct hlist_head), GFP_ATOMIC); + if (!tbl_mpp->known_gates) { + ret = -ENOMEM; + goto free_mpp; + } + INIT_HLIST_HEAD(tbl_mpp->known_gates); /* Need no locking since this is during init */ RCU_INIT_POINTER(mesh_paths, tbl_path); RCU_INIT_POINTER(mpp_paths, tbl_mpp); return 0; + +free_mpp: + mesh_table_free(tbl_mpp, true); +free_path: + mesh_table_free(tbl_path, true); + return ret; } void mesh_path_expire(struct ieee80211_sub_if_data *sdata) @@ -828,14 +1157,10 @@ void mesh_path_expire(struct ieee80211_sub_if_data *sdata) if (node->mpath->sdata != sdata) continue; mpath = node->mpath; - spin_lock_bh(&mpath->state_lock); if ((!(mpath->flags & MESH_PATH_RESOLVING)) && (!(mpath->flags & MESH_PATH_FIXED)) && - time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE)) { - spin_unlock_bh(&mpath->state_lock); + time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE)) mesh_path_del(mpath->dst, mpath->sdata); - } else - spin_unlock_bh(&mpath->state_lock); } rcu_read_unlock(); } @@ -843,6 +1168,6 @@ void mesh_path_expire(struct ieee80211_sub_if_data *sdata) void mesh_pathtbl_unregister(void) { /* no need for locking during exit path */ - mesh_table_free(rcu_dereference_raw(mesh_paths), true); - mesh_table_free(rcu_dereference_raw(mpp_paths), true); + mesh_table_free(rcu_dereference_protected(mesh_paths, 1), true); + mesh_table_free(rcu_dereference_protected(mpp_paths, 1), true); } diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index f4adc09..7e57f5d 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -19,35 +19,18 @@ #define mpl_dbg(fmt, args...) do { (void)(0); } while (0) #endif -#define PLINK_GET_LLID(p) (p + 4) -#define PLINK_GET_PLID(p) (p + 6) +#define PLINK_GET_LLID(p) (p + 2) +#define PLINK_GET_PLID(p) (p + 4) #define mod_plink_timer(s, t) (mod_timer(&s->plink_timer, \ jiffies + HZ * t / 1000)) -/* Peer link cancel reasons, all subject to ANA approval */ -#define MESH_LINK_CANCELLED 2 -#define MESH_MAX_NEIGHBORS 3 -#define MESH_CAPABILITY_POLICY_VIOLATION 4 -#define MESH_CLOSE_RCVD 5 -#define MESH_MAX_RETRIES 6 -#define MESH_CONFIRM_TIMEOUT 7 -#define MESH_SECURITY_ROLE_NEGOTIATION_DIFFERS 8 -#define MESH_SECURITY_AUTHENTICATION_IMPOSSIBLE 9 -#define MESH_SECURITY_FAILED_VERIFICATION 10 - #define dot11MeshMaxRetries(s) (s->u.mesh.mshcfg.dot11MeshMaxRetries) #define dot11MeshRetryTimeout(s) (s->u.mesh.mshcfg.dot11MeshRetryTimeout) #define dot11MeshConfirmTimeout(s) (s->u.mesh.mshcfg.dot11MeshConfirmTimeout) #define dot11MeshHoldingTimeout(s) (s->u.mesh.mshcfg.dot11MeshHoldingTimeout) #define dot11MeshMaxPeerLinks(s) (s->u.mesh.mshcfg.dot11MeshMaxPeerLinks) -enum plink_frame_type { - PLINK_OPEN = 1, - PLINK_CONFIRM, - PLINK_CLOSE -}; - enum plink_event { PLINK_UNDEFINED, OPN_ACPT, @@ -60,6 +43,10 @@ enum plink_event { CLS_IGNR }; +static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, + enum ieee80211_self_protected_actioncode action, + u8 *da, __le16 llid, __le16 plid, __le16 reason); + static inline void mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata) { @@ -105,7 +92,9 @@ static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata, if (!sta) return NULL; - sta->flags = WLAN_STA_AUTHORIZED | WLAN_STA_AUTH; + set_sta_flag(sta, WLAN_STA_AUTH); + set_sta_flag(sta, WLAN_STA_AUTHORIZED); + set_sta_flag(sta, WLAN_STA_WME); sta->sta.supp_rates[local->hw.conf.channel->band] = rates; rate_control_rate_init(sta); @@ -150,6 +139,10 @@ void mesh_plink_deactivate(struct sta_info *sta) spin_lock_bh(&sta->lock); deactivated = __mesh_plink_deactivate(sta); + sta->reason = cpu_to_le16(WLAN_REASON_MESH_PEER_CANCELED); + mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, + sta->sta.addr, sta->llid, sta->plid, + sta->reason); spin_unlock_bh(&sta->lock); if (deactivated) @@ -157,16 +150,16 @@ void mesh_plink_deactivate(struct sta_info *sta) } static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, - enum plink_frame_type action, u8 *da, __le16 llid, __le16 plid, - __le16 reason) { + enum ieee80211_self_protected_actioncode action, + u8 *da, __le16 llid, __le16 plid, __le16 reason) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400 + sdata->u.mesh.ie_len); struct ieee80211_mgmt *mgmt; bool include_plid = false; - static const u8 meshpeeringproto[] = { 0x00, 0x0F, 0xAC, 0x2A }; + int ie_len = 4; + u16 peering_proto = 0; u8 *pos; - int ie_len; if (!skb) return -1; @@ -175,63 +168,75 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, * common action part (1) */ mgmt = (struct ieee80211_mgmt *) - skb_put(skb, 25 + sizeof(mgmt->u.action.u.plink_action)); - memset(mgmt, 0, 25 + sizeof(mgmt->u.action.u.plink_action)); + skb_put(skb, 25 + sizeof(mgmt->u.action.u.self_prot)); + memset(mgmt, 0, 25 + sizeof(mgmt->u.action.u.self_prot)); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); memcpy(mgmt->da, da, ETH_ALEN); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); - mgmt->u.action.category = WLAN_CATEGORY_MESH_ACTION; - mgmt->u.action.u.plink_action.action_code = action; - - if (action == PLINK_CLOSE) - mgmt->u.action.u.plink_action.aux = reason; - else { - mgmt->u.action.u.plink_action.aux = cpu_to_le16(0x0); - if (action == PLINK_CONFIRM) { - pos = skb_put(skb, 4); - /* two-byte status code followed by two-byte AID */ - memset(pos, 0, 2); + mgmt->u.action.category = WLAN_CATEGORY_SELF_PROTECTED; + mgmt->u.action.u.self_prot.action_code = action; + + if (action != WLAN_SP_MESH_PEERING_CLOSE) { + /* capability info */ + pos = skb_put(skb, 2); + memset(pos, 0, 2); + if (action == WLAN_SP_MESH_PEERING_CONFIRM) { + /* AID */ + pos = skb_put(skb, 2); memcpy(pos + 2, &plid, 2); } - mesh_mgmt_ies_add(skb, sdata); + if (ieee80211_add_srates_ie(&sdata->vif, skb) || + ieee80211_add_ext_srates_ie(&sdata->vif, skb) || + mesh_add_rsn_ie(skb, sdata) || + mesh_add_meshid_ie(skb, sdata) || + mesh_add_meshconf_ie(skb, sdata)) + return -1; + } else { /* WLAN_SP_MESH_PEERING_CLOSE */ + if (mesh_add_meshid_ie(skb, sdata)) + return -1; } - /* Add Peer Link Management element */ + /* Add Mesh Peering Management element */ switch (action) { - case PLINK_OPEN: - ie_len = 6; + case WLAN_SP_MESH_PEERING_OPEN: break; - case PLINK_CONFIRM: - ie_len = 8; + case WLAN_SP_MESH_PEERING_CONFIRM: + ie_len += 2; include_plid = true; break; - case PLINK_CLOSE: - default: - if (!plid) - ie_len = 8; - else { - ie_len = 10; + case WLAN_SP_MESH_PEERING_CLOSE: + if (plid) { + ie_len += 2; include_plid = true; } + ie_len += 2; /* reason code */ break; + default: + return -EINVAL; } + if (WARN_ON(skb_tailroom(skb) < 2 + ie_len)) + return -ENOMEM; + pos = skb_put(skb, 2 + ie_len); - *pos++ = WLAN_EID_PEER_LINK; + *pos++ = WLAN_EID_PEER_MGMT; *pos++ = ie_len; - memcpy(pos, meshpeeringproto, sizeof(meshpeeringproto)); - pos += 4; + memcpy(pos, &peering_proto, 2); + pos += 2; memcpy(pos, &llid, 2); + pos += 2; if (include_plid) { - pos += 2; memcpy(pos, &plid, 2); - } - if (action == PLINK_CLOSE) { pos += 2; + } + if (action == WLAN_SP_MESH_PEERING_CLOSE) { memcpy(pos, &reason, 2); + pos += 2; } + if (mesh_add_vendor_ies(skb, sdata)) + return -1; ieee80211_tx_skb(sdata, skb); return 0; @@ -322,21 +327,21 @@ static void mesh_plink_timer(unsigned long data) ++sta->plink_retries; mod_plink_timer(sta, sta->plink_timeout); spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_OPEN, sta->sta.addr, llid, - 0, 0); + mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_OPEN, + sta->sta.addr, llid, 0, 0); break; } - reason = cpu_to_le16(MESH_MAX_RETRIES); + reason = cpu_to_le16(WLAN_REASON_MESH_MAX_RETRIES); /* fall through on else */ case NL80211_PLINK_CNF_RCVD: /* confirm timer */ if (!reason) - reason = cpu_to_le16(MESH_CONFIRM_TIMEOUT); + reason = cpu_to_le16(WLAN_REASON_MESH_CONFIRM_TIMEOUT); sta->plink_state = NL80211_PLINK_HOLDING; mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata)); spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, plid, - reason); + mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, + sta->sta.addr, llid, plid, reason); break; case NL80211_PLINK_HOLDING: /* holding timer */ @@ -380,7 +385,7 @@ int mesh_plink_open(struct sta_info *sta) __le16 llid; struct ieee80211_sub_if_data *sdata = sta->sdata; - if (!test_sta_flags(sta, WLAN_STA_AUTH)) + if (!test_sta_flag(sta, WLAN_STA_AUTH)) return -EPERM; spin_lock_bh(&sta->lock); @@ -396,7 +401,7 @@ int mesh_plink_open(struct sta_info *sta) mpl_dbg("Mesh plink: starting establishment with %pM\n", sta->sta.addr); - return mesh_plink_frame_tx(sdata, PLINK_OPEN, + return mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_OPEN, sta->sta.addr, llid, 0, 0); } @@ -422,7 +427,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m struct ieee802_11_elems elems; struct sta_info *sta; enum plink_event event; - enum plink_frame_type ftype; + enum ieee80211_self_protected_actioncode ftype; size_t baselen; bool deactivated, matches_local = true; u8 ie_len; @@ -449,14 +454,15 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m return; } - baseaddr = mgmt->u.action.u.plink_action.variable; - baselen = (u8 *) mgmt->u.action.u.plink_action.variable - (u8 *) mgmt; - if (mgmt->u.action.u.plink_action.action_code == PLINK_CONFIRM) { + baseaddr = mgmt->u.action.u.self_prot.variable; + baselen = (u8 *) mgmt->u.action.u.self_prot.variable - (u8 *) mgmt; + if (mgmt->u.action.u.self_prot.action_code == + WLAN_SP_MESH_PEERING_CONFIRM) { baseaddr += 4; baselen += 4; } ieee802_11_parse_elems(baseaddr, len - baselen, &elems); - if (!elems.peer_link) { + if (!elems.peering) { mpl_dbg("Mesh plink: missing necessary peer link ie\n"); return; } @@ -466,37 +472,40 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m return; } - ftype = mgmt->u.action.u.plink_action.action_code; - ie_len = elems.peer_link_len; - if ((ftype == PLINK_OPEN && ie_len != 6) || - (ftype == PLINK_CONFIRM && ie_len != 8) || - (ftype == PLINK_CLOSE && ie_len != 8 && ie_len != 10)) { + ftype = mgmt->u.action.u.self_prot.action_code; + ie_len = elems.peering_len; + if ((ftype == WLAN_SP_MESH_PEERING_OPEN && ie_len != 4) || + (ftype == WLAN_SP_MESH_PEERING_CONFIRM && ie_len != 6) || + (ftype == WLAN_SP_MESH_PEERING_CLOSE && ie_len != 6 + && ie_len != 8)) { mpl_dbg("Mesh plink: incorrect plink ie length %d %d\n", ftype, ie_len); return; } - if (ftype != PLINK_CLOSE && (!elems.mesh_id || !elems.mesh_config)) { + if (ftype != WLAN_SP_MESH_PEERING_CLOSE && + (!elems.mesh_id || !elems.mesh_config)) { mpl_dbg("Mesh plink: missing necessary ie\n"); return; } /* Note the lines below are correct, the llid in the frame is the plid * from the point of view of this host. */ - memcpy(&plid, PLINK_GET_LLID(elems.peer_link), 2); - if (ftype == PLINK_CONFIRM || (ftype == PLINK_CLOSE && ie_len == 10)) - memcpy(&llid, PLINK_GET_PLID(elems.peer_link), 2); + memcpy(&plid, PLINK_GET_LLID(elems.peering), 2); + if (ftype == WLAN_SP_MESH_PEERING_CONFIRM || + (ftype == WLAN_SP_MESH_PEERING_CLOSE && ie_len == 8)) + memcpy(&llid, PLINK_GET_PLID(elems.peering), 2); rcu_read_lock(); sta = sta_info_get(sdata, mgmt->sa); - if (!sta && ftype != PLINK_OPEN) { + if (!sta && ftype != WLAN_SP_MESH_PEERING_OPEN) { mpl_dbg("Mesh plink: cls or cnf from unknown peer\n"); rcu_read_unlock(); return; } - if (sta && !test_sta_flags(sta, WLAN_STA_AUTH)) { + if (sta && !test_sta_flag(sta, WLAN_STA_AUTH)) { mpl_dbg("Mesh plink: Action frame from non-authed peer\n"); rcu_read_unlock(); return; @@ -509,30 +518,30 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m /* Now we will figure out the appropriate event... */ event = PLINK_UNDEFINED; - if (ftype != PLINK_CLOSE && (!mesh_matches_local(&elems, sdata))) { + if (ftype != WLAN_SP_MESH_PEERING_CLOSE && + (!mesh_matches_local(&elems, sdata))) { matches_local = false; switch (ftype) { - case PLINK_OPEN: + case WLAN_SP_MESH_PEERING_OPEN: event = OPN_RJCT; break; - case PLINK_CONFIRM: + case WLAN_SP_MESH_PEERING_CONFIRM: event = CNF_RJCT; break; - case PLINK_CLOSE: - /* avoid warning */ + default: break; } } if (!sta && !matches_local) { rcu_read_unlock(); - reason = cpu_to_le16(MESH_CAPABILITY_POLICY_VIOLATION); + reason = cpu_to_le16(WLAN_REASON_MESH_CONFIG); llid = 0; - mesh_plink_frame_tx(sdata, PLINK_CLOSE, mgmt->sa, llid, - plid, reason); + mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, + mgmt->sa, llid, plid, reason); return; } else if (!sta) { - /* ftype == PLINK_OPEN */ + /* ftype == WLAN_SP_MESH_PEERING_OPEN */ u32 rates; rcu_read_unlock(); @@ -557,21 +566,21 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m } else if (matches_local) { spin_lock_bh(&sta->lock); switch (ftype) { - case PLINK_OPEN: + case WLAN_SP_MESH_PEERING_OPEN: if (!mesh_plink_free_count(sdata) || (sta->plid && sta->plid != plid)) event = OPN_IGNR; else event = OPN_ACPT; break; - case PLINK_CONFIRM: + case WLAN_SP_MESH_PEERING_CONFIRM: if (!mesh_plink_free_count(sdata) || (sta->llid != llid || sta->plid != plid)) event = CNF_IGNR; else event = CNF_ACPT; break; - case PLINK_CLOSE: + case WLAN_SP_MESH_PEERING_CLOSE: if (sta->plink_state == NL80211_PLINK_ESTAB) /* Do not check for llid or plid. This does not * follow the standard but since multiple plinks @@ -620,10 +629,12 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m sta->llid = llid; mesh_plink_timer_set(sta, dot11MeshRetryTimeout(sdata)); spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_OPEN, sta->sta.addr, llid, - 0, 0); - mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, - llid, plid, 0); + mesh_plink_frame_tx(sdata, + WLAN_SP_MESH_PEERING_OPEN, + sta->sta.addr, llid, 0, 0); + mesh_plink_frame_tx(sdata, + WLAN_SP_MESH_PEERING_CONFIRM, + sta->sta.addr, llid, plid, 0); break; default: spin_unlock_bh(&sta->lock); @@ -635,10 +646,10 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m switch (event) { case OPN_RJCT: case CNF_RJCT: - reason = cpu_to_le16(MESH_CAPABILITY_POLICY_VIOLATION); + reason = cpu_to_le16(WLAN_REASON_MESH_CONFIG); case CLS_ACPT: if (!reason) - reason = cpu_to_le16(MESH_CLOSE_RCVD); + reason = cpu_to_le16(WLAN_REASON_MESH_CLOSE); sta->reason = reason; sta->plink_state = NL80211_PLINK_HOLDING; if (!mod_plink_timer(sta, @@ -647,8 +658,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m llid = sta->llid; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, - plid, reason); + mesh_plink_frame_tx(sdata, + WLAN_SP_MESH_PEERING_CLOSE, + sta->sta.addr, llid, plid, reason); break; case OPN_ACPT: /* retry timer is left untouched */ @@ -656,8 +668,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m sta->plid = plid; llid = sta->llid; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid, - plid, 0); + mesh_plink_frame_tx(sdata, + WLAN_SP_MESH_PEERING_CONFIRM, + sta->sta.addr, llid, plid, 0); break; case CNF_ACPT: sta->plink_state = NL80211_PLINK_CNF_RCVD; @@ -677,10 +690,10 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m switch (event) { case OPN_RJCT: case CNF_RJCT: - reason = cpu_to_le16(MESH_CAPABILITY_POLICY_VIOLATION); + reason = cpu_to_le16(WLAN_REASON_MESH_CONFIG); case CLS_ACPT: if (!reason) - reason = cpu_to_le16(MESH_CLOSE_RCVD); + reason = cpu_to_le16(WLAN_REASON_MESH_CLOSE); sta->reason = reason; sta->plink_state = NL80211_PLINK_HOLDING; if (!mod_plink_timer(sta, @@ -689,14 +702,15 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m llid = sta->llid; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, - plid, reason); + mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, + sta->sta.addr, llid, plid, reason); break; case OPN_ACPT: llid = sta->llid; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid, - plid, 0); + mesh_plink_frame_tx(sdata, + WLAN_SP_MESH_PEERING_CONFIRM, + sta->sta.addr, llid, plid, 0); break; case CNF_ACPT: del_timer(&sta->plink_timer); @@ -717,10 +731,10 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m switch (event) { case OPN_RJCT: case CNF_RJCT: - reason = cpu_to_le16(MESH_CAPABILITY_POLICY_VIOLATION); + reason = cpu_to_le16(WLAN_REASON_MESH_CONFIG); case CLS_ACPT: if (!reason) - reason = cpu_to_le16(MESH_CLOSE_RCVD); + reason = cpu_to_le16(WLAN_REASON_MESH_CLOSE); sta->reason = reason; sta->plink_state = NL80211_PLINK_HOLDING; if (!mod_plink_timer(sta, @@ -729,8 +743,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m llid = sta->llid; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, - plid, reason); + mesh_plink_frame_tx(sdata, + WLAN_SP_MESH_PEERING_CLOSE, + sta->sta.addr, llid, plid, reason); break; case OPN_ACPT: del_timer(&sta->plink_timer); @@ -740,8 +755,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); mpl_dbg("Mesh plink with %pM ESTABLISHED\n", sta->sta.addr); - mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid, - plid, 0); + mesh_plink_frame_tx(sdata, + WLAN_SP_MESH_PEERING_CONFIRM, + sta->sta.addr, llid, plid, 0); break; default: spin_unlock_bh(&sta->lock); @@ -752,7 +768,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m case NL80211_PLINK_ESTAB: switch (event) { case CLS_ACPT: - reason = cpu_to_le16(MESH_CLOSE_RCVD); + reason = cpu_to_le16(WLAN_REASON_MESH_CLOSE); sta->reason = reason; deactivated = __mesh_plink_deactivate(sta); sta->plink_state = NL80211_PLINK_HOLDING; @@ -761,14 +777,15 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m spin_unlock_bh(&sta->lock); if (deactivated) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); - mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, - plid, reason); + mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, + sta->sta.addr, llid, plid, reason); break; case OPN_ACPT: llid = sta->llid; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid, - plid, 0); + mesh_plink_frame_tx(sdata, + WLAN_SP_MESH_PEERING_CONFIRM, + sta->sta.addr, llid, plid, 0); break; default: spin_unlock_bh(&sta->lock); @@ -790,8 +807,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m llid = sta->llid; reason = sta->reason; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, - llid, plid, reason); + mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE, + sta->sta.addr, llid, plid, reason); break; default: spin_unlock_bh(&sta->lock); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 1563250..9da8626 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -16,10 +16,12 @@ #include #include #include +#include #include -#include +#include #include #include +#include #include #include @@ -160,7 +162,8 @@ static int ecw2cw(int ecw) */ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, struct ieee80211_ht_info *hti, - const u8 *bssid, u16 ap_ht_cap_flags) + const u8 *bssid, u16 ap_ht_cap_flags, + bool beacon_htcap_ie) { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; @@ -232,6 +235,21 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, WARN_ON(!ieee80211_set_channel_type(local, sdata, channel_type)); } + if (beacon_htcap_ie && (prev_chantype != channel_type)) { + /* + * Whenever the AP announces the HT mode change that can be + * 40MHz intolerant or etc., it would be safer to stop tx + * queues before doing hw config to avoid buffer overflow. + */ + ieee80211_stop_queues_by_reason(&sdata->local->hw, + IEEE80211_QUEUE_STOP_REASON_CHTYPE_CHANGE); + + /* flush out all packets */ + synchronize_net(); + + drv_flush(local, false); + } + /* channel_type change automatically detected */ ieee80211_hw_config(local, 0); @@ -243,6 +261,10 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, IEEE80211_RC_HT_CHANGED, channel_type); rcu_read_unlock(); + + if (beacon_htcap_ie) + ieee80211_wake_queues_by_reason(&sdata->local->hw, + IEEE80211_QUEUE_STOP_REASON_CHTYPE_CHANGE); } ht_opmode = le16_to_cpu(hti->operation_mode); @@ -271,11 +293,9 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt; skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt)); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for " - "deauth/disassoc frame\n", sdata->name); + if (!skb) return; - } + skb_reserve(skb, local->hw.extra_tx_headroom); mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); @@ -330,6 +350,7 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, { struct sk_buff *skb; struct ieee80211_hdr_3addr *nullfunc; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif); if (!skb) @@ -340,6 +361,10 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, nullfunc->frame_control |= cpu_to_le16(IEEE80211_FCTL_PM); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + if (ifmgd->flags & (IEEE80211_STA_BEACON_POLL | + IEEE80211_STA_CONNECTION_POLL)) + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_USE_MINRATE; + ieee80211_tx_skb(sdata, skb); } @@ -354,11 +379,9 @@ static void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local, return; skb = dev_alloc_skb(local->hw.extra_tx_headroom + 30); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for 4addr " - "nullfunc frame\n", sdata->name); + if (!skb) return; - } + skb_reserve(skb, local->hw.extra_tx_headroom); nullfunc = (struct ieee80211_hdr *) skb_put(skb, 30); @@ -394,6 +417,9 @@ static void ieee80211_chswitch_work(struct work_struct *work) /* call "hw_config" only if doing sw channel switch */ ieee80211_hw_config(sdata->local, IEEE80211_CONF_CHANGE_CHANNEL); + } else { + /* update the device channel directly */ + sdata->local->hw.conf.channel = sdata->local->oper_channel; } /* XXX: shouldn't really modify cfg80211-owned data! */ @@ -608,7 +634,7 @@ static bool ieee80211_powersave_allowed(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *mgd = &sdata->u.mgd; struct sta_info *sta = NULL; - u32 sta_flags = 0; + bool authorized = false; if (!mgd->powersave) return false; @@ -629,13 +655,10 @@ static bool ieee80211_powersave_allowed(struct ieee80211_sub_if_data *sdata) rcu_read_lock(); sta = sta_info_get(sdata, mgd->bssid); if (sta) - sta_flags = get_sta_flags(sta); + authorized = test_sta_flag(sta, WLAN_STA_AUTHORIZED); rcu_read_unlock(); - if (!(sta_flags & WLAN_STA_AUTHORIZED)) - return false; - - return true; + return authorized; } /* need to hold RTNL or interface lock */ @@ -752,7 +775,7 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) container_of(work, struct ieee80211_local, dynamic_ps_enable_work); struct ieee80211_sub_if_data *sdata = local->ps_sdata; - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + struct ieee80211_if_managed *ifmgd; unsigned long flags; int q; @@ -760,26 +783,39 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) if (!sdata) return; + ifmgd = &sdata->u.mgd; + if (local->hw.conf.flags & IEEE80211_CONF_PS) return; - /* - * transmission can be stopped by others which leads to - * dynamic_ps_timer expiry. Postpond the ps timer if it - * is not the actual idle state. - */ - spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - for (q = 0; q < local->hw.queues; q++) { - if (local->queue_stop_reasons[q]) { - spin_unlock_irqrestore(&local->queue_stop_reason_lock, - flags); + if (!local->disable_dynamic_ps && + local->hw.conf.dynamic_ps_timeout > 0) { + /* don't enter PS if TX frames are pending */ + if (drv_tx_frames_pending(local)) { mod_timer(&local->dynamic_ps_timer, jiffies + msecs_to_jiffies( local->hw.conf.dynamic_ps_timeout)); return; } + + /* + * transmission can be stopped by others which leads to + * dynamic_ps_timer expiry. Postpone the ps timer if it + * is not the actual idle state. + */ + spin_lock_irqsave(&local->queue_stop_reason_lock, flags); + for (q = 0; q < local->hw.queues; q++) { + if (local->queue_stop_reasons[q]) { + spin_unlock_irqrestore(&local->queue_stop_reason_lock, + flags); + mod_timer(&local->dynamic_ps_timer, jiffies + + msecs_to_jiffies( + local->hw.conf.dynamic_ps_timeout)); + return; + } + } + spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); } - spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) && (!(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED))) { @@ -804,7 +840,8 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); } - netif_tx_wake_all_queues(sdata->dev); + if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) + netif_tx_wake_all_queues(sdata->dev); } void ieee80211_dynamic_ps_timer(unsigned long data) @@ -906,7 +943,8 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, params.aifs, params.cw_min, params.cw_max, params.txop, params.uapsd); #endif - if (drv_conf_tx(local, queue, ¶ms)) + sdata->tx_conf[queue] = params; + if (drv_conf_tx(local, sdata, queue, ¶ms)) wiphy_debug(local->hw.wiphy, "failed to set TX queue parameters for queue %d\n", queue); @@ -1064,7 +1102,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, mutex_lock(&local->sta_mtx); sta = sta_info_get(sdata, bssid); if (sta) { - set_sta_flags(sta, WLAN_STA_BLOCK_BA); + set_sta_flag(sta, WLAN_STA_BLOCK_BA); ieee80211_sta_tear_down_BA_sessions(sta, tx); } mutex_unlock(&local->sta_mtx); @@ -1106,8 +1144,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, changed |= BSS_CHANGED_BSSID | BSS_CHANGED_HT; ieee80211_bss_info_change_notify(sdata, changed); + /* remove AP and TDLS peers */ if (remove_sta) - sta_info_destroy_addr(sdata, bssid); + sta_info_flush(local, sdata); del_timer_sync(&sdata->u.mgd.conn_mon_timer); del_timer_sync(&sdata->u.mgd.bcn_mon_timer); @@ -1207,7 +1246,8 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) ieee80211_send_nullfunc(sdata->local, sdata, 0); } else { ssid = ieee80211_bss_get_ie(ifmgd->associated, WLAN_EID_SSID); - ieee80211_send_probe_req(sdata, dst, ssid + 2, ssid[1], NULL, 0); + ieee80211_send_probe_req(sdata, dst, ssid + 2, ssid[1], NULL, 0, + (u32) -1, true, false); } ifmgd->probe_send_count++; @@ -1292,7 +1332,8 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, ssid = ieee80211_bss_get_ie(ifmgd->associated, WLAN_EID_SSID); skb = ieee80211_build_probe_req(sdata, ifmgd->associated->bssid, - ssid + 2, ssid[1], NULL, 0); + (u32) -1, ssid + 2, ssid[1], + NULL, 0, true); return skb; } @@ -1446,6 +1487,7 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk, int i, j, err; bool have_higher_than_11mbit = false; u16 ap_ht_cap_flags; + int min_rate = INT_MAX, min_rate_index = -1; /* AssocResp and ReassocResp have identical structure */ @@ -1479,17 +1521,22 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk, ifmgd->aid = aid; - sta = sta_info_alloc(sdata, cbss->bssid, GFP_KERNEL); - if (!sta) { - printk(KERN_DEBUG "%s: failed to alloc STA entry for" - " the AP\n", sdata->name); + mutex_lock(&sdata->local->sta_mtx); + /* + * station info was already allocated and inserted before + * the association and should be available to us + */ + sta = sta_info_get_rx(sdata, cbss->bssid); + if (WARN_ON(!sta)) { + mutex_unlock(&sdata->local->sta_mtx); return false; } - set_sta_flags(sta, WLAN_STA_AUTH | WLAN_STA_ASSOC | - WLAN_STA_ASSOC_AP); + set_sta_flag(sta, WLAN_STA_AUTH); + set_sta_flag(sta, WLAN_STA_ASSOC); + set_sta_flag(sta, WLAN_STA_ASSOC_AP); if (!(ifmgd->flags & IEEE80211_STA_CONTROL_PORT)) - set_sta_flags(sta, WLAN_STA_AUTHORIZED); + set_sta_flag(sta, WLAN_STA_AUTHORIZED); rates = 0; basic_rates = 0; @@ -1507,6 +1554,10 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk, rates |= BIT(j); if (is_basic) basic_rates |= BIT(j); + if (rate < min_rate) { + min_rate = rate; + min_rate_index = j; + } break; } } @@ -1524,11 +1575,25 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk, rates |= BIT(j); if (is_basic) basic_rates |= BIT(j); + if (rate < min_rate) { + min_rate = rate; + min_rate_index = j; + } break; } } } + /* + * some buggy APs don't advertise basic_rates. use the lowest + * supported rate instead. + */ + if (unlikely(!basic_rates) && min_rate_index >= 0) { + printk(KERN_DEBUG "%s: No basic rates in AssocResp. " + "Using min supported rate instead.\n", sdata->name); + basic_rates = BIT(min_rate_index); + } + sta->sta.supp_rates[wk->chan->band] = rates; sdata->vif.bss_conf.basic_rates = basic_rates; @@ -1548,12 +1613,13 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk, rate_control_rate_init(sta); if (ifmgd->flags & IEEE80211_STA_MFP_ENABLED) - set_sta_flags(sta, WLAN_STA_MFP); + set_sta_flag(sta, WLAN_STA_MFP); if (elems.wmm_param) - set_sta_flags(sta, WLAN_STA_WME); + set_sta_flag(sta, WLAN_STA_WME); - err = sta_info_insert(sta); + /* sta_info_reinsert will also unlock the mutex lock */ + err = sta_info_reinsert(sta); sta = NULL; if (err) { printk(KERN_DEBUG "%s: failed to insert STA entry for" @@ -1581,7 +1647,8 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk, (sdata->local->hw.queues >= 4) && !(ifmgd->flags & IEEE80211_STA_DISABLE_11N)) changed |= ieee80211_enable_ht(sdata, elems.ht_info_elem, - cbss->bssid, ap_ht_cap_flags); + cbss->bssid, ap_ht_cap_flags, + false); /* set AID and assoc capability, * ieee80211_set_associated() will tell the driver */ @@ -1762,6 +1829,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ifmgd->ave_beacon_signal = rx_status->signal * 16; ifmgd->last_cqm_event_signal = 0; ifmgd->count_beacon_signal = 1; + ifmgd->last_ave_beacon_signal = 0; } else { ifmgd->ave_beacon_signal = (IEEE80211_SIGNAL_AVE_WEIGHT * rx_status->signal * 16 + @@ -1769,6 +1837,28 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ifmgd->ave_beacon_signal) / 16; ifmgd->count_beacon_signal++; } + + if (ifmgd->rssi_min_thold != ifmgd->rssi_max_thold && + ifmgd->count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT) { + int sig = ifmgd->ave_beacon_signal; + int last_sig = ifmgd->last_ave_beacon_signal; + + /* + * if signal crosses either of the boundaries, invoke callback + * with appropriate parameters + */ + if (sig > ifmgd->rssi_max_thold && + (last_sig <= ifmgd->rssi_min_thold || last_sig == 0)) { + ifmgd->last_ave_beacon_signal = sig; + drv_rssi_callback(local, RSSI_EVENT_HIGH); + } else if (sig < ifmgd->rssi_min_thold && + (last_sig >= ifmgd->rssi_max_thold || + last_sig == 0)) { + ifmgd->last_ave_beacon_signal = sig; + drv_rssi_callback(local, RSSI_EVENT_LOW); + } + } + if (bss_conf->cqm_rssi_thold && ifmgd->count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT && !(local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI)) { @@ -1892,7 +1982,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); changed |= ieee80211_enable_ht(sdata, elems.ht_info_elem, - bssid, ap_ht_cap_flags); + bssid, ap_ht_cap_flags, true); } /* Note: country IE parsing is done for us by cfg80211 */ @@ -2028,7 +2118,7 @@ static void ieee80211_sta_timer(unsigned long data) } static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, - u8 *bssid) + u8 *bssid, u8 reason) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; @@ -2046,8 +2136,7 @@ static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, * but that's not a problem. */ ieee80211_send_deauth_disassoc(sdata, bssid, - IEEE80211_STYPE_DEAUTH, - WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, + IEEE80211_STYPE_DEAUTH, reason, NULL, true); mutex_lock(&ifmgd->mtx); } @@ -2093,7 +2182,8 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) " AP %pM, disconnecting.\n", sdata->name, bssid); #endif - ieee80211_sta_connection_lost(sdata, bssid); + ieee80211_sta_connection_lost(sdata, bssid, + WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY); } } else if (time_is_after_jiffies(ifmgd->probe_timeout)) run_again(ifmgd, ifmgd->probe_timeout); @@ -2105,7 +2195,8 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) sdata->name, bssid, probe_wait_ms); #endif - ieee80211_sta_connection_lost(sdata, bssid); + ieee80211_sta_connection_lost(sdata, bssid, + WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY); } else if (ifmgd->probe_send_count < max_tries) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG wiphy_debug(local->hw.wiphy, @@ -2127,7 +2218,8 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) sdata->name, bssid, probe_wait_ms); - ieee80211_sta_connection_lost(sdata, bssid); + ieee80211_sta_connection_lost(sdata, bssid, + WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY); } } @@ -2196,6 +2288,7 @@ void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata) cancel_work_sync(&ifmgd->request_smps_work); + cancel_work_sync(&ifmgd->monitor_work); cancel_work_sync(&ifmgd->beacon_connection_loss_work); if (del_timer_sync(&ifmgd->timer)) set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running); @@ -2204,7 +2297,6 @@ void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata) if (del_timer_sync(&ifmgd->chswitch_timer)) set_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running); - cancel_work_sync(&ifmgd->monitor_work); /* these will just be re-established on connection */ del_timer_sync(&ifmgd->conn_mon_timer); del_timer_sync(&ifmgd->bcn_mon_timer); @@ -2217,12 +2309,31 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) if (!ifmgd->associated) return; + if (sdata->flags & IEEE80211_SDATA_DISCONNECT_RESUME) { + sdata->flags &= ~IEEE80211_SDATA_DISCONNECT_RESUME; + mutex_lock(&ifmgd->mtx); + if (ifmgd->associated) { +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + wiphy_debug(sdata->local->hw.wiphy, + "%s: driver requested disconnect after resume.\n", + sdata->name); +#endif + ieee80211_sta_connection_lost(sdata, + ifmgd->associated->bssid, + WLAN_REASON_UNSPECIFIED); + mutex_unlock(&ifmgd->mtx); + return; + } + mutex_unlock(&ifmgd->mtx); + } + if (test_and_clear_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running)) add_timer(&ifmgd->timer); if (test_and_clear_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running)) add_timer(&ifmgd->chswitch_timer); ieee80211_sta_reset_beacon_monitor(sdata); ieee80211_restart_sta_timer(sdata); + ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.monitor_work); } #endif @@ -2288,14 +2399,16 @@ static enum work_done_result ieee80211_probe_auth_done(struct ieee80211_work *wk, struct sk_buff *skb) { + struct ieee80211_local *local = wk->sdata->local; + if (!skb) { cfg80211_send_auth_timeout(wk->sdata->dev, wk->filter_ta); - return WORK_DONE_DESTROY; + goto destroy; } if (wk->type == IEEE80211_WORK_AUTH) { cfg80211_send_rx_auth(wk->sdata->dev, skb->data, skb->len); - return WORK_DONE_DESTROY; + goto destroy; } mutex_lock(&wk->sdata->u.mgd.mtx); @@ -2305,6 +2418,12 @@ ieee80211_probe_auth_done(struct ieee80211_work *wk, wk->type = IEEE80211_WORK_AUTH; wk->probe_auth.tries = 0; return WORK_DONE_REQUEUE; + destroy: + if (wk->probe_auth.synced) + drv_finish_tx_sync(local, wk->sdata, wk->filter_ta, + IEEE80211_TX_SYNC_AUTH); + + return WORK_DONE_DESTROY; } int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, @@ -2374,17 +2493,43 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, return 0; } +/* create and insert a dummy station entry */ +static int ieee80211_pre_assoc(struct ieee80211_sub_if_data *sdata, + u8 *bssid) { + struct sta_info *sta; + int err; + + sta = sta_info_alloc(sdata, bssid, GFP_KERNEL); + if (!sta) + return -ENOMEM; + + sta->dummy = true; + + err = sta_info_insert(sta); + sta = NULL; + if (err) { + printk(KERN_DEBUG "%s: failed to insert Dummy STA entry for" + " the AP (error %d)\n", sdata->name, err); + return err; + } + + return 0; +} + static enum work_done_result ieee80211_assoc_done(struct ieee80211_work *wk, struct sk_buff *skb) { + struct ieee80211_local *local = wk->sdata->local; struct ieee80211_mgmt *mgmt; struct ieee80211_rx_status *rx_status; struct ieee802_11_elems elems; + struct cfg80211_bss *cbss = wk->assoc.bss; u16 status; if (!skb) { + sta_info_destroy_addr(wk->sdata, cbss->bssid); cfg80211_send_assoc_timeout(wk->sdata->dev, wk->filter_ta); - return WORK_DONE_DESTROY; + goto destroy; } if (wk->type == IEEE80211_WORK_ASSOC_BEACON_WAIT) { @@ -2404,19 +2549,32 @@ static enum work_done_result ieee80211_assoc_done(struct ieee80211_work *wk, status = le16_to_cpu(mgmt->u.assoc_resp.status_code); if (status == WLAN_STATUS_SUCCESS) { + if (wk->assoc.synced) + drv_finish_tx_sync(local, wk->sdata, wk->filter_ta, + IEEE80211_TX_SYNC_ASSOC); + mutex_lock(&wk->sdata->u.mgd.mtx); if (!ieee80211_assoc_success(wk, mgmt, skb->len)) { mutex_unlock(&wk->sdata->u.mgd.mtx); /* oops -- internal error -- send timeout for now */ + sta_info_destroy_addr(wk->sdata, cbss->bssid); cfg80211_send_assoc_timeout(wk->sdata->dev, wk->filter_ta); return WORK_DONE_DESTROY; } mutex_unlock(&wk->sdata->u.mgd.mtx); + } else { + /* assoc failed - destroy the dummy station entry */ + sta_info_destroy_addr(wk->sdata, cbss->bssid); } cfg80211_send_rx_assoc(wk->sdata->dev, skb->data, skb->len); + destroy: + if (wk->assoc.synced) + drv_finish_tx_sync(local, wk->sdata, wk->filter_ta, + IEEE80211_TX_SYNC_ASSOC); + return WORK_DONE_DESTROY; } @@ -2427,7 +2585,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_bss *bss = (void *)req->bss->priv; struct ieee80211_work *wk; const u8 *ssid; - int i; + int i, err; mutex_lock(&ifmgd->mtx); if (ifmgd->associated) { @@ -2452,6 +2610,16 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, if (!wk) return -ENOMEM; + /* + * create a dummy station info entry in order + * to start accepting incoming EAPOL packets from the station + */ + err = ieee80211_pre_assoc(sdata, req->bss->bssid); + if (err) { + kfree(wk); + return err; + } + ifmgd->flags &= ~IEEE80211_STA_DISABLE_11N; ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED; @@ -2551,7 +2719,6 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct ieee80211_work *wk; u8 bssid[ETH_ALEN]; bool assoc_bss = false; @@ -2564,30 +2731,47 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, assoc_bss = true; } else { bool not_auth_yet = false; + struct ieee80211_work *tmp, *wk = NULL; mutex_unlock(&ifmgd->mtx); mutex_lock(&local->mtx); - list_for_each_entry(wk, &local->work_list, list) { - if (wk->sdata != sdata) + list_for_each_entry(tmp, &local->work_list, list) { + if (tmp->sdata != sdata) continue; - if (wk->type != IEEE80211_WORK_DIRECT_PROBE && - wk->type != IEEE80211_WORK_AUTH && - wk->type != IEEE80211_WORK_ASSOC && - wk->type != IEEE80211_WORK_ASSOC_BEACON_WAIT) + if (tmp->type != IEEE80211_WORK_DIRECT_PROBE && + tmp->type != IEEE80211_WORK_AUTH && + tmp->type != IEEE80211_WORK_ASSOC && + tmp->type != IEEE80211_WORK_ASSOC_BEACON_WAIT) continue; - if (memcmp(req->bss->bssid, wk->filter_ta, ETH_ALEN)) + if (memcmp(req->bss->bssid, tmp->filter_ta, ETH_ALEN)) continue; - not_auth_yet = wk->type == IEEE80211_WORK_DIRECT_PROBE; - list_del_rcu(&wk->list); - free_work(wk); + not_auth_yet = tmp->type == IEEE80211_WORK_DIRECT_PROBE; + list_del_rcu(&tmp->list); + synchronize_rcu(); + wk = tmp; break; } mutex_unlock(&local->mtx); + if (wk && wk->type == IEEE80211_WORK_ASSOC) { + /* clean up dummy sta & TX sync */ + sta_info_destroy_addr(wk->sdata, wk->filter_ta); + if (wk->assoc.synced) + drv_finish_tx_sync(local, wk->sdata, + wk->filter_ta, + IEEE80211_TX_SYNC_ASSOC); + } else if (wk && wk->type == IEEE80211_WORK_AUTH) { + if (wk->probe_auth.synced) + drv_finish_tx_sync(local, wk->sdata, + wk->filter_ta, + IEEE80211_TX_SYNC_AUTH); + } + kfree(wk); + /* * If somebody requests authentication and we haven't * sent out an auth frame yet there's no need to send @@ -2609,7 +2793,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, req->reason_code, cookie, !req->local_state_change); if (assoc_bss) - sta_info_destroy_addr(sdata, bssid); + sta_info_flush(sdata->local, sdata); mutex_lock(&sdata->local->mtx); ieee80211_recalc_idle(sdata->local); @@ -2649,7 +2833,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_send_deauth_disassoc(sdata, req->bss->bssid, IEEE80211_STYPE_DISASSOC, req->reason_code, cookie, !req->local_state_change); - sta_info_destroy_addr(sdata, bssid); + sta_info_flush(sdata->local, sdata); mutex_lock(&sdata->local->mtx); ieee80211_recalc_idle(sdata->local); @@ -2669,3 +2853,10 @@ void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, cfg80211_cqm_rssi_notify(sdata->dev, rssi_event, gfp); } EXPORT_SYMBOL(ieee80211_cqm_rssi_notify); + +unsigned char ieee80211_get_operstate(struct ieee80211_vif *vif) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + return sdata->dev->operstate; +} +EXPORT_SYMBOL(ieee80211_get_operstate); diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index ecc4922..db2c215 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -12,19 +12,16 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include #include #include "ieee80211_i.h" #include "driver-trace.h" /* - * Tell our hardware to disable PS. - * Optionally inform AP that we will go to sleep so that it will buffer - * the frames while we are doing off-channel work. This is optional - * because we *may* be doing work on-operating channel, and want our - * hardware unconditionally awake, but still let the AP send us normal frames. + * inform AP that we will go to sleep so that it will buffer the frames + * while we scan */ -static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata, - bool tell_ap) +static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; @@ -45,8 +42,8 @@ static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata, ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); } - if (tell_ap && (!local->offchannel_ps_enabled || - !(local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK))) + if (!(local->offchannel_ps_enabled) || + !(local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)) /* * If power save was enabled, no need to send a nullfunc * frame because AP knows that we are sleeping. But if the @@ -81,9 +78,6 @@ static void ieee80211_offchannel_ps_disable(struct ieee80211_sub_if_data *sdata) * we are sleeping, let's just enable power save mode in * hardware. */ - /* TODO: Only set hardware if CONF_PS changed? - * TODO: Should we set offchannel_ps_enabled to false? - */ local->hw.conf.flags |= IEEE80211_CONF_PS; ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); } else if (local->hw.conf.dynamic_ps_timeout > 0) { @@ -102,52 +96,57 @@ static void ieee80211_offchannel_ps_disable(struct ieee80211_sub_if_data *sdata) ieee80211_sta_reset_conn_monitor(sdata); } -void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local) +void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; - /* - * notify the AP about us leaving the channel and stop all - * STA interfaces. - */ mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) continue; - if (sdata->vif.type != NL80211_IFTYPE_MONITOR) - set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state); - - /* Check to see if we should disable beaconing. */ + /* disable beaconing */ if (sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_ADHOC || sdata->vif.type == NL80211_IFTYPE_MESH_POINT) ieee80211_bss_info_change_notify( sdata, BSS_CHANGED_BEACON_ENABLED); - if (sdata->vif.type != NL80211_IFTYPE_MONITOR) { + /* + * only handle non-STA interfaces here, STA interfaces + * are handled in ieee80211_offchannel_stop_station(), + * e.g., from the background scan state machine. + * + * In addition, do not stop monitor interface to allow it to be + * used from user space controlled off-channel operations. + */ + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_MONITOR) { + set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state); netif_tx_stop_all_queues(sdata->dev); - if (sdata->vif.type == NL80211_IFTYPE_STATION && - sdata->u.mgd.associated) - ieee80211_offchannel_ps_enable(sdata, true); } } mutex_unlock(&local->iflist_mtx); } -void ieee80211_offchannel_enable_all_ps(struct ieee80211_local *local, - bool tell_ap) +void ieee80211_offchannel_stop_station(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; + /* + * notify the AP about us leaving the channel and stop all STA interfaces + */ mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) continue; - if (sdata->vif.type == NL80211_IFTYPE_STATION && - sdata->u.mgd.associated) - ieee80211_offchannel_ps_enable(sdata, tell_ap); + if (sdata->vif.type == NL80211_IFTYPE_STATION) { + set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state); + netif_tx_stop_all_queues(sdata->dev); + if (sdata->u.mgd.associated) + ieee80211_offchannel_ps_enable(sdata); + } } mutex_unlock(&local->iflist_mtx); } @@ -163,9 +162,10 @@ void ieee80211_offchannel_return(struct ieee80211_local *local, continue; /* Tell AP we're back */ - if (sdata->vif.type == NL80211_IFTYPE_STATION && - sdata->u.mgd.associated) - ieee80211_offchannel_ps_disable(sdata); + if (sdata->vif.type == NL80211_IFTYPE_STATION) { + if (sdata->u.mgd.associated) + ieee80211_offchannel_ps_disable(sdata); + } if (sdata->vif.type != NL80211_IFTYPE_MONITOR) { clear_bit(SDATA_STATE_OFFCHANNEL, &sdata->state); @@ -182,7 +182,7 @@ void ieee80211_offchannel_return(struct ieee80211_local *local, netif_tx_wake_all_queues(sdata->dev); } - /* Check to see if we should re-enable beaconing */ + /* re-enable beaconing */ if (enable_beaconing && (sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_ADHOC || diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 730778a..9ee7164 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -6,18 +6,43 @@ #include "driver-ops.h" #include "led.h" +/* return value indicates whether the driver should be further notified */ +static bool ieee80211_quiesce(struct ieee80211_sub_if_data *sdata) +{ + switch (sdata->vif.type) { + case NL80211_IFTYPE_STATION: + ieee80211_sta_quiesce(sdata); + return true; + case NL80211_IFTYPE_ADHOC: + ieee80211_ibss_quiesce(sdata); + return true; + case NL80211_IFTYPE_MESH_POINT: + ieee80211_mesh_quiesce(sdata); + return true; + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_MONITOR: + /* don't tell driver about this */ + return false; + default: + return true; + } +} + int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_sub_if_data *sdata; struct sta_info *sta; + if (!local->open_count) + goto suspend; + ieee80211_scan_cancel(local); if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { mutex_lock(&local->sta_mtx); list_for_each_entry(sta, &local->sta_list, list) { - set_sta_flags(sta, WLAN_STA_BLOCK_BA); + set_sta_flag(sta, WLAN_STA_BLOCK_BA); ieee80211_sta_tear_down_BA_sessions(sta, true); } mutex_unlock(&local->sta_mtx); @@ -50,11 +75,19 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) local->wowlan = wowlan && local->open_count; if (local->wowlan) { int err = drv_suspend(local, wowlan); - if (err) { + if (err < 0) { local->quiescing = false; return err; + } else if (err > 0) { + WARN_ON(err != 1); + local->wowlan = false; + } else { + list_for_each_entry(sdata, &local->interfaces, list) { + cancel_work_sync(&sdata->work); + ieee80211_quiesce(sdata); + } + goto suspend; } - goto suspend; } /* disable keys */ @@ -82,23 +115,8 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) list_for_each_entry(sdata, &local->interfaces, list) { cancel_work_sync(&sdata->work); - switch(sdata->vif.type) { - case NL80211_IFTYPE_STATION: - ieee80211_sta_quiesce(sdata); - break; - case NL80211_IFTYPE_ADHOC: - ieee80211_ibss_quiesce(sdata); - break; - case NL80211_IFTYPE_MESH_POINT: - ieee80211_mesh_quiesce(sdata); - break; - case NL80211_IFTYPE_AP_VLAN: - case NL80211_IFTYPE_MONITOR: - /* don't tell driver about this */ + if (!ieee80211_quiesce(sdata)) continue; - default: - break; - } if (!ieee80211_sdata_running(sdata)) continue; diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 816590b..7d84b87 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "rate.h" #include "ieee80211_i.h" #include "debugfs.h" @@ -199,7 +200,7 @@ static void rate_control_release(struct kref *kref) kfree(ctrl_ref); } -static bool rc_no_data_or_no_ack(struct ieee80211_tx_rate_control *txrc) +static bool rc_no_data_or_no_ack_use_min(struct ieee80211_tx_rate_control *txrc) { struct sk_buff *skb = txrc->skb; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; @@ -208,7 +209,9 @@ static bool rc_no_data_or_no_ack(struct ieee80211_tx_rate_control *txrc) fc = hdr->frame_control; - return (info->flags & IEEE80211_TX_CTL_NO_ACK) || !ieee80211_is_data(fc); + return (info->flags & (IEEE80211_TX_CTL_NO_ACK | + IEEE80211_TX_CTL_USE_MINRATE)) || + !ieee80211_is_data(fc); } static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, @@ -233,6 +236,27 @@ static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, /* could not find a basic rate; use original selection */ } +static inline s8 +rate_lowest_non_cck_index(struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta) +{ + int i; + + for (i = 0; i < sband->n_bitrates; i++) { + struct ieee80211_rate *srate = &sband->bitrates[i]; + if ((srate->bitrate == 10) || (srate->bitrate == 20) || + (srate->bitrate == 55) || (srate->bitrate == 110)) + continue; + + if (rate_supported(sta, sband->band, i)) + return i; + } + + /* No matching rate found */ + return 0; +} + + bool rate_control_send_low(struct ieee80211_sta *sta, void *priv_sta, struct ieee80211_tx_rate_control *txrc) @@ -241,8 +265,14 @@ bool rate_control_send_low(struct ieee80211_sta *sta, struct ieee80211_supported_band *sband = txrc->sband; int mcast_rate; - if (!sta || !priv_sta || rc_no_data_or_no_ack(txrc)) { - info->control.rates[0].idx = rate_lowest_index(txrc->sband, sta); + if (!sta || !priv_sta || rc_no_data_or_no_ack_use_min(txrc)) { + if ((sband->band != IEEE80211_BAND_2GHZ) || + !(info->flags & IEEE80211_TX_CTL_NO_CCK_RATE)) + info->control.rates[0].idx = + rate_lowest_index(txrc->sband, sta); + else + info->control.rates[0].idx = + rate_lowest_non_cck_index(txrc->sband, sta); info->control.rates[0].count = (info->flags & IEEE80211_TX_CTL_NO_ACK) ? 1 : txrc->hw->max_rate_tries; diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 8adac67..58a8955 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -532,12 +532,21 @@ minstrel_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) mp->hw = hw; mp->update_interval = 100; +#ifdef CONFIG_MAC80211_DEBUGFS + mp->fixed_rate_idx = (u32) -1; + mp->dbg_fixed_rate = debugfs_create_u32("fixed_rate_idx", + S_IRUGO | S_IWUGO, debugfsdir, &mp->fixed_rate_idx); +#endif + return mp; } static void minstrel_free(void *priv) { +#ifdef CONFIG_MAC80211_DEBUGFS + debugfs_remove(((struct minstrel_priv *)priv)->dbg_fixed_rate); +#endif kfree(priv); } diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h index 0f5a833..5d278ec 100644 --- a/net/mac80211/rc80211_minstrel.h +++ b/net/mac80211/rc80211_minstrel.h @@ -78,6 +78,18 @@ struct minstrel_priv { unsigned int update_interval; unsigned int lookaround_rate; unsigned int lookaround_rate_mrr; + +#ifdef CONFIG_MAC80211_DEBUGFS + /* + * enable fixed rate processing per RC + * - write static index to debugfs:ieee80211/phyX/rc/fixed_rate_idx + * - write -1 to enable RC processing again + * - setting will be applied on next update + */ + u32 fixed_rate_idx; + struct dentry *dbg_fixed_rate; +#endif + }; struct minstrel_debugfs_info { diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c index a290ad2..d5a5622 100644 --- a/net/mac80211/rc80211_minstrel_debugfs.c +++ b/net/mac80211/rc80211_minstrel_debugfs.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include "rc80211_minstrel.h" diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 333b511..cdb2853 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -281,6 +281,8 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) mr = minstrel_get_ratestats(mi, mg->max_tp_rate); if (cur_tp < mr->cur_tp) { + mi->max_tp_rate2 = mi->max_tp_rate; + cur_tp2 = cur_tp; mi->max_tp_rate = mg->max_tp_rate; cur_tp = mr->cur_tp; } @@ -452,7 +454,8 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, if (time_after(jiffies, mi->stats_update + (mp->update_interval / 2 * HZ) / 1000)) { minstrel_ht_update_stats(mp, mi); - minstrel_aggr_check(mp, sta, skb); + if (!(info->flags & IEEE80211_TX_CTL_AMPDU)) + minstrel_aggr_check(mp, sta, skb); } } @@ -608,7 +611,20 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, return mac80211_minstrel.get_rate(priv, sta, &msp->legacy, txrc); info->flags |= mi->tx_flags; - sample_idx = minstrel_get_sample_rate(mp, mi); + + /* Don't use EAPOL frames for sampling on non-mrr hw */ + if (mp->hw->max_rates == 1 && + txrc->skb->protocol == cpu_to_be16(ETH_P_PAE)) + sample_idx = -1; + else + sample_idx = minstrel_get_sample_rate(mp, mi); + +#ifdef CONFIG_MAC80211_DEBUGFS + /* use fixed index if set */ + if (mp->fixed_rate_idx != -1) + sample_idx = mp->fixed_rate_idx; +#endif + if (sample_idx >= 0) { sample = true; minstrel_ht_set_rate(mp, mi, &ar[0], sample_idx, diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c index cefcb5d..e788f76 100644 --- a/net/mac80211/rc80211_minstrel_ht_debugfs.c +++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "rc80211_minstrel.h" #include "rc80211_minstrel_ht.h" diff --git a/net/mac80211/rc80211_pid_debugfs.c b/net/mac80211/rc80211_pid_debugfs.c index 4851e9e..c97a065 100644 --- a/net/mac80211/rc80211_pid_debugfs.c +++ b/net/mac80211/rc80211_pid_debugfs.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "rate.h" diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 10e8842..7c53eff 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -334,15 +335,18 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); - int tid; + int tid, seqno_idx, security_idx; /* does the frame have a qos control field? */ if (ieee80211_is_data_qos(hdr->frame_control)) { u8 *qc = ieee80211_get_qos_ctl(hdr); /* frame has qos control */ tid = *qc & IEEE80211_QOS_CTL_TID_MASK; - if (*qc & IEEE80211_QOS_CONTROL_A_MSDU_PRESENT) + if (*qc & IEEE80211_QOS_CTL_A_MSDU_PRESENT) status->rx_flags |= IEEE80211_RX_AMSDU; + + seqno_idx = tid; + security_idx = tid; } else { /* * IEEE 802.11-2007, 7.1.3.4.1 ("Sequence Number field"): @@ -355,10 +359,15 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx) * * We also use that counter for non-QoS STAs. */ - tid = NUM_RX_DATA_QUEUES - 1; + seqno_idx = NUM_RX_DATA_QUEUES; + security_idx = 0; + if (ieee80211_is_mgmt(hdr->frame_control)) + security_idx = NUM_RX_DATA_QUEUES; + tid = 0; } - rx->queue = tid; + rx->seqno_idx = seqno_idx; + rx->security_idx = security_idx; /* Set skb->priority to 1d tag if highest order bit of TID is not set. * For now, set skb->priority to 0 for other cases. */ rx->skb->priority = (tid > 7) ? 0 : tid; @@ -412,10 +421,16 @@ ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx) return RX_CONTINUE; if (test_bit(SCAN_HW_SCANNING, &local->scanning) || - test_bit(SCAN_SW_SCANNING, &local->scanning) || local->sched_scanning) return ieee80211_scan_rx(rx->sdata, skb); + if (test_bit(SCAN_SW_SCANNING, &local->scanning)) { + /* drop all the other packets during a software scan anyway */ + if (ieee80211_scan_rx(rx->sdata, skb) != RX_QUEUED) + dev_kfree_skb(skb); + return RX_QUEUED; + } + /* scanning finished during invoking of handlers */ I802_DEBUG_INC(local->rx_handlers_drop_passive_scan); return RX_DROP_UNUSABLE; @@ -471,7 +486,6 @@ static ieee80211_rx_result ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; - unsigned int hdrlen = ieee80211_hdrlen(hdr->frame_control); char *dev_addr = rx->sdata->vif.addr; if (ieee80211_is_data(hdr->frame_control)) { @@ -501,6 +515,11 @@ ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) if (ieee80211_is_action(hdr->frame_control)) { u8 category; + + /* make sure category field is present */ + if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE) + return RX_DROP_MONITOR; + mgmt = (struct ieee80211_mgmt *)hdr; category = mgmt->u.action.category; if (category != WLAN_CATEGORY_MESH_ACTION && @@ -519,14 +538,6 @@ ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) } -#define msh_h_get(h, l) ((struct ieee80211s_hdr *) ((u8 *)h + l)) - - if (ieee80211_is_data(hdr->frame_control) && - is_multicast_ether_addr(hdr->addr1) && - mesh_rmc_check(hdr->addr3, msh_h_get(hdr, hdrlen), rx->sdata)) - return RX_DROP_MONITOR; -#undef msh_h_get - return RX_CONTINUE; } @@ -659,9 +670,10 @@ static void ieee80211_sta_reorder_release(struct ieee80211_hw *hw, set_release_timer: - mod_timer(&tid_agg_rx->reorder_timer, - tid_agg_rx->reorder_time[j] + 1 + - HT_RX_REORDER_BUF_TIMEOUT); + if (!tid_agg_rx->removed) + mod_timer(&tid_agg_rx->reorder_timer, + tid_agg_rx->reorder_time[j] + 1 + + HT_RX_REORDER_BUF_TIMEOUT); } else { del_timer(&tid_agg_rx->reorder_timer); } @@ -753,7 +765,8 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx) u16 sc; int tid; - if (!ieee80211_is_data_qos(hdr->frame_control)) + if (!ieee80211_is_data_qos(hdr->frame_control) || + is_multicast_ether_addr(hdr->addr1)) goto dont_reorder; /* @@ -819,7 +832,7 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) !ieee80211_is_qos_nullfunc(hdr->frame_control) && !is_multicast_ether_addr(hdr->addr1)) { if (unlikely(ieee80211_has_retry(hdr->frame_control) && - rx->sta->last_seq_ctrl[rx->queue] == + rx->sta->last_seq_ctrl[rx->seqno_idx] == hdr->seq_ctrl)) { if (status->rx_flags & IEEE80211_RX_RA_MATCH) { rx->local->dot11FrameDuplicateCount++; @@ -827,7 +840,7 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) } return RX_DROP_UNUSABLE; } else - rx->sta->last_seq_ctrl[rx->queue] = hdr->seq_ctrl; + rx->sta->last_seq_ctrl[rx->seqno_idx] = hdr->seq_ctrl; } if (unlikely(rx->skb->len < 16)) { @@ -851,8 +864,23 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) ieee80211_is_pspoll(hdr->frame_control)) && rx->sdata->vif.type != NL80211_IFTYPE_ADHOC && rx->sdata->vif.type != NL80211_IFTYPE_WDS && - (!rx->sta || !test_sta_flags(rx->sta, WLAN_STA_ASSOC)))) + (!rx->sta || !test_sta_flag(rx->sta, WLAN_STA_ASSOC)))) { + if (rx->sta && rx->sta->dummy && + ieee80211_is_data_present(hdr->frame_control)) { + unsigned int hdrlen; + __be16 ethertype; + + hdrlen = ieee80211_hdrlen(hdr->frame_control); + + if (rx->skb->len < hdrlen + 8) + return RX_DROP_MONITOR; + + skb_copy_bits(rx->skb, hdrlen + 6, ðertype, 2); + if (ethertype == rx->sdata->control_port_protocol) + return RX_CONTINUE; + } return RX_DROP_MONITOR; + } return RX_CONTINUE; } @@ -1020,6 +1048,9 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) } if (rx->key) { + if (unlikely(rx->key->flags & KEY_FLAG_TAINTED)) + return RX_DROP_MONITOR; + rx->key->tx_rx_count++; /* TODO: add threshold stuff again */ } else { @@ -1104,7 +1135,7 @@ static void ap_sta_ps_start(struct sta_info *sta) struct ieee80211_local *local = sdata->local; atomic_inc(&sdata->bss->num_sta_ps); - set_sta_flags(sta, WLAN_STA_PS_STA); + set_sta_flag(sta, WLAN_STA_PS_STA); if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS)) drv_sta_notify(local, sdata, STA_NOTIFY_SLEEP, &sta->sta); #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG @@ -1124,7 +1155,7 @@ static void ap_sta_ps_end(struct sta_info *sta) sdata->name, sta->sta.addr, sta->sta.aid); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - if (test_sta_flags(sta, WLAN_STA_PS_DRIVER)) { + if (test_sta_flag(sta, WLAN_STA_PS_DRIVER)) { #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %pM aid %d driver-ps-blocked\n", sdata->name, sta->sta.addr, sta->sta.aid); @@ -1143,7 +1174,7 @@ int ieee80211_sta_ps_transition(struct ieee80211_sta *sta, bool start) WARN_ON(!(sta_inf->local->hw.flags & IEEE80211_HW_AP_LINK_PS)); /* Don't let the same PS state be set twice */ - in_ps = test_sta_flags(sta_inf, WLAN_STA_PS_STA); + in_ps = test_sta_flag(sta_inf, WLAN_STA_PS_STA); if ((start && in_ps) || (!start && !in_ps)) return -EINVAL; @@ -1157,6 +1188,81 @@ int ieee80211_sta_ps_transition(struct ieee80211_sta *sta, bool start) EXPORT_SYMBOL(ieee80211_sta_ps_transition); static ieee80211_rx_result debug_noinline +ieee80211_rx_h_uapsd_and_pspoll(struct ieee80211_rx_data *rx) +{ + struct ieee80211_sub_if_data *sdata = rx->sdata; + struct ieee80211_hdr *hdr = (void *)rx->skb->data; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); + int tid, ac; + + if (!rx->sta || !(status->rx_flags & IEEE80211_RX_RA_MATCH)) + return RX_CONTINUE; + + if (sdata->vif.type != NL80211_IFTYPE_AP && + sdata->vif.type != NL80211_IFTYPE_AP_VLAN) + return RX_CONTINUE; + + /* + * The device handles station powersave, so don't do anything about + * uAPSD and PS-Poll frames (the latter shouldn't even come up from + * it to mac80211 since they're handled.) + */ + if (sdata->local->hw.flags & IEEE80211_HW_AP_LINK_PS) + return RX_CONTINUE; + + /* + * Don't do anything if the station isn't already asleep. In + * the uAPSD case, the station will probably be marked asleep, + * in the PS-Poll case the station must be confused ... + */ + if (!test_sta_flag(rx->sta, WLAN_STA_PS_STA)) + return RX_CONTINUE; + + if (unlikely(ieee80211_is_pspoll(hdr->frame_control))) { + if (!test_sta_flag(rx->sta, WLAN_STA_SP)) { + if (!test_sta_flag(rx->sta, WLAN_STA_PS_DRIVER)) + ieee80211_sta_ps_deliver_poll_response(rx->sta); + else + set_sta_flag(rx->sta, WLAN_STA_PSPOLL); + } + + /* Free PS Poll skb here instead of returning RX_DROP that would + * count as an dropped frame. */ + dev_kfree_skb(rx->skb); + + return RX_QUEUED; + } else if (!ieee80211_has_morefrags(hdr->frame_control) && + !(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) && + ieee80211_has_pm(hdr->frame_control) && + (ieee80211_is_data_qos(hdr->frame_control) || + ieee80211_is_qos_nullfunc(hdr->frame_control))) { + tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; + ac = ieee802_1d_to_ac[tid & 7]; + + /* + * If this AC is not trigger-enabled do nothing. + * + * NB: This could/should check a separate bitmap of trigger- + * enabled queues, but for now we only implement uAPSD w/o + * TSPEC changes to the ACs, so they're always the same. + */ + if (!(rx->sta->sta.uapsd_queues & BIT(ac))) + return RX_CONTINUE; + + /* if we are in a service period, do nothing */ + if (test_sta_flag(rx->sta, WLAN_STA_SP)) + return RX_CONTINUE; + + if (!test_sta_flag(rx->sta, WLAN_STA_PS_DRIVER)) + ieee80211_sta_ps_deliver_uapsd(rx->sta); + else + set_sta_flag(rx->sta, WLAN_STA_UAPSD); + } + + return RX_CONTINUE; +} + +static ieee80211_rx_result debug_noinline ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) { struct sta_info *sta = rx->sta; @@ -1214,7 +1320,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) !(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) && (rx->sdata->vif.type == NL80211_IFTYPE_AP || rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) { - if (test_sta_flags(sta, WLAN_STA_PS_STA)) { + if (test_sta_flag(sta, WLAN_STA_PS_STA)) { /* * Ignore doze->wake transitions that are * indicated by non-data frames, the standard @@ -1365,11 +1471,14 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) sc = le16_to_cpu(hdr->seq_ctrl); frag = sc & IEEE80211_SCTL_FRAG; - if (likely((!ieee80211_has_morefrags(fc) && frag == 0) || - is_multicast_ether_addr(hdr->addr1))) { - /* not fragmented */ - goto out; + if (is_multicast_ether_addr(hdr->addr1)) { + rx->local->dot11MulticastReceivedFrameCount++; + goto out_no_led; } + + if (likely(!ieee80211_has_morefrags(fc) && frag == 0)) + goto out; + I802_DEBUG_INC(rx->local->rx_handlers_fragments); if (skb_linearize(rx->skb)) @@ -1386,11 +1495,10 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) if (frag == 0) { /* This is the first fragment of a new frame. */ entry = ieee80211_reassemble_add(rx->sdata, frag, seq, - rx->queue, &(rx->skb)); + rx->seqno_idx, &(rx->skb)); if (rx->key && rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP && ieee80211_has_protected(fc)) { - int queue = ieee80211_is_mgmt(fc) ? - NUM_RX_DATA_QUEUES : rx->queue; + int queue = rx->security_idx; /* Store CCMP PN so that we can verify that the next * fragment has a sequential PN value. */ entry->ccmp = 1; @@ -1404,7 +1512,8 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) /* This is a fragment for a frame that should already be pending in * fragment cache. Add this fragment to the end of the pending entry. */ - entry = ieee80211_reassemble_find(rx->sdata, frag, seq, rx->queue, hdr); + entry = ieee80211_reassemble_find(rx->sdata, frag, seq, + rx->seqno_idx, hdr); if (!entry) { I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag); return RX_DROP_MONITOR; @@ -1424,8 +1533,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) if (pn[i]) break; } - queue = ieee80211_is_mgmt(fc) ? - NUM_RX_DATA_QUEUES : rx->queue; + queue = rx->security_idx; rpn = rx->key->u.ccmp.rx_pn[queue]; if (memcmp(pn, rpn, CCMP_PN_LEN)) return RX_DROP_UNUSABLE; @@ -1461,43 +1569,14 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) status->rx_flags |= IEEE80211_RX_FRAGMENTED; out: + ieee80211_led_rx(rx->local); + out_no_led: if (rx->sta) rx->sta->rx_packets++; - if (is_multicast_ether_addr(hdr->addr1)) - rx->local->dot11MulticastReceivedFrameCount++; - else - ieee80211_led_rx(rx->local); return RX_CONTINUE; } static ieee80211_rx_result debug_noinline -ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx) -{ - struct ieee80211_sub_if_data *sdata = rx->sdata; - __le16 fc = ((struct ieee80211_hdr *)rx->skb->data)->frame_control; - struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); - - if (likely(!rx->sta || !ieee80211_is_pspoll(fc) || - !(status->rx_flags & IEEE80211_RX_RA_MATCH))) - return RX_CONTINUE; - - if ((sdata->vif.type != NL80211_IFTYPE_AP) && - (sdata->vif.type != NL80211_IFTYPE_AP_VLAN)) - return RX_DROP_UNUSABLE; - - if (!test_sta_flags(rx->sta, WLAN_STA_PS_DRIVER)) - ieee80211_sta_ps_deliver_poll_response(rx->sta); - else - set_sta_flags(rx->sta, WLAN_STA_PSPOLL); - - /* Free PS Poll skb here instead of returning RX_DROP that would - * count as an dropped frame. */ - dev_kfree_skb(rx->skb); - - return RX_QUEUED; -} - -static ieee80211_rx_result debug_noinline ieee80211_rx_h_remove_qos_control(struct ieee80211_rx_data *rx) { u8 *data = rx->skb->data; @@ -1520,7 +1599,7 @@ static int ieee80211_802_1x_port_control(struct ieee80211_rx_data *rx) { if (unlikely(!rx->sta || - !test_sta_flags(rx->sta, WLAN_STA_AUTHORIZED))) + !test_sta_flag(rx->sta, WLAN_STA_AUTHORIZED))) return -EACCES; return 0; @@ -1563,7 +1642,7 @@ ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) if (status->flag & RX_FLAG_DECRYPTED) return 0; - if (rx->sta && test_sta_flags(rx->sta, WLAN_STA_MFP)) { + if (rx->sta && test_sta_flag(rx->sta, WLAN_STA_MFP)) { if (unlikely(!ieee80211_has_protected(fc) && ieee80211_is_unicast_robust_mgmt_frame(rx->skb) && rx->key)) { @@ -1827,15 +1906,45 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) hdr = (struct ieee80211_hdr *) skb->data; hdrlen = ieee80211_hdrlen(hdr->frame_control); + + /* make sure fixed part of mesh header is there, also checks skb len */ + if (!pskb_may_pull(rx->skb, hdrlen + 6)) + return RX_DROP_MONITOR; + mesh_hdr = (struct ieee80211s_hdr *) (skb->data + hdrlen); - if (!ieee80211_is_data(hdr->frame_control)) + /* make sure full mesh header is there, also checks skb len */ + if (!pskb_may_pull(rx->skb, + hdrlen + ieee80211_get_mesh_hdrlen(mesh_hdr))) + return RX_DROP_MONITOR; + + /* reload pointers */ + hdr = (struct ieee80211_hdr *) skb->data; + mesh_hdr = (struct ieee80211s_hdr *) (skb->data + hdrlen); + + if (ieee80211_drop_unencrypted(rx, hdr->frame_control)) + return RX_DROP_MONITOR; + + /* frame is in RMC, don't forward */ + if (ieee80211_is_data(hdr->frame_control) && + is_multicast_ether_addr(hdr->addr1) && + mesh_rmc_check(hdr->addr3, mesh_hdr, rx->sdata)) + return RX_DROP_MONITOR; + + if (!ieee80211_is_data(hdr->frame_control) || + !(status->rx_flags & IEEE80211_RX_RA_MATCH)) return RX_CONTINUE; if (!mesh_hdr->ttl) /* illegal frame */ return RX_DROP_MONITOR; + if (ieee80211_queue_stopped(&local->hw, skb_get_queue_mapping(skb))) { + IEEE80211_IFSTA_MESH_CTR_INC(&sdata->u.mesh, + dropped_frames_congestion); + return RX_DROP_MONITOR; + } + if (mesh_hdr->flags & MESH_FLAGS_AE) { struct mesh_path *mppath; char *proxied_addr; @@ -1844,9 +1953,12 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) if (is_multicast_ether_addr(hdr->addr1)) { mpp_addr = hdr->addr3; proxied_addr = mesh_hdr->eaddr1; - } else { + } else if (mesh_hdr->flags & MESH_FLAGS_AE_A5_A6) { + /* has_a4 already checked in ieee80211_rx_mesh_check */ mpp_addr = hdr->addr4; proxied_addr = mesh_hdr->eaddr2; + } else { + return RX_DROP_MONITOR; } rcu_read_lock(); @@ -1869,7 +1981,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) mesh_hdr->ttl--; - if (status->rx_flags & IEEE80211_RX_RA_MATCH) { + { if (!mesh_hdr->ttl) IEEE80211_IFSTA_MESH_CTR_INC(&rx->sdata->u.mesh, dropped_frames_ttl); @@ -1891,13 +2003,13 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) memset(info, 0, sizeof(*info)); info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; info->control.vif = &rx->sdata->vif; - skb_set_queue_mapping(skb, - ieee80211_select_queue(rx->sdata, fwd_skb)); - ieee80211_set_qos_hdr(local, skb); - if (is_multicast_ether_addr(fwd_hdr->addr1)) + if (is_multicast_ether_addr(fwd_hdr->addr1)) { IEEE80211_IFSTA_MESH_CTR_INC(&sdata->u.mesh, fwded_mcast); - else { + skb_set_queue_mapping(fwd_skb, + ieee80211_select_queue(sdata, fwd_skb)); + ieee80211_set_qos_hdr(sdata, fwd_skb); + } else { int err; /* * Save TA to addr1 to send TA a path error if a @@ -2222,12 +2334,37 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) goto handled; } break; + case WLAN_CATEGORY_SELF_PROTECTED: + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.self_prot.action_code))) + break; + + switch (mgmt->u.action.u.self_prot.action_code) { + case WLAN_SP_MESH_PEERING_OPEN: + case WLAN_SP_MESH_PEERING_CLOSE: + case WLAN_SP_MESH_PEERING_CONFIRM: + if (!ieee80211_vif_is_mesh(&sdata->vif)) + goto invalid; + if (sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE) + /* userspace handles this frame */ + break; + goto queue; + case WLAN_SP_MGK_INFORM: + case WLAN_SP_MGK_ACK: + if (!ieee80211_vif_is_mesh(&sdata->vif)) + goto invalid; + break; + } + break; case WLAN_CATEGORY_MESH_ACTION: + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.mesh_action.action_code))) + break; + if (!ieee80211_vif_is_mesh(&sdata->vif)) break; - goto queue; - case WLAN_CATEGORY_MESH_PATH_SEL: - if (!mesh_path_sel_is_hwmp(sdata)) + if (mesh_action_is_path_sel(mgmt) && + (!mesh_path_sel_is_hwmp(sdata))) break; goto queue; } @@ -2539,17 +2676,17 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx) CALL_RXH(ieee80211_rx_h_decrypt) CALL_RXH(ieee80211_rx_h_check_more_data) + CALL_RXH(ieee80211_rx_h_uapsd_and_pspoll) CALL_RXH(ieee80211_rx_h_sta_process) CALL_RXH(ieee80211_rx_h_defragment) - CALL_RXH(ieee80211_rx_h_ps_poll) CALL_RXH(ieee80211_rx_h_michael_mic_verify) /* must be after MMIC verify so header is counted in MPDU mic */ - CALL_RXH(ieee80211_rx_h_remove_qos_control) - CALL_RXH(ieee80211_rx_h_amsdu) #ifdef CONFIG_MAC80211_MESH if (ieee80211_vif_is_mesh(&rx->sdata->vif)) CALL_RXH(ieee80211_rx_h_mesh_fwding); #endif + CALL_RXH(ieee80211_rx_h_remove_qos_control) + CALL_RXH(ieee80211_rx_h_amsdu) CALL_RXH(ieee80211_rx_h_data) CALL_RXH(ieee80211_rx_h_ctrl); CALL_RXH(ieee80211_rx_h_mgmt_check) @@ -2605,7 +2742,9 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid) .sta = sta, .sdata = sta->sdata, .local = sta->local, - .queue = tid, + /* This is OK -- must be QoS data frame */ + .security_idx = tid, + .seqno_idx = tid, .flags = 0, }; struct tid_ampdu_rx *tid_agg_rx; @@ -2647,6 +2786,9 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, case NL80211_IFTYPE_ADHOC: if (!bssid) return 0; + if (compare_ether_addr(sdata->vif.addr, hdr->addr2) == 0 || + compare_ether_addr(sdata->u.ibss.bssid, hdr->addr2) == 0) + return 0; if (ieee80211_is_beacon(hdr->frame_control)) { return 1; } @@ -2689,7 +2831,9 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, } else if (!ieee80211_bssid_match(bssid, sdata->vif.addr)) { if (!(status->rx_flags & IEEE80211_RX_IN_SCAN) && - !ieee80211_is_beacon(hdr->frame_control)) + !ieee80211_is_beacon(hdr->frame_control) && + !(ieee80211_is_action(hdr->frame_control) && + sdata->vif.p2p)) return 0; status->rx_flags &= ~IEEE80211_RX_RA_MATCH; } @@ -2774,7 +2918,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, local->dot11ReceivedFragmentCount++; if (unlikely(test_bit(SCAN_HW_SCANNING, &local->scanning) || - test_bit(SCAN_SW_SCANNING, &local->scanning))) + test_bit(SCAN_OFF_CHANNEL, &local->scanning))) status->rx_flags |= IEEE80211_RX_IN_SCAN; if (ieee80211_is_mgmt(fc)) { @@ -2799,7 +2943,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, if (ieee80211_is_data(fc)) { prev_sta = NULL; - for_each_sta_info(local, hdr->addr2, sta, tmp) { + for_each_sta_info_rx(local, hdr->addr2, sta, tmp) { if (!prev_sta) { prev_sta = sta; continue; @@ -2843,7 +2987,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, continue; } - rx.sta = sta_info_get_bss(prev, hdr->addr2); + rx.sta = sta_info_get_bss_rx(prev, hdr->addr2); rx.sdata = prev; ieee80211_prepare_and_rx_handle(&rx, skb, false); @@ -2851,7 +2995,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, } if (prev) { - rx.sta = sta_info_get_bss(prev, hdr->addr2); + rx.sta = sta_info_get_bss_rx(prev, hdr->addr2); rx.sdata = prev; if (ieee80211_prepare_and_rx_handle(&rx, skb, true)) diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 7c75741..0aeea49 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -14,9 +14,10 @@ #include #include -#include +#include #include #include +#include #include #include "ieee80211_i.h" @@ -212,14 +213,6 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) if (bss) ieee80211_rx_bss_put(sdata->local, bss); - /* If we are on-operating-channel, and this packet is for the - * current channel, pass the pkt on up the stack so that - * the rest of the stack can make use of it. - */ - if (ieee80211_cfg_on_oper_channel(sdata->local) - && (channel == sdata->local->oper_channel)) - return RX_CONTINUE; - dev_kfree_skb(skb); return RX_QUEUED; } @@ -231,6 +224,9 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) enum ieee80211_band band; int i, ielen, n_chans; + if (test_bit(SCAN_HW_CANCELLED, &local->scanning)) + return false; + do { if (local->hw_scan_band == IEEE80211_NUM_BANDS) return false; @@ -251,9 +247,10 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) local->hw_scan_req->n_channels = n_chans; ielen = ieee80211_build_preq_ies(local, (u8 *)local->hw_scan_req->ie, - req->ie, req->ie_len, band, (u32) -1, - 0); + req->ie, req->ie_len, band, + req->rates[band], 0); local->hw_scan_req->ie_len = ielen; + local->hw_scan_req->no_cck = req->no_cck; return true; } @@ -262,8 +259,6 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, bool was_hw_scan) { struct ieee80211_local *local = hw_to_local(hw); - bool on_oper_chan; - bool enable_beacons = false; lockdep_assert_held(&local->mtx); @@ -296,25 +291,11 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, local->scanning = 0; local->scan_channel = NULL; - on_oper_chan = ieee80211_cfg_on_oper_channel(local); - - if (was_hw_scan || !on_oper_chan) - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); - else - /* Set power back to normal operating levels. */ - ieee80211_hw_config(local, 0); - + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); if (!was_hw_scan) { - bool on_oper_chan2; ieee80211_configure_filter(local); drv_sw_scan_complete(local); - on_oper_chan2 = ieee80211_cfg_on_oper_channel(local); - /* We should always be on-channel at this point. */ - WARN_ON(!on_oper_chan2); - if (on_oper_chan2 && (on_oper_chan != on_oper_chan2)) - enable_beacons = true; - - ieee80211_offchannel_return(local, enable_beacons); + ieee80211_offchannel_return(local, true); } ieee80211_recalc_idle(local); @@ -355,15 +336,13 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) */ drv_sw_scan_start(local); + ieee80211_offchannel_stop_beaconing(local); + local->leave_oper_channel_time = 0; local->next_scan_state = SCAN_DECISION; local->scan_channel_idx = 0; - /* We always want to use off-channel PS, even if we - * are not really leaving oper-channel. Don't - * tell the AP though, as long as we are on-channel. - */ - ieee80211_offchannel_enable_all_ps(local, false); + drv_flush(local, false); ieee80211_configure_filter(local); @@ -506,20 +485,7 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, } mutex_unlock(&local->iflist_mtx); - next_chan = local->scan_req->channels[local->scan_channel_idx]; - - if (ieee80211_cfg_on_oper_channel(local)) { - /* We're currently on operating channel. */ - if (next_chan == local->oper_channel) - /* We don't need to move off of operating channel. */ - local->next_scan_state = SCAN_SET_CHANNEL; - else - /* - * We do need to leave operating channel, as next - * scan is somewhere else. - */ - local->next_scan_state = SCAN_LEAVE_OPER_CHANNEL; - } else { + if (local->scan_channel) { /* * we're currently scanning a different channel, let's * see if we can scan another channel without interfering @@ -535,6 +501,7 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, * * Otherwise switch back to the operating channel. */ + next_chan = local->scan_req->channels[local->scan_channel_idx]; bad_latency = time_after(jiffies + ieee80211_scan_get_channel_time(next_chan), @@ -552,6 +519,12 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, local->next_scan_state = SCAN_ENTER_OPER_CHANNEL; else local->next_scan_state = SCAN_SET_CHANNEL; + } else { + /* + * we're on the operating channel currently, let's + * leave that channel now to scan another one + */ + local->next_scan_state = SCAN_LEAVE_OPER_CHANNEL; } *next_delay = 0; @@ -560,10 +533,9 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, static void ieee80211_scan_state_leave_oper_channel(struct ieee80211_local *local, unsigned long *next_delay) { - /* PS will already be in off-channel mode, - * we do that once at the beginning of scanning. - */ - ieee80211_offchannel_stop_vifs(local); + ieee80211_offchannel_stop_station(local); + + __set_bit(SCAN_OFF_CHANNEL, &local->scanning); /* * What if the nullfunc frames didn't arrive? @@ -586,15 +558,15 @@ static void ieee80211_scan_state_enter_oper_channel(struct ieee80211_local *loca { /* switch back to the operating channel */ local->scan_channel = NULL; - if (!ieee80211_cfg_on_oper_channel(local)) - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); /* - * Re-enable vifs and beaconing. Leave PS - * in off-channel state..will put that back - * on-channel at the end of scanning. + * Only re-enable station mode interface now; beaconing will be + * re-enabled once the full scan has been completed. */ - ieee80211_offchannel_return(local, true); + ieee80211_offchannel_return(local, false); + + __clear_bit(SCAN_OFF_CHANNEL, &local->scanning); *next_delay = HZ / 5; local->next_scan_state = SCAN_DECISION; @@ -652,13 +624,16 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, { int i; struct ieee80211_sub_if_data *sdata = local->scan_sdata; + enum ieee80211_band band = local->hw.conf.channel->band; for (i = 0; i < local->scan_req->n_ssids; i++) ieee80211_send_probe_req( sdata, NULL, local->scan_req->ssids[i].ssid, local->scan_req->ssids[i].ssid_len, - local->scan_req->ie, local->scan_req->ie_len); + local->scan_req->ie, local->scan_req->ie_len, + local->scan_req->rates[band], false, + local->scan_req->no_cck); /* * After sending probe requests, wait for probe responses @@ -821,10 +796,8 @@ int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata, */ void ieee80211_scan_cancel(struct ieee80211_local *local) { - bool abortscan; - /* - * We are only canceling software scan, or deferred scan that was not + * We are canceling software scan, or deferred scan that was not * yet really started (see __ieee80211_start_scan ). * * Regarding hardware scan: @@ -836,23 +809,46 @@ void ieee80211_scan_cancel(struct ieee80211_local *local) * - we can not cancel scan_work since driver can schedule it * by ieee80211_scan_completed(..., true) to finish scan * - * Hence low lever driver is responsible for canceling HW scan. + * Hence we only call the cancel_hw_scan() callback, but the low-level + * driver is still responsible for calling ieee80211_scan_completed() + * after the scan was completed/aborted. */ mutex_lock(&local->mtx); - abortscan = local->scan_req && !test_bit(SCAN_HW_SCANNING, &local->scanning); - if (abortscan) { + if (!local->scan_req) + goto out; + + /* + * We have a scan running and the driver already reported completion, + * but the worker hasn't run yet or is stuck on the mutex - mark it as + * cancelled. + */ + if (test_bit(SCAN_HW_SCANNING, &local->scanning) && + test_bit(SCAN_COMPLETED, &local->scanning)) { + set_bit(SCAN_HW_CANCELLED, &local->scanning); + goto out; + } + + if (test_bit(SCAN_HW_SCANNING, &local->scanning)) { /* - * The scan is canceled, but stop work from being pending. - * - * If the work is currently running, it must be blocked on - * the mutex, but we'll set scan_sdata = NULL and it'll - * simply exit once it acquires the mutex. + * Make sure that __ieee80211_scan_completed doesn't trigger a + * scan on another band. */ - cancel_delayed_work(&local->scan_work); - /* and clean up */ - __ieee80211_scan_completed(&local->hw, true, false); + set_bit(SCAN_HW_CANCELLED, &local->scanning); + if (local->ops->cancel_hw_scan) + drv_cancel_hw_scan(local, local->scan_sdata); + goto out; } + + /* + * If the work is currently running, it must be blocked on + * the mutex, but we'll set scan_sdata = NULL and it'll + * simply exit once it acquires the mutex. + */ + cancel_delayed_work(&local->scan_work); + /* and clean up */ + __ieee80211_scan_completed(&local->hw, true, false); +out: mutex_unlock(&local->mtx); } diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index 7733f66..578eea3 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -32,12 +32,8 @@ static void ieee80211_send_refuse_measurement_request(struct ieee80211_sub_if_da skb = dev_alloc_skb(sizeof(*msr_report) + local->hw.extra_tx_headroom + sizeof(struct ieee80211_msrment_ie)); - - if (!skb) { - printk(KERN_ERR "%s: failed to allocate buffer for " - "measurement report frame\n", sdata->name); + if (!skb) return; - } skb_reserve(skb, local->hw.extra_tx_headroom); msr_report = (struct ieee80211_mgmt *)skb_put(skb, 24); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 065a971..1914f5a 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -24,6 +24,7 @@ #include "sta_info.h" #include "debugfs_sta.h" #include "mesh.h" +#include "wme.h" /** * DOC: STA information lifetime rules @@ -97,7 +98,27 @@ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, struct sta_info *sta; sta = rcu_dereference_check(local->sta_hash[STA_HASH(addr)], - rcu_read_lock_held() || + lockdep_is_held(&local->sta_lock) || + lockdep_is_held(&local->sta_mtx)); + while (sta) { + if (sta->sdata == sdata && !sta->dummy && + memcmp(sta->sta.addr, addr, ETH_ALEN) == 0) + break; + sta = rcu_dereference_check(sta->hnext, + lockdep_is_held(&local->sta_lock) || + lockdep_is_held(&local->sta_mtx)); + } + return sta; +} + +/* get a station info entry even if it is a dummy station*/ +struct sta_info *sta_info_get_rx(struct ieee80211_sub_if_data *sdata, + const u8 *addr) +{ + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + + sta = rcu_dereference_check(local->sta_hash[STA_HASH(addr)], lockdep_is_held(&local->sta_lock) || lockdep_is_held(&local->sta_mtx)); while (sta) { @@ -105,7 +126,6 @@ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, memcmp(sta->sta.addr, addr, ETH_ALEN) == 0) break; sta = rcu_dereference_check(sta->hnext, - rcu_read_lock_held() || lockdep_is_held(&local->sta_lock) || lockdep_is_held(&local->sta_mtx)); } @@ -123,7 +143,32 @@ struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, struct sta_info *sta; sta = rcu_dereference_check(local->sta_hash[STA_HASH(addr)], - rcu_read_lock_held() || + lockdep_is_held(&local->sta_lock) || + lockdep_is_held(&local->sta_mtx)); + while (sta) { + if ((sta->sdata == sdata || + (sta->sdata->bss && sta->sdata->bss == sdata->bss)) && + !sta->dummy && + memcmp(sta->sta.addr, addr, ETH_ALEN) == 0) + break; + sta = rcu_dereference_check(sta->hnext, + lockdep_is_held(&local->sta_lock) || + lockdep_is_held(&local->sta_mtx)); + } + return sta; +} + +/* + * Get sta info either from the specified interface + * or from one of its vlans (including dummy stations) + */ +struct sta_info *sta_info_get_bss_rx(struct ieee80211_sub_if_data *sdata, + const u8 *addr) +{ + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + + sta = rcu_dereference_check(local->sta_hash[STA_HASH(addr)], lockdep_is_held(&local->sta_lock) || lockdep_is_held(&local->sta_mtx)); while (sta) { @@ -132,7 +177,6 @@ struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, memcmp(sta->sta.addr, addr, ETH_ALEN) == 0) break; sta = rcu_dereference_check(sta->hnext, - rcu_read_lock_held() || lockdep_is_held(&local->sta_lock) || lockdep_is_held(&local->sta_mtx)); } @@ -200,13 +244,22 @@ static void sta_unblock(struct work_struct *wk) if (sta->dead) return; - if (!test_sta_flags(sta, WLAN_STA_PS_STA)) + if (!test_sta_flag(sta, WLAN_STA_PS_STA)) ieee80211_sta_ps_deliver_wakeup(sta); - else if (test_and_clear_sta_flags(sta, WLAN_STA_PSPOLL)) { - clear_sta_flags(sta, WLAN_STA_PS_DRIVER); + else if (test_and_clear_sta_flag(sta, WLAN_STA_PSPOLL)) { + clear_sta_flag(sta, WLAN_STA_PS_DRIVER); + + local_bh_disable(); ieee80211_sta_ps_deliver_poll_response(sta); + local_bh_enable(); + } else if (test_and_clear_sta_flag(sta, WLAN_STA_UAPSD)) { + clear_sta_flag(sta, WLAN_STA_PS_DRIVER); + + local_bh_disable(); + ieee80211_sta_ps_deliver_uapsd(sta); + local_bh_enable(); } else - clear_sta_flags(sta, WLAN_STA_PS_DRIVER); + clear_sta_flag(sta, WLAN_STA_PS_DRIVER); } static int sta_prepare_rate_control(struct ieee80211_local *local, @@ -239,7 +292,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, return NULL; spin_lock_init(&sta->lock); - spin_lock_init(&sta->flaglock); + spin_lock_init(&sta->ps_lock); INIT_WORK(&sta->drv_unblock_wk, sta_unblock); INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work); mutex_init(&sta->ampdu_mlme.mtx); @@ -266,8 +319,10 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, */ sta->timer_to_tid[i] = i; } - skb_queue_head_init(&sta->ps_tx_buf); - skb_queue_head_init(&sta->tx_filtered); + for (i = 0; i < IEEE80211_NUM_ACS; i++) { + skb_queue_head_init(&sta->ps_tx_buf[i]); + skb_queue_head_init(&sta->tx_filtered[i]); + } for (i = 0; i < NUM_RX_DATA_QUEUES; i++) sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX); @@ -284,7 +339,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, return sta; } -static int sta_info_finish_insert(struct sta_info *sta, bool async) +static int sta_info_finish_insert(struct sta_info *sta, + bool async, bool dummy_reinsert) { struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; @@ -294,51 +350,58 @@ static int sta_info_finish_insert(struct sta_info *sta, bool async) lockdep_assert_held(&local->sta_mtx); - /* notify driver */ - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - sdata = container_of(sdata->bss, - struct ieee80211_sub_if_data, - u.ap); - err = drv_sta_add(local, sdata, &sta->sta); - if (err) { - if (!async) - return err; - printk(KERN_DEBUG "%s: failed to add IBSS STA %pM to driver (%d)" - " - keeping it anyway.\n", - sdata->name, sta->sta.addr, err); - } else { - sta->uploaded = true; + if (!sta->dummy || dummy_reinsert) { + /* notify driver */ + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata = container_of(sdata->bss, + struct ieee80211_sub_if_data, + u.ap); + err = drv_sta_add(local, sdata, &sta->sta); + if (err) { + if (!async) + return err; + printk(KERN_DEBUG "%s: failed to add IBSS STA %pM to " + "driver (%d) - keeping it anyway.\n", + sdata->name, sta->sta.addr, err); + } else { + sta->uploaded = true; #ifdef CONFIG_MAC80211_VERBOSE_DEBUG - if (async) - wiphy_debug(local->hw.wiphy, - "Finished adding IBSS STA %pM\n", - sta->sta.addr); + if (async) + wiphy_debug(local->hw.wiphy, + "Finished adding IBSS STA %pM\n", + sta->sta.addr); #endif + } + + sdata = sta->sdata; } - sdata = sta->sdata; + if (!dummy_reinsert) { + if (!async) { + local->num_sta++; + local->sta_generation++; + smp_mb(); - if (!async) { - local->num_sta++; - local->sta_generation++; - smp_mb(); + /* make the station visible */ + spin_lock_irqsave(&local->sta_lock, flags); + sta_info_hash_add(local, sta); + spin_unlock_irqrestore(&local->sta_lock, flags); + } - /* make the station visible */ - spin_lock_irqsave(&local->sta_lock, flags); - sta_info_hash_add(local, sta); - spin_unlock_irqrestore(&local->sta_lock, flags); + list_add(&sta->list, &local->sta_list); + } else { + sta->dummy = false; } - list_add(&sta->list, &local->sta_list); - - ieee80211_sta_debugfs_add(sta); - rate_control_add_sta_debugfs(sta); - - memset(&sinfo, 0, sizeof(sinfo)); - sinfo.filled = 0; - sinfo.generation = local->sta_generation; - cfg80211_new_sta(sdata->dev, sta->sta.addr, &sinfo, GFP_KERNEL); + if (!sta->dummy) { + ieee80211_sta_debugfs_add(sta); + rate_control_add_sta_debugfs(sta); + memset(&sinfo, 0, sizeof(sinfo)); + sinfo.filled = 0; + sinfo.generation = local->sta_generation; + cfg80211_new_sta(sdata->dev, sta->sta.addr, &sinfo, GFP_KERNEL); + } return 0; } @@ -355,7 +418,7 @@ static void sta_info_finish_pending(struct ieee80211_local *local) list_del(&sta->list); spin_unlock_irqrestore(&local->sta_lock, flags); - sta_info_finish_insert(sta, true); + sta_info_finish_insert(sta, true, false); spin_lock_irqsave(&local->sta_lock, flags); } @@ -372,106 +435,117 @@ static void sta_info_finish_work(struct work_struct *work) mutex_unlock(&local->sta_mtx); } -int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) +static int sta_info_insert_check(struct sta_info *sta) { - struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; - unsigned long flags; - int err = 0; /* * Can't be a WARN_ON because it can be triggered through a race: * something inserts a STA (on one CPU) without holding the RTNL * and another CPU turns off the net device. */ - if (unlikely(!ieee80211_sdata_running(sdata))) { - err = -ENETDOWN; - rcu_read_lock(); - goto out_free; - } + if (unlikely(!ieee80211_sdata_running(sdata))) + return -ENETDOWN; if (WARN_ON(compare_ether_addr(sta->sta.addr, sdata->vif.addr) == 0 || - is_multicast_ether_addr(sta->sta.addr))) { - err = -EINVAL; + is_multicast_ether_addr(sta->sta.addr))) + return -EINVAL; + + return 0; +} + +static int sta_info_insert_ibss(struct sta_info *sta) __acquires(RCU) +{ + struct ieee80211_local *local = sta->local; + struct ieee80211_sub_if_data *sdata = sta->sdata; + unsigned long flags; + + spin_lock_irqsave(&local->sta_lock, flags); + /* check if STA exists already */ + if (sta_info_get_bss_rx(sdata, sta->sta.addr)) { + spin_unlock_irqrestore(&local->sta_lock, flags); rcu_read_lock(); - goto out_free; + return -EEXIST; } - /* - * In ad-hoc mode, we sometimes need to insert stations - * from tasklet context from the RX path. To avoid races, - * always do so in that case -- see the comment below. - */ - if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { - spin_lock_irqsave(&local->sta_lock, flags); - /* check if STA exists already */ - if (sta_info_get_bss(sdata, sta->sta.addr)) { - spin_unlock_irqrestore(&local->sta_lock, flags); - rcu_read_lock(); - err = -EEXIST; - goto out_free; - } - - local->num_sta++; - local->sta_generation++; - smp_mb(); - sta_info_hash_add(local, sta); + local->num_sta++; + local->sta_generation++; + smp_mb(); + sta_info_hash_add(local, sta); - list_add_tail(&sta->list, &local->sta_pending_list); + list_add_tail(&sta->list, &local->sta_pending_list); - rcu_read_lock(); - spin_unlock_irqrestore(&local->sta_lock, flags); + rcu_read_lock(); + spin_unlock_irqrestore(&local->sta_lock, flags); #ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(local->hw.wiphy, "Added IBSS STA %pM\n", - sta->sta.addr); + wiphy_debug(local->hw.wiphy, "Added IBSS STA %pM\n", + sta->sta.addr); #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - ieee80211_queue_work(&local->hw, &local->sta_finish_work); + ieee80211_queue_work(&local->hw, &local->sta_finish_work); - return 0; - } + return 0; +} + +/* + * should be called with sta_mtx locked + * this function replaces the mutex lock + * with a RCU lock + */ +static int sta_info_insert_non_ibss(struct sta_info *sta) __acquires(RCU) +{ + struct ieee80211_local *local = sta->local; + struct ieee80211_sub_if_data *sdata = sta->sdata; + unsigned long flags; + struct sta_info *exist_sta; + bool dummy_reinsert = false; + int err = 0; + + lockdep_assert_held(&local->sta_mtx); /* * On first glance, this will look racy, because the code - * below this point, which inserts a station with sleeping, + * in this function, which inserts a station with sleeping, * unlocks the sta_lock between checking existence in the * hash table and inserting into it. * * However, it is not racy against itself because it keeps - * the mutex locked. It still seems to race against the - * above code that atomically inserts the station... That, - * however, is not true because the above code can only - * be invoked for IBSS interfaces, and the below code will - * not be -- and the two do not race against each other as - * the hash table also keys off the interface. + * the mutex locked. */ - might_sleep(); - - mutex_lock(&local->sta_mtx); - spin_lock_irqsave(&local->sta_lock, flags); - /* check if STA exists already */ - if (sta_info_get_bss(sdata, sta->sta.addr)) { - spin_unlock_irqrestore(&local->sta_lock, flags); - mutex_unlock(&local->sta_mtx); - rcu_read_lock(); - err = -EEXIST; - goto out_free; + /* + * check if STA exists already. + * only accept a scenario of a second call to sta_info_insert_non_ibss + * with a dummy station entry that was inserted earlier + * in that case - assume that the dummy station flag should + * be removed. + */ + exist_sta = sta_info_get_bss_rx(sdata, sta->sta.addr); + if (exist_sta) { + if (exist_sta == sta && sta->dummy) { + dummy_reinsert = true; + } else { + spin_unlock_irqrestore(&local->sta_lock, flags); + mutex_unlock(&local->sta_mtx); + rcu_read_lock(); + return -EEXIST; + } } spin_unlock_irqrestore(&local->sta_lock, flags); - err = sta_info_finish_insert(sta, false); + err = sta_info_finish_insert(sta, false, dummy_reinsert); if (err) { mutex_unlock(&local->sta_mtx); rcu_read_lock(); - goto out_free; + return err; } #ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(local->hw.wiphy, "Inserted STA %pM\n", sta->sta.addr); + wiphy_debug(local->hw.wiphy, "Inserted %sSTA %pM\n", + sta->dummy ? "dummy " : "", sta->sta.addr); #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ /* move reference to rcu-protected */ @@ -482,6 +556,51 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) mesh_accept_plinks_update(sdata); return 0; +} + +int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) +{ + struct ieee80211_local *local = sta->local; + struct ieee80211_sub_if_data *sdata = sta->sdata; + int err = 0; + + err = sta_info_insert_check(sta); + if (err) { + rcu_read_lock(); + goto out_free; + } + + /* + * In ad-hoc mode, we sometimes need to insert stations + * from tasklet context from the RX path. To avoid races, + * always do so in that case -- see the comment below. + */ + if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { + err = sta_info_insert_ibss(sta); + if (err) + goto out_free; + + return 0; + } + + /* + * It might seem that the function called below is in race against + * the function call above that atomically inserts the station... That, + * however, is not true because the above code can only + * be invoked for IBSS interfaces, and the below code will + * not be -- and the two do not race against each other as + * the hash table also keys off the interface. + */ + + might_sleep(); + + mutex_lock(&local->sta_mtx); + + err = sta_info_insert_non_ibss(sta); + if (err) + goto out_free; + + return 0; out_free: BUG_ON(!err); __sta_info_free(local, sta); @@ -497,6 +616,25 @@ int sta_info_insert(struct sta_info *sta) return err; } +/* Caller must hold sta->local->sta_mtx */ +int sta_info_reinsert(struct sta_info *sta) +{ + struct ieee80211_local *local = sta->local; + int err = 0; + + err = sta_info_insert_check(sta); + if (err) { + mutex_unlock(&local->sta_mtx); + return err; + } + + might_sleep(); + + err = sta_info_insert_non_ibss(sta); + rcu_read_unlock(); + return err; +} + static inline void __bss_tim_set(struct ieee80211_if_ap *bss, u16 aid) { /* @@ -515,64 +653,93 @@ static inline void __bss_tim_clear(struct ieee80211_if_ap *bss, u16 aid) bss->tim[aid / 8] &= ~(1 << (aid % 8)); } -static void __sta_info_set_tim_bit(struct ieee80211_if_ap *bss, - struct sta_info *sta) +static unsigned long ieee80211_tids_for_ac(int ac) { - BUG_ON(!bss); - - __bss_tim_set(bss, sta->sta.aid); - - if (sta->local->ops->set_tim) { - sta->local->tim_in_locked_section = true; - drv_set_tim(sta->local, &sta->sta, true); - sta->local->tim_in_locked_section = false; + /* If we ever support TIDs > 7, this obviously needs to be adjusted */ + switch (ac) { + case IEEE80211_AC_VO: + return BIT(6) | BIT(7); + case IEEE80211_AC_VI: + return BIT(4) | BIT(5); + case IEEE80211_AC_BE: + return BIT(0) | BIT(3); + case IEEE80211_AC_BK: + return BIT(1) | BIT(2); + default: + WARN_ON(1); + return 0; } } -void sta_info_set_tim_bit(struct sta_info *sta) +void sta_info_recalc_tim(struct sta_info *sta) { + struct ieee80211_local *local = sta->local; + struct ieee80211_if_ap *bss = sta->sdata->bss; unsigned long flags; + bool indicate_tim = false; + u8 ignore_for_tim = sta->sta.uapsd_queues; + int ac; - BUG_ON(!sta->sdata->bss); + if (WARN_ON_ONCE(!sta->sdata->bss)) + return; - spin_lock_irqsave(&sta->local->sta_lock, flags); - __sta_info_set_tim_bit(sta->sdata->bss, sta); - spin_unlock_irqrestore(&sta->local->sta_lock, flags); -} + /* No need to do anything if the driver does all */ + if (local->hw.flags & IEEE80211_HW_AP_LINK_PS) + return; -static void __sta_info_clear_tim_bit(struct ieee80211_if_ap *bss, - struct sta_info *sta) -{ - BUG_ON(!bss); + if (sta->dead) + goto done; - __bss_tim_clear(bss, sta->sta.aid); + /* + * If all ACs are delivery-enabled then we should build + * the TIM bit for all ACs anyway; if only some are then + * we ignore those and build the TIM bit using only the + * non-enabled ones. + */ + if (ignore_for_tim == BIT(IEEE80211_NUM_ACS) - 1) + ignore_for_tim = 0; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + unsigned long tids; + + if (ignore_for_tim & BIT(ac)) + continue; + + indicate_tim |= !skb_queue_empty(&sta->tx_filtered[ac]) || + !skb_queue_empty(&sta->ps_tx_buf[ac]); + if (indicate_tim) + break; - if (sta->local->ops->set_tim) { - sta->local->tim_in_locked_section = true; - drv_set_tim(sta->local, &sta->sta, false); - sta->local->tim_in_locked_section = false; + tids = ieee80211_tids_for_ac(ac); + + indicate_tim |= + sta->driver_buffered_tids & tids; } -} -void sta_info_clear_tim_bit(struct sta_info *sta) -{ - unsigned long flags; + done: + spin_lock_irqsave(&local->sta_lock, flags); - BUG_ON(!sta->sdata->bss); + if (indicate_tim) + __bss_tim_set(bss, sta->sta.aid); + else + __bss_tim_clear(bss, sta->sta.aid); - spin_lock_irqsave(&sta->local->sta_lock, flags); - __sta_info_clear_tim_bit(sta->sdata->bss, sta); - spin_unlock_irqrestore(&sta->local->sta_lock, flags); + if (local->ops->set_tim) { + local->tim_in_locked_section = true; + drv_set_tim(local, &sta->sta, indicate_tim); + local->tim_in_locked_section = false; + } + + spin_unlock_irqrestore(&local->sta_lock, flags); } -static int sta_info_buffer_expired(struct sta_info *sta, - struct sk_buff *skb) +static bool sta_info_buffer_expired(struct sta_info *sta, struct sk_buff *skb) { struct ieee80211_tx_info *info; int timeout; if (!skb) - return 0; + return false; info = IEEE80211_SKB_CB(skb); @@ -586,24 +753,59 @@ static int sta_info_buffer_expired(struct sta_info *sta, } -static bool sta_info_cleanup_expire_buffered(struct ieee80211_local *local, - struct sta_info *sta) +static bool sta_info_cleanup_expire_buffered_ac(struct ieee80211_local *local, + struct sta_info *sta, int ac) { unsigned long flags; struct sk_buff *skb; - if (skb_queue_empty(&sta->ps_tx_buf)) - return false; + /* + * First check for frames that should expire on the filtered + * queue. Frames here were rejected by the driver and are on + * a separate queue to avoid reordering with normal PS-buffered + * frames. They also aren't accounted for right now in the + * total_ps_buffered counter. + */ + for (;;) { + spin_lock_irqsave(&sta->tx_filtered[ac].lock, flags); + skb = skb_peek(&sta->tx_filtered[ac]); + if (sta_info_buffer_expired(sta, skb)) + skb = __skb_dequeue(&sta->tx_filtered[ac]); + else + skb = NULL; + spin_unlock_irqrestore(&sta->tx_filtered[ac].lock, flags); + + /* + * Frames are queued in order, so if this one + * hasn't expired yet we can stop testing. If + * we actually reached the end of the queue we + * also need to stop, of course. + */ + if (!skb) + break; + dev_kfree_skb(skb); + } + /* + * Now also check the normal PS-buffered queue, this will + * only find something if the filtered queue was emptied + * since the filtered frames are all before the normal PS + * buffered frames. + */ for (;;) { - spin_lock_irqsave(&sta->ps_tx_buf.lock, flags); - skb = skb_peek(&sta->ps_tx_buf); + spin_lock_irqsave(&sta->ps_tx_buf[ac].lock, flags); + skb = skb_peek(&sta->ps_tx_buf[ac]); if (sta_info_buffer_expired(sta, skb)) - skb = __skb_dequeue(&sta->ps_tx_buf); + skb = __skb_dequeue(&sta->ps_tx_buf[ac]); else skb = NULL; - spin_unlock_irqrestore(&sta->ps_tx_buf.lock, flags); + spin_unlock_irqrestore(&sta->ps_tx_buf[ac].lock, flags); + /* + * frames are queued in order, so if this one + * hasn't expired yet (or we reached the end of + * the queue) we can stop testing + */ if (!skb) break; @@ -613,22 +815,47 @@ static bool sta_info_cleanup_expire_buffered(struct ieee80211_local *local, sta->sta.addr); #endif dev_kfree_skb(skb); - - if (skb_queue_empty(&sta->ps_tx_buf) && - !test_sta_flags(sta, WLAN_STA_PS_DRIVER_BUF)) - sta_info_clear_tim_bit(sta); } - return true; + /* + * Finally, recalculate the TIM bit for this station -- it might + * now be clear because the station was too slow to retrieve its + * frames. + */ + sta_info_recalc_tim(sta); + + /* + * Return whether there are any frames still buffered, this is + * used to check whether the cleanup timer still needs to run, + * if there are no frames we don't need to rearm the timer. + */ + return !(skb_queue_empty(&sta->ps_tx_buf[ac]) && + skb_queue_empty(&sta->tx_filtered[ac])); +} + +static bool sta_info_cleanup_expire_buffered(struct ieee80211_local *local, + struct sta_info *sta) +{ + bool have_buffered = false; + int ac; + + /* This is only necessary for stations on BSS interfaces */ + if (!sta->sdata->bss) + return false; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) + have_buffered |= + sta_info_cleanup_expire_buffered_ac(local, sta, ac); + + return have_buffered; } static int __must_check __sta_info_destroy(struct sta_info *sta) { struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; - struct sk_buff *skb; unsigned long flags; - int ret, i; + int ret, i, ac; might_sleep(); @@ -644,7 +871,7 @@ static int __must_check __sta_info_destroy(struct sta_info *sta) * sessions -- block that to make sure the tear-down * will be sufficient. */ - set_sta_flags(sta, WLAN_STA_BLOCK_BA); + set_sta_flag(sta, WLAN_STA_BLOCK_BA); ieee80211_sta_tear_down_BA_sessions(sta, true); spin_lock_irqsave(&local->sta_lock, flags); @@ -665,19 +892,22 @@ static int __must_check __sta_info_destroy(struct sta_info *sta) sta->dead = true; - if (test_and_clear_sta_flags(sta, - WLAN_STA_PS_STA | WLAN_STA_PS_DRIVER)) { + if (test_sta_flag(sta, WLAN_STA_PS_STA) || + test_sta_flag(sta, WLAN_STA_PS_DRIVER)) { BUG_ON(!sdata->bss); + clear_sta_flag(sta, WLAN_STA_PS_STA); + clear_sta_flag(sta, WLAN_STA_PS_DRIVER); + atomic_dec(&sdata->bss->num_sta_ps); - sta_info_clear_tim_bit(sta); + sta_info_recalc_tim(sta); } local->num_sta--; local->sta_generation++; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - rcu_assign_pointer(sdata->u.vlan.sta, NULL); + RCU_INIT_POINTER(sdata->u.vlan.sta, NULL); if (sta->uploaded) { if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) @@ -696,6 +926,12 @@ static int __must_check __sta_info_destroy(struct sta_info *sta) */ synchronize_rcu(); + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + local->total_ps_buffered -= skb_queue_len(&sta->ps_tx_buf[ac]); + __skb_queue_purge(&sta->ps_tx_buf[ac]); + __skb_queue_purge(&sta->tx_filtered[ac]); + } + #ifdef CONFIG_MAC80211_MESH if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_accept_plinks_update(sdata); @@ -718,14 +954,6 @@ static int __must_check __sta_info_destroy(struct sta_info *sta) } #endif - while ((skb = skb_dequeue(&sta->ps_tx_buf)) != NULL) { - local->total_ps_buffered--; - dev_kfree_skb_any(skb); - } - - while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) - dev_kfree_skb_any(skb); - __sta_info_free(local, sta); return 0; @@ -737,7 +965,7 @@ int sta_info_destroy_addr(struct ieee80211_sub_if_data *sdata, const u8 *addr) int ret; mutex_lock(&sdata->local->sta_mtx); - sta = sta_info_get(sdata, addr); + sta = sta_info_get_rx(sdata, addr); ret = __sta_info_destroy(sta); mutex_unlock(&sdata->local->sta_mtx); @@ -751,7 +979,7 @@ int sta_info_destroy_addr_bss(struct ieee80211_sub_if_data *sdata, int ret; mutex_lock(&sdata->local->sta_mtx); - sta = sta_info_get_bss(sdata, addr); + sta = sta_info_get_bss_rx(sdata, addr); ret = __sta_info_destroy(sta); mutex_unlock(&sdata->local->sta_mtx); @@ -891,7 +1119,8 @@ static void clear_sta_ps_flags(void *_sta) { struct sta_info *sta = _sta; - clear_sta_flags(sta, WLAN_STA_PS_DRIVER | WLAN_STA_PS_STA); + clear_sta_flag(sta, WLAN_STA_PS_DRIVER); + clear_sta_flag(sta, WLAN_STA_PS_STA); } /* powersave support code */ @@ -899,88 +1128,350 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; - int sent, buffered; + struct sk_buff_head pending; + int filtered = 0, buffered = 0, ac; + unsigned long flags; + + clear_sta_flag(sta, WLAN_STA_SP); + + BUILD_BUG_ON(BITS_TO_LONGS(STA_TID_NUM) > 1); + sta->driver_buffered_tids = 0; - clear_sta_flags(sta, WLAN_STA_PS_DRIVER_BUF); if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS)) drv_sta_notify(local, sdata, STA_NOTIFY_AWAKE, &sta->sta); - if (!skb_queue_empty(&sta->ps_tx_buf)) - sta_info_clear_tim_bit(sta); + skb_queue_head_init(&pending); + /* sync with ieee80211_tx_h_unicast_ps_buf */ + spin_lock(&sta->ps_lock); /* Send all buffered frames to the station */ - sent = ieee80211_add_pending_skbs(local, &sta->tx_filtered); - buffered = ieee80211_add_pending_skbs_fn(local, &sta->ps_tx_buf, - clear_sta_ps_flags, sta); - sent += buffered; + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + int count = skb_queue_len(&pending), tmp; + + spin_lock_irqsave(&sta->tx_filtered[ac].lock, flags); + skb_queue_splice_tail_init(&sta->tx_filtered[ac], &pending); + spin_unlock_irqrestore(&sta->tx_filtered[ac].lock, flags); + tmp = skb_queue_len(&pending); + filtered += tmp - count; + count = tmp; + + spin_lock_irqsave(&sta->ps_tx_buf[ac].lock, flags); + skb_queue_splice_tail_init(&sta->ps_tx_buf[ac], &pending); + spin_unlock_irqrestore(&sta->ps_tx_buf[ac].lock, flags); + tmp = skb_queue_len(&pending); + buffered += tmp - count; + } + + ieee80211_add_pending_skbs_fn(local, &pending, clear_sta_ps_flags, sta); + spin_unlock(&sta->ps_lock); + local->total_ps_buffered -= buffered; + sta_info_recalc_tim(sta); + #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %pM aid %d sending %d filtered/%d PS frames " "since STA not sleeping anymore\n", sdata->name, - sta->sta.addr, sta->sta.aid, sent - buffered, buffered); + sta->sta.addr, sta->sta.aid, filtered, buffered); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ } -void ieee80211_sta_ps_deliver_poll_response(struct sta_info *sta) +static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, int tid, + enum ieee80211_frame_release_type reason) { - struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; + struct ieee80211_qos_hdr *nullfunc; struct sk_buff *skb; - int no_pending_pkts; + int size = sizeof(*nullfunc); + __le16 fc; + bool qos = test_sta_flag(sta, WLAN_STA_WME); + struct ieee80211_tx_info *info; - skb = skb_dequeue(&sta->tx_filtered); - if (!skb) { - skb = skb_dequeue(&sta->ps_tx_buf); - if (skb) - local->total_ps_buffered--; + if (qos) { + fc = cpu_to_le16(IEEE80211_FTYPE_DATA | + IEEE80211_STYPE_QOS_NULLFUNC | + IEEE80211_FCTL_FROMDS); + } else { + size -= 2; + fc = cpu_to_le16(IEEE80211_FTYPE_DATA | + IEEE80211_STYPE_NULLFUNC | + IEEE80211_FCTL_FROMDS); + } + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + size); + if (!skb) + return; + + skb_reserve(skb, local->hw.extra_tx_headroom); + + nullfunc = (void *) skb_put(skb, size); + nullfunc->frame_control = fc; + nullfunc->duration_id = 0; + memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); + memcpy(nullfunc->addr2, sdata->vif.addr, ETH_ALEN); + memcpy(nullfunc->addr3, sdata->vif.addr, ETH_ALEN); + nullfunc->seq_ctrl = 0; + + skb->priority = tid; + skb_set_queue_mapping(skb, ieee802_1d_to_ac[tid]); + if (qos) { + nullfunc->qos_ctrl = cpu_to_le16(tid); + + if (reason == IEEE80211_FRAME_RELEASE_UAPSD) + nullfunc->qos_ctrl |= + cpu_to_le16(IEEE80211_QOS_CTL_EOSP); } - no_pending_pkts = skb_queue_empty(&sta->tx_filtered) && - skb_queue_empty(&sta->ps_tx_buf); - if (skb) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_hdr *hdr = - (struct ieee80211_hdr *) skb->data; + info = IEEE80211_SKB_CB(skb); + + /* + * Tell TX path to send this frame even though the + * STA may still remain is PS mode after this frame + * exchange. Also set EOSP to indicate this packet + * ends the poll/service period. + */ + info->flags |= IEEE80211_TX_CTL_POLL_RESPONSE | + IEEE80211_TX_STATUS_EOSP | + IEEE80211_TX_CTL_REQ_TX_STATUS; + + drv_allow_buffered_frames(local, sta, BIT(tid), 1, reason, false); + + ieee80211_xmit(sdata, skb); +} + +static void +ieee80211_sta_ps_deliver_response(struct sta_info *sta, + int n_frames, u8 ignored_acs, + enum ieee80211_frame_release_type reason) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + bool found = false; + bool more_data = false; + int ac; + unsigned long driver_release_tids = 0; + struct sk_buff_head frames; + + /* Service or PS-Poll period starts */ + set_sta_flag(sta, WLAN_STA_SP); + + __skb_queue_head_init(&frames); + + /* + * Get response frame(s) and more data bit for it. + */ + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + unsigned long tids; + + if (ignored_acs & BIT(ac)) + continue; + + tids = ieee80211_tids_for_ac(ac); + + if (!found) { + driver_release_tids = sta->driver_buffered_tids & tids; + if (driver_release_tids) { + found = true; + } else { + struct sk_buff *skb; + + while (n_frames > 0) { + skb = skb_dequeue(&sta->tx_filtered[ac]); + if (!skb) { + skb = skb_dequeue( + &sta->ps_tx_buf[ac]); + if (skb) + local->total_ps_buffered--; + } + if (!skb) + break; + n_frames--; + found = true; + __skb_queue_tail(&frames, skb); + } + } + + /* + * If the driver has data on more than one TID then + * certainly there's more data if we release just a + * single frame now (from a single TID). + */ + if (reason == IEEE80211_FRAME_RELEASE_PSPOLL && + hweight16(driver_release_tids) > 1) { + more_data = true; + driver_release_tids = + BIT(ffs(driver_release_tids) - 1); + break; + } + } + + if (!skb_queue_empty(&sta->tx_filtered[ac]) || + !skb_queue_empty(&sta->ps_tx_buf[ac])) { + more_data = true; + break; + } + } + + if (!found) { + int tid; /* - * Tell TX path to send this frame even though the STA may - * still remain is PS mode after this frame exchange. + * For PS-Poll, this can only happen due to a race condition + * when we set the TIM bit and the station notices it, but + * before it can poll for the frame we expire it. + * + * For uAPSD, this is said in the standard (11.2.1.5 h): + * At each unscheduled SP for a non-AP STA, the AP shall + * attempt to transmit at least one MSDU or MMPDU, but no + * more than the value specified in the Max SP Length field + * in the QoS Capability element from delivery-enabled ACs, + * that are destined for the non-AP STA. + * + * Since we have no other MSDU/MMPDU, transmit a QoS null frame. */ - info->flags |= IEEE80211_TX_CTL_PSPOLL_RESPONSE; -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "STA %pM aid %d: PS Poll (entries after %d)\n", - sta->sta.addr, sta->sta.aid, - skb_queue_len(&sta->ps_tx_buf)); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + /* This will evaluate to 1, 3, 5 or 7. */ + tid = 7 - ((ffs(~ignored_acs) - 1) << 1); - /* Use MoreData flag to indicate whether there are more - * buffered frames for this STA */ - if (no_pending_pkts) - hdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREDATA); - else - hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA); + ieee80211_send_null_response(sdata, sta, tid, reason); + return; + } + + if (!driver_release_tids) { + struct sk_buff_head pending; + struct sk_buff *skb; + int num = 0; + u16 tids = 0; + + skb_queue_head_init(&pending); + + while ((skb = __skb_dequeue(&frames))) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = (void *) skb->data; + u8 *qoshdr = NULL; + + num++; + + /* + * Tell TX path to send this frame even though the + * STA may still remain is PS mode after this frame + * exchange. + */ + info->flags |= IEEE80211_TX_CTL_POLL_RESPONSE; + + /* + * Use MoreData flag to indicate whether there are + * more buffered frames for this STA + */ + if (more_data || !skb_queue_empty(&frames)) + hdr->frame_control |= + cpu_to_le16(IEEE80211_FCTL_MOREDATA); + else + hdr->frame_control &= + cpu_to_le16(~IEEE80211_FCTL_MOREDATA); + + if (ieee80211_is_data_qos(hdr->frame_control) || + ieee80211_is_qos_nullfunc(hdr->frame_control)) + qoshdr = ieee80211_get_qos_ctl(hdr); + + /* set EOSP for the frame */ + if (reason == IEEE80211_FRAME_RELEASE_UAPSD && + qoshdr && skb_queue_empty(&frames)) + *qoshdr |= IEEE80211_QOS_CTL_EOSP; + + info->flags |= IEEE80211_TX_STATUS_EOSP | + IEEE80211_TX_CTL_REQ_TX_STATUS; + + if (qoshdr) + tids |= BIT(*qoshdr & IEEE80211_QOS_CTL_TID_MASK); + else + tids |= BIT(0); + + __skb_queue_tail(&pending, skb); + } - ieee80211_add_pending_skb(local, skb); + drv_allow_buffered_frames(local, sta, tids, num, + reason, more_data); - if (no_pending_pkts) - sta_info_clear_tim_bit(sta); -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + ieee80211_add_pending_skbs(local, &pending); + + sta_info_recalc_tim(sta); } else { /* - * FIXME: This can be the result of a race condition between - * us expiring a frame and the station polling for it. - * Should we send it a null-func frame indicating we - * have nothing buffered for it? + * We need to release a frame that is buffered somewhere in the + * driver ... it'll have to handle that. + * Note that, as per the comment above, it'll also have to see + * if there is more than just one frame on the specific TID that + * we're releasing from, and it needs to set the more-data bit + * accordingly if we tell it that there's no more data. If we do + * tell it there's more data, then of course the more-data bit + * needs to be set anyway. + */ + drv_release_buffered_frames(local, sta, driver_release_tids, + n_frames, reason, more_data); + + /* + * Note that we don't recalculate the TIM bit here as it would + * most likely have no effect at all unless the driver told us + * that the TID became empty before returning here from the + * release function. + * Either way, however, when the driver tells us that the TID + * became empty we'll do the TIM recalculation. */ - printk(KERN_DEBUG "%s: STA %pM sent PS Poll even " - "though there are no buffered frames for it\n", - sdata->name, sta->sta.addr); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ } } +void ieee80211_sta_ps_deliver_poll_response(struct sta_info *sta) +{ + u8 ignore_for_response = sta->sta.uapsd_queues; + + /* + * If all ACs are delivery-enabled then we should reply + * from any of them, if only some are enabled we reply + * only from the non-enabled ones. + */ + if (ignore_for_response == BIT(IEEE80211_NUM_ACS) - 1) + ignore_for_response = 0; + + ieee80211_sta_ps_deliver_response(sta, 1, ignore_for_response, + IEEE80211_FRAME_RELEASE_PSPOLL); +} + +void ieee80211_sta_ps_deliver_uapsd(struct sta_info *sta) +{ + int n_frames = sta->sta.max_sp; + u8 delivery_enabled = sta->sta.uapsd_queues; + + /* + * If we ever grow support for TSPEC this might happen if + * the TSPEC update from hostapd comes in between a trigger + * frame setting WLAN_STA_UAPSD in the RX path and this + * actually getting called. + */ + if (!delivery_enabled) + return; + + switch (sta->sta.max_sp) { + case 1: + n_frames = 2; + break; + case 2: + n_frames = 4; + break; + case 3: + n_frames = 6; + break; + case 0: + /* XXX: what is a good value? */ + n_frames = 8; + break; + } + + ieee80211_sta_ps_deliver_response(sta, n_frames, ~delivery_enabled, + IEEE80211_FRAME_RELEASE_UAPSD); +} + void ieee80211_sta_block_awake(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, bool block) { @@ -989,17 +1480,50 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw, trace_api_sta_block_awake(sta->local, pubsta, block); if (block) - set_sta_flags(sta, WLAN_STA_PS_DRIVER); - else if (test_sta_flags(sta, WLAN_STA_PS_DRIVER)) + set_sta_flag(sta, WLAN_STA_PS_DRIVER); + else if (test_sta_flag(sta, WLAN_STA_PS_DRIVER)) ieee80211_queue_work(hw, &sta->drv_unblock_wk); } EXPORT_SYMBOL(ieee80211_sta_block_awake); -void ieee80211_sta_set_tim(struct ieee80211_sta *pubsta) +void ieee80211_sta_eosp_irqsafe(struct ieee80211_sta *pubsta) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + struct ieee80211_local *local = sta->local; + struct sk_buff *skb; + struct skb_eosp_msg_data *data; + + trace_api_eosp(local, pubsta); + + skb = alloc_skb(0, GFP_ATOMIC); + if (!skb) { + /* too bad ... but race is better than loss */ + clear_sta_flag(sta, WLAN_STA_SP); + return; + } + + data = (void *)skb->cb; + memcpy(data->sta, pubsta->addr, ETH_ALEN); + memcpy(data->iface, sta->sdata->vif.addr, ETH_ALEN); + skb->pkt_type = IEEE80211_EOSP_MSG; + skb_queue_tail(&local->skb_queue, skb); + tasklet_schedule(&local->tasklet); +} +EXPORT_SYMBOL(ieee80211_sta_eosp_irqsafe); + +void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta, + u8 tid, bool buffered) +{ + struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + + if (WARN_ON(tid >= STA_TID_NUM)) + return; + + if (buffered) + set_bit(tid, &sta->driver_buffered_tids); + else + clear_bit(tid, &sta->driver_buffered_tids); - set_sta_flags(sta, WLAN_STA_PS_DRIVER_BUF); - sta_info_set_tim_bit(sta); + sta_info_recalc_tim(sta); } -EXPORT_SYMBOL(ieee80211_sta_set_tim); +EXPORT_SYMBOL(ieee80211_sta_set_buffered); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index c6ae871..556fbcc 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -19,7 +19,8 @@ /** * enum ieee80211_sta_info_flags - Stations flags * - * These flags are used with &struct sta_info's @flags member. + * These flags are used with &struct sta_info's @flags member, but + * only indirectly with set_sta_flag() and friends. * * @WLAN_STA_AUTH: Station is authenticated. * @WLAN_STA_ASSOC: Station is associated. @@ -43,24 +44,33 @@ * be in the queues * @WLAN_STA_PSPOLL: Station sent PS-poll while driver was keeping * station in power-save mode, reply when the driver unblocks. - * @WLAN_STA_PS_DRIVER_BUF: Station has frames pending in driver internal - * buffers. Automatically cleared on station wake-up. + * @WLAN_STA_TDLS_PEER: Station is a TDLS peer. + * @WLAN_STA_TDLS_PEER_AUTH: This TDLS peer is authorized to send direct + * packets. This means the link is enabled. + * @WLAN_STA_UAPSD: Station requested unscheduled SP while driver was + * keeping station in power-save mode, reply when the driver + * unblocks the station. + * @WLAN_STA_SP: Station is in a service period, so don't try to + * reply to other uAPSD trigger frames or PS-Poll. */ enum ieee80211_sta_info_flags { - WLAN_STA_AUTH = 1<<0, - WLAN_STA_ASSOC = 1<<1, - WLAN_STA_PS_STA = 1<<2, - WLAN_STA_AUTHORIZED = 1<<3, - WLAN_STA_SHORT_PREAMBLE = 1<<4, - WLAN_STA_ASSOC_AP = 1<<5, - WLAN_STA_WME = 1<<6, - WLAN_STA_WDS = 1<<7, - WLAN_STA_CLEAR_PS_FILT = 1<<9, - WLAN_STA_MFP = 1<<10, - WLAN_STA_BLOCK_BA = 1<<11, - WLAN_STA_PS_DRIVER = 1<<12, - WLAN_STA_PSPOLL = 1<<13, - WLAN_STA_PS_DRIVER_BUF = 1<<14, + WLAN_STA_AUTH, + WLAN_STA_ASSOC, + WLAN_STA_PS_STA, + WLAN_STA_AUTHORIZED, + WLAN_STA_SHORT_PREAMBLE, + WLAN_STA_ASSOC_AP, + WLAN_STA_WME, + WLAN_STA_WDS, + WLAN_STA_CLEAR_PS_FILT, + WLAN_STA_MFP, + WLAN_STA_BLOCK_BA, + WLAN_STA_PS_DRIVER, + WLAN_STA_PSPOLL, + WLAN_STA_TDLS_PEER, + WLAN_STA_TDLS_PEER_AUTH, + WLAN_STA_UAPSD, + WLAN_STA_SP, }; #define STA_TID_NUM 16 @@ -86,6 +96,8 @@ enum ieee80211_sta_info_flags { * @stop_initiator: initiator of a session stop * @tx_stop: TX DelBA frame when stopping * @buf_size: reorder buffer size at receiver + * @failed_bar_ssn: ssn of the last failed BAR tx attempt + * @bar_pending: BAR needs to be re-sent * * This structure's lifetime is managed by RCU, assignments to * the array holding it must hold the aggregation mutex. @@ -106,6 +118,9 @@ struct tid_ampdu_tx { u8 stop_initiator; bool tx_stop; u8 buf_size; + + u16 failed_bar_ssn; + bool bar_pending; }; /** @@ -123,6 +138,7 @@ struct tid_ampdu_tx { * @dialog_token: dialog token for aggregation session * @rcu_head: RCU head used for freeing this struct * @reorder_lock: serializes access to reorder buffer, see below. + * @removed: this session is removed (but might have been found due to RCU) * * This structure's lifetime is managed by RCU, assignments to * the array holding it must hold the aggregation mutex. @@ -145,6 +161,7 @@ struct tid_ampdu_rx { u16 buf_size; u16 timeout; u8 dialog_token; + bool removed; }; /** @@ -158,6 +175,8 @@ struct tid_ampdu_rx { * @work: work struct for starting/stopping aggregation * @tid_rx_timer_expired: bitmap indicating on which TIDs the * RX timer expired until the work for it runs + * @tid_rx_stop_requested: bitmap indicating which BA sessions per TID the + * driver requested to close until the work for it runs * @mtx: mutex to protect all TX data (except non-NULL assignments * to tid_tx[idx], which are protected by the sta spinlock) */ @@ -166,6 +185,7 @@ struct sta_ampdu_mlme { /* rx */ struct tid_ampdu_rx __rcu *tid_rx[STA_TID_NUM]; unsigned long tid_rx_timer_expired[BITS_TO_LONGS(STA_TID_NUM)]; + unsigned long tid_rx_stop_requested[BITS_TO_LONGS(STA_TID_NUM)]; /* tx */ struct work_struct work; struct tid_ampdu_tx __rcu *tid_tx[STA_TID_NUM]; @@ -195,15 +215,17 @@ struct sta_ampdu_mlme { * @last_rx_rate_flag: rx status flag of the last data packet * @lock: used for locking all fields that require locking, see comments * in the header file. - * @flaglock: spinlock for flags accesses * @drv_unblock_wk: used for driver PS unblocking * @listen_interval: listen interval of this station, when we're acting as AP - * @flags: STA flags, see &enum ieee80211_sta_info_flags - * @ps_tx_buf: buffer of frames to transmit to this station - * when it leaves power saving state - * @tx_filtered: buffer of frames we already tried to transmit - * but were filtered by hardware due to STA having entered - * power saving state + * @_flags: STA flags, see &enum ieee80211_sta_info_flags, do not use directly + * @ps_lock: used for powersave (when mac80211 is the AP) related locking + * @ps_tx_buf: buffers (per AC) of frames to transmit to this station + * when it leaves power saving state or polls + * @tx_filtered: buffers (per AC) of frames we already tried to + * transmit but were filtered by hardware due to STA having + * entered power saving state, these are also delivered to + * the station when it leaves powersave or polls for frames + * @driver_buffered_tids: bitmap of TIDs the driver has data buffered on * @rx_packets: Number of MSDUs received from this STA * @rx_bytes: Number of bytes received from this STA * @wep_weak_iv_count: number of weak WEP IVs received from this station @@ -235,10 +257,12 @@ struct sta_ampdu_mlme { * @plink_timer: peer link watch timer * @plink_timer_was_running: used by suspend/resume to restore timers * @debugfs: debug filesystem info - * @sta: station information we share with the driver * @dead: set to true when sta is unlinked * @uploaded: set to true when sta is uploaded to the driver * @lost_packets: number of consecutive lost packets + * @dummy: indicate a dummy station created for receiving + * EAP frames before association + * @sta: station information we share with the driver */ struct sta_info { /* General information, mostly static */ @@ -251,7 +275,6 @@ struct sta_info { struct rate_control_ref *rate_ctrl; void *rate_ctrl_priv; spinlock_t lock; - spinlock_t flaglock; struct work_struct drv_unblock_wk; @@ -261,18 +284,14 @@ struct sta_info { bool uploaded; - /* - * frequently updated, locked with own spinlock (flaglock), - * use the accessors defined below - */ - u32 flags; + /* use the accessors defined below */ + unsigned long _flags; - /* - * STA powersave frame queues, no more than the internal - * locking required. - */ - struct sk_buff_head ps_tx_buf; - struct sk_buff_head tx_filtered; + /* STA powersave lock and frame queues */ + spinlock_t ps_lock; + struct sk_buff_head ps_tx_buf[IEEE80211_NUM_ACS]; + struct sk_buff_head tx_filtered[IEEE80211_NUM_ACS]; + unsigned long driver_buffered_tids; /* Updated from RX path only, no locking requirements */ unsigned long rx_packets, rx_bytes; @@ -284,7 +303,8 @@ struct sta_info { unsigned long rx_dropped; int last_signal; struct ewma avg_signal; - __le16 last_seq_ctrl[NUM_RX_DATA_QUEUES]; + /* Plus 1 for non-QoS frames */ + __le16 last_seq_ctrl[NUM_RX_DATA_QUEUES + 1]; /* Updated from TX status path only, no locking requirements */ unsigned long tx_filtered_count; @@ -332,6 +352,9 @@ struct sta_info { unsigned int lost_packets; + /* should be right in front of sta to be in the same cache line */ + bool dummy; + /* keep last! */ struct ieee80211_sta sta; }; @@ -344,60 +367,28 @@ static inline enum nl80211_plink_state sta_plink_state(struct sta_info *sta) return NL80211_PLINK_LISTEN; } -static inline void set_sta_flags(struct sta_info *sta, const u32 flags) +static inline void set_sta_flag(struct sta_info *sta, + enum ieee80211_sta_info_flags flag) { - unsigned long irqfl; - - spin_lock_irqsave(&sta->flaglock, irqfl); - sta->flags |= flags; - spin_unlock_irqrestore(&sta->flaglock, irqfl); + set_bit(flag, &sta->_flags); } -static inline void clear_sta_flags(struct sta_info *sta, const u32 flags) +static inline void clear_sta_flag(struct sta_info *sta, + enum ieee80211_sta_info_flags flag) { - unsigned long irqfl; - - spin_lock_irqsave(&sta->flaglock, irqfl); - sta->flags &= ~flags; - spin_unlock_irqrestore(&sta->flaglock, irqfl); + clear_bit(flag, &sta->_flags); } -static inline u32 test_sta_flags(struct sta_info *sta, const u32 flags) +static inline int test_sta_flag(struct sta_info *sta, + enum ieee80211_sta_info_flags flag) { - u32 ret; - unsigned long irqfl; - - spin_lock_irqsave(&sta->flaglock, irqfl); - ret = sta->flags & flags; - spin_unlock_irqrestore(&sta->flaglock, irqfl); - - return ret; + return test_bit(flag, &sta->_flags); } -static inline u32 test_and_clear_sta_flags(struct sta_info *sta, - const u32 flags) +static inline int test_and_clear_sta_flag(struct sta_info *sta, + enum ieee80211_sta_info_flags flag) { - u32 ret; - unsigned long irqfl; - - spin_lock_irqsave(&sta->flaglock, irqfl); - ret = sta->flags & flags; - sta->flags &= ~flags; - spin_unlock_irqrestore(&sta->flaglock, irqfl); - - return ret; -} - -static inline u32 get_sta_flags(struct sta_info *sta) -{ - u32 ret; - unsigned long irqfl; - - spin_lock_irqsave(&sta->flaglock, irqfl); - ret = sta->flags; - spin_unlock_irqrestore(&sta->flaglock, irqfl); - - return ret; + return test_and_clear_bit(flag, &sta->_flags); } void ieee80211_assign_tid_tx(struct sta_info *sta, int tid, @@ -415,8 +406,8 @@ rcu_dereference_protected_tid_tx(struct sta_info *sta, int tid) #define STA_HASH(sta) (sta[5]) -/* Maximum number of frames to buffer per power saving station */ -#define STA_MAX_TX_BUFFER 128 +/* Maximum number of frames to buffer per power saving station per AC */ +#define STA_MAX_TX_BUFFER 64 /* Minimum buffered frame expiry time. If STA uses listen interval that is * smaller than this value, the minimum value here is used instead. */ @@ -432,9 +423,15 @@ rcu_dereference_protected_tid_tx(struct sta_info *sta, int tid) struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, const u8 *addr); +struct sta_info *sta_info_get_rx(struct ieee80211_sub_if_data *sdata, + const u8 *addr); + struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr); +struct sta_info *sta_info_get_bss_rx(struct ieee80211_sub_if_data *sdata, + const u8 *addr); + static inline void for_each_sta_info_type_check(struct ieee80211_local *local, const u8 *addr, @@ -455,6 +452,22 @@ void for_each_sta_info_type_check(struct ieee80211_local *local, _sta = nxt, \ nxt = _sta ? rcu_dereference(_sta->hnext) : NULL \ ) \ + /* run code only if address matches and it's not a dummy sta */ \ + if (memcmp(_sta->sta.addr, (_addr), ETH_ALEN) == 0 && \ + !_sta->dummy) + +#define for_each_sta_info_rx(local, _addr, _sta, nxt) \ + for ( /* initialise loop */ \ + _sta = rcu_dereference(local->sta_hash[STA_HASH(_addr)]),\ + nxt = _sta ? rcu_dereference(_sta->hnext) : NULL; \ + /* typecheck */ \ + for_each_sta_info_type_check(local, (_addr), _sta, nxt),\ + /* continue condition */ \ + _sta; \ + /* advance loop */ \ + _sta = nxt, \ + nxt = _sta ? rcu_dereference(_sta->hnext) : NULL \ + ) \ /* compare address and run code only if it matches */ \ if (memcmp(_sta->sta.addr, (_addr), ETH_ALEN) == 0) @@ -480,14 +493,14 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, int sta_info_insert(struct sta_info *sta); int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU); int sta_info_insert_atomic(struct sta_info *sta); +int sta_info_reinsert(struct sta_info *sta); int sta_info_destroy_addr(struct ieee80211_sub_if_data *sdata, const u8 *addr); int sta_info_destroy_addr_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr); -void sta_info_set_tim_bit(struct sta_info *sta); -void sta_info_clear_tim_bit(struct sta_info *sta); +void sta_info_recalc_tim(struct sta_info *sta); void sta_info_init(struct ieee80211_local *local); void sta_info_stop(struct ieee80211_local *local); @@ -498,5 +511,6 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta); void ieee80211_sta_ps_deliver_poll_response(struct sta_info *sta); +void ieee80211_sta_ps_deliver_uapsd(struct sta_info *sta); #endif /* STA_INFO_H */ diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 04cdbaf..1a49354 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -9,11 +9,13 @@ * published by the Free Software Foundation. */ +#include #include #include "ieee80211_i.h" #include "rate.h" #include "mesh.h" #include "led.h" +#include "wme.h" void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw, @@ -43,6 +45,8 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, struct sk_buff *skb) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = (void *)skb->data; + int ac; /* * This skb 'survived' a round-trip through the driver, and @@ -63,11 +67,37 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, sta->tx_filtered_count++; /* + * Clear more-data bit on filtered frames, it might be set + * but later frames might time out so it might have to be + * clear again ... It's all rather unlikely (this frame + * should time out first, right?) but let's not confuse + * peers unnecessarily. + */ + if (hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_MOREDATA)) + hdr->frame_control &= ~cpu_to_le16(IEEE80211_FCTL_MOREDATA); + + if (ieee80211_is_data_qos(hdr->frame_control)) { + u8 *p = ieee80211_get_qos_ctl(hdr); + int tid = *p & IEEE80211_QOS_CTL_TID_MASK; + + /* + * Clear EOSP if set, this could happen e.g. + * if an absence period (us being a P2P GO) + * shortens the SP. + */ + if (*p & IEEE80211_QOS_CTL_EOSP) + *p &= ~IEEE80211_QOS_CTL_EOSP; + ac = ieee802_1d_to_ac[tid & 7]; + } else { + ac = IEEE80211_AC_BE; + } + + /* * Clear the TX filter mask for this STA when sending the next * packet. If the STA went to power save mode, this will happen * when it wakes up for the next time. */ - set_sta_flags(sta, WLAN_STA_CLEAR_PS_FILT); + set_sta_flag(sta, WLAN_STA_CLEAR_PS_FILT); /* * This code races in the following way: @@ -103,13 +133,19 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, * changes before calling TX status events if ordering can be * unknown. */ - if (test_sta_flags(sta, WLAN_STA_PS_STA) && - skb_queue_len(&sta->tx_filtered) < STA_MAX_TX_BUFFER) { - skb_queue_tail(&sta->tx_filtered, skb); + if (test_sta_flag(sta, WLAN_STA_PS_STA) && + skb_queue_len(&sta->tx_filtered[ac]) < STA_MAX_TX_BUFFER) { + skb_queue_tail(&sta->tx_filtered[ac], skb); + sta_info_recalc_tim(sta); + + if (!timer_pending(&local->sta_cleanup)) + mod_timer(&local->sta_cleanup, + round_jiffies(jiffies + + STA_INFO_CLEANUP_INTERVAL)); return; } - if (!test_sta_flags(sta, WLAN_STA_PS_STA) && + if (!test_sta_flag(sta, WLAN_STA_PS_STA) && !(info->flags & IEEE80211_TX_INTFL_RETRIED)) { /* Software retry the packet once */ info->flags |= IEEE80211_TX_INTFL_RETRIED; @@ -121,18 +157,41 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, if (net_ratelimit()) wiphy_debug(local->hw.wiphy, "dropped TX filtered frame, queue_len=%d PS=%d @%lu\n", - skb_queue_len(&sta->tx_filtered), - !!test_sta_flags(sta, WLAN_STA_PS_STA), jiffies); + skb_queue_len(&sta->tx_filtered[ac]), + !!test_sta_flag(sta, WLAN_STA_PS_STA), jiffies); #endif dev_kfree_skb(skb); } +static void ieee80211_check_pending_bar(struct sta_info *sta, u8 *addr, u8 tid) +{ + struct tid_ampdu_tx *tid_tx; + + tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]); + if (!tid_tx || !tid_tx->bar_pending) + return; + + tid_tx->bar_pending = false; + ieee80211_send_bar(&sta->sdata->vif, addr, tid, tid_tx->failed_bar_ssn); +} + static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb) { struct ieee80211_mgmt *mgmt = (void *) skb->data; struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; + if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) + sta->last_rx = jiffies; + + if (ieee80211_is_data_qos(mgmt->frame_control)) { + struct ieee80211_hdr *hdr = (void *) skb->data; + u8 *qc = ieee80211_get_qos_ctl(hdr); + u16 tid = qc[0] & 0xf; + + ieee80211_check_pending_bar(sta, hdr->addr1, tid); + } + if (ieee80211_is_action(mgmt->frame_control) && sdata->vif.type == NL80211_IFTYPE_STATION && mgmt->u.action.category == WLAN_CATEGORY_HT && @@ -161,6 +220,114 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb) } } +static void ieee80211_set_bar_pending(struct sta_info *sta, u8 tid, u16 ssn) +{ + struct tid_ampdu_tx *tid_tx; + + tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]); + if (!tid_tx) + return; + + tid_tx->failed_bar_ssn = ssn; + tid_tx->bar_pending = true; +} + +static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info) +{ + int len = sizeof(struct ieee80211_radiotap_header); + + /* IEEE80211_RADIOTAP_RATE rate */ + if (info->status.rates[0].idx >= 0 && + !(info->status.rates[0].flags & IEEE80211_TX_RC_MCS)) + len += 2; + + /* IEEE80211_RADIOTAP_TX_FLAGS */ + len += 2; + + /* IEEE80211_RADIOTAP_DATA_RETRIES */ + len += 1; + + /* IEEE80211_TX_RC_MCS */ + if (info->status.rates[0].idx >= 0 && + info->status.rates[0].flags & IEEE80211_TX_RC_MCS) + len += 3; + + return len; +} + +static void ieee80211_add_tx_radiotap_header(struct ieee80211_supported_band + *sband, struct sk_buff *skb, + int retry_count, int rtap_len) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_radiotap_header *rthdr; + unsigned char *pos; + u16 txflags; + + rthdr = (struct ieee80211_radiotap_header *) skb_push(skb, rtap_len); + + memset(rthdr, 0, rtap_len); + rthdr->it_len = cpu_to_le16(rtap_len); + rthdr->it_present = + cpu_to_le32((1 << IEEE80211_RADIOTAP_TX_FLAGS) | + (1 << IEEE80211_RADIOTAP_DATA_RETRIES)); + pos = (unsigned char *)(rthdr + 1); + + /* + * XXX: Once radiotap gets the bitmap reset thing the vendor + * extensions proposal contains, we can actually report + * the whole set of tries we did. + */ + + /* IEEE80211_RADIOTAP_RATE */ + if (info->status.rates[0].idx >= 0 && + !(info->status.rates[0].flags & IEEE80211_TX_RC_MCS)) { + rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE); + *pos = sband->bitrates[info->status.rates[0].idx].bitrate / 5; + /* padding for tx flags */ + pos += 2; + } + + /* IEEE80211_RADIOTAP_TX_FLAGS */ + txflags = 0; + if (!(info->flags & IEEE80211_TX_STAT_ACK) && + !is_multicast_ether_addr(hdr->addr1)) + txflags |= IEEE80211_RADIOTAP_F_TX_FAIL; + + if ((info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) || + (info->status.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT)) + txflags |= IEEE80211_RADIOTAP_F_TX_CTS; + else if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) + txflags |= IEEE80211_RADIOTAP_F_TX_RTS; + + put_unaligned_le16(txflags, pos); + pos += 2; + + /* IEEE80211_RADIOTAP_DATA_RETRIES */ + /* for now report the total retry_count */ + *pos = retry_count; + pos++; + + /* IEEE80211_TX_RC_MCS */ + if (info->status.rates[0].idx >= 0 && + info->status.rates[0].flags & IEEE80211_TX_RC_MCS) { + rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS); + pos[0] = IEEE80211_RADIOTAP_MCS_HAVE_MCS | + IEEE80211_RADIOTAP_MCS_HAVE_GI | + IEEE80211_RADIOTAP_MCS_HAVE_BW; + if (info->status.rates[0].flags & IEEE80211_TX_RC_SHORT_GI) + pos[1] |= IEEE80211_RADIOTAP_MCS_SGI; + if (info->status.rates[0].flags & IEEE80211_TX_RC_40_MHZ_WIDTH) + pos[1] |= IEEE80211_RADIOTAP_MCS_BW_40; + if (info->status.rates[0].flags & IEEE80211_TX_RC_GREEN_FIELD) + pos[1] |= IEEE80211_RADIOTAP_MCS_FMT_GF; + pos[2] = info->status.rates[0].idx; + pos += 3; + } + +} + /* * Use a static threshold for now, best value to be determined * by testing ... @@ -179,7 +346,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) u16 frag, type; __le16 fc; struct ieee80211_supported_band *sband; - struct ieee80211_tx_status_rtap_hdr *rthdr; struct ieee80211_sub_if_data *sdata; struct net_device *prev_dev = NULL; struct sta_info *sta, *tmp; @@ -187,6 +353,9 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) int rates_idx = -1; bool send_to_cooked; bool acked; + struct ieee80211_bar *bar; + u16 tid; + int rtap_len; for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { if (info->status.rates[i].idx < 0) { @@ -215,8 +384,11 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (memcmp(hdr->addr2, sta->sdata->vif.addr, ETH_ALEN)) continue; + if (info->flags & IEEE80211_TX_STATUS_EOSP) + clear_sta_flag(sta, WLAN_STA_SP); + acked = !!(info->flags & IEEE80211_TX_STAT_ACK); - if (!acked && test_sta_flags(sta, WLAN_STA_PS_STA)) { + if (!acked && test_sta_flag(sta, WLAN_STA_PS_STA)) { /* * The STA is in power save mode, so assume * that this TX packet failed because of that. @@ -239,10 +411,35 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) tid = qc[0] & 0xf; ssn = ((le16_to_cpu(hdr->seq_ctrl) + 0x10) & IEEE80211_SCTL_SEQ); - ieee80211_send_bar(sta->sdata, hdr->addr1, + ieee80211_send_bar(&sta->sdata->vif, hdr->addr1, tid, ssn); } + if (!acked && ieee80211_is_back_req(fc)) { + u16 control; + + /* + * BAR failed, store the last SSN and retry sending + * the BAR when the next unicast transmission on the + * same TID succeeds. + */ + bar = (struct ieee80211_bar *) skb->data; + control = le16_to_cpu(bar->control); + if (!(control & IEEE80211_BAR_CTRL_MULTI_TID)) { + u16 ssn = le16_to_cpu(bar->start_seq_num); + + tid = (control & + IEEE80211_BAR_CTRL_TID_INFO_MASK) >> + IEEE80211_BAR_CTRL_TID_INFO_SHIFT; + + if (local->hw.flags & + IEEE80211_HW_TEARDOWN_AGGR_ON_BAR_FAIL) + ieee80211_stop_tx_ba_session(&sta->sta, tid); + else + ieee80211_set_bar_pending(sta, tid, ssn); + } + } + if (info->flags & IEEE80211_TX_STAT_TX_FILTERED) { ieee80211_handle_filtered_frame(local, sta, skb); rcu_read_unlock(); @@ -345,9 +542,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) local->hw_roc_skb_for_status = NULL; } - if (cookie == local->hw_offchan_tx_cookie) - local->hw_offchan_tx_cookie = 0; - cfg80211_mgmt_tx_status( skb->dev, cookie, skb->data, skb->len, !!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC); @@ -370,44 +564,13 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) } /* send frame to monitor interfaces now */ - - if (skb_headroom(skb) < sizeof(*rthdr)) { + rtap_len = ieee80211_tx_radiotap_len(info); + if (WARN_ON_ONCE(skb_headroom(skb) < rtap_len)) { printk(KERN_ERR "ieee80211_tx_status: headroom too small\n"); dev_kfree_skb(skb); return; } - - rthdr = (struct ieee80211_tx_status_rtap_hdr *) - skb_push(skb, sizeof(*rthdr)); - - memset(rthdr, 0, sizeof(*rthdr)); - rthdr->hdr.it_len = cpu_to_le16(sizeof(*rthdr)); - rthdr->hdr.it_present = - cpu_to_le32((1 << IEEE80211_RADIOTAP_TX_FLAGS) | - (1 << IEEE80211_RADIOTAP_DATA_RETRIES) | - (1 << IEEE80211_RADIOTAP_RATE)); - - if (!(info->flags & IEEE80211_TX_STAT_ACK) && - !is_multicast_ether_addr(hdr->addr1)) - rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_FAIL); - - /* - * XXX: Once radiotap gets the bitmap reset thing the vendor - * extensions proposal contains, we can actually report - * the whole set of tries we did. - */ - if ((info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) || - (info->status.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT)) - rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_CTS); - else if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) - rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_RTS); - if (info->status.rates[0].idx >= 0 && - !(info->status.rates[0].flags & IEEE80211_TX_RC_MCS)) - rthdr->rate = sband->bitrates[ - info->status.rates[0].idx].bitrate / 5; - - /* for now report the total retry_count */ - rthdr->data_retries = retry_count; + ieee80211_add_tx_radiotap_header(sband, skb, retry_count, rtap_len); /* XXX: is this sufficient for BPF? */ skb_set_mac_header(skb, 0); diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index 757e4eb..51077a9 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -101,6 +102,7 @@ static void tkip_mixing_phase1(const u8 *tk, struct tkip_ctx *ctx, p1k[4] += tkipS(p1k[3] ^ get_unaligned_le16(tk + 0 + j)) + i; } ctx->state = TKIP_STATE_PHASE1_DONE; + ctx->p1k_iv32 = tsc_IV32; } static void tkip_mixing_phase2(const u8 *tk, struct tkip_ctx *ctx, @@ -140,60 +142,80 @@ static void tkip_mixing_phase2(const u8 *tk, struct tkip_ctx *ctx, /* Add TKIP IV and Ext. IV at @pos. @iv0, @iv1, and @iv2 are the first octets * of the IV. Returns pointer to the octet following IVs (i.e., beginning of * the packet payload). */ -u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key, u16 iv16) +u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key) { - pos = write_tkip_iv(pos, iv16); + lockdep_assert_held(&key->u.tkip.txlock); + + pos = write_tkip_iv(pos, key->u.tkip.tx.iv16); *pos++ = (key->conf.keyidx << 6) | (1 << 5) /* Ext IV */; put_unaligned_le32(key->u.tkip.tx.iv32, pos); return pos + 4; } -void ieee80211_get_tkip_key(struct ieee80211_key_conf *keyconf, - struct sk_buff *skb, enum ieee80211_tkip_key_type type, - u8 *outkey) +static void ieee80211_compute_tkip_p1k(struct ieee80211_key *key, u32 iv32) { - struct ieee80211_key *key = (struct ieee80211_key *) - container_of(keyconf, struct ieee80211_key, conf); - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - u8 *data; - const u8 *tk; - struct tkip_ctx *ctx; - u16 iv16; - u32 iv32; + struct ieee80211_sub_if_data *sdata = key->sdata; + struct tkip_ctx *ctx = &key->u.tkip.tx; + const u8 *tk = &key->conf.key[NL80211_TKIP_DATA_OFFSET_ENCR_KEY]; - data = (u8 *)hdr + ieee80211_hdrlen(hdr->frame_control); - iv16 = data[2] | (data[0] << 8); - iv32 = get_unaligned_le32(&data[4]); + lockdep_assert_held(&key->u.tkip.txlock); + + /* + * Update the P1K when the IV32 is different from the value it + * had when we last computed it (or when not initialised yet). + * This might flip-flop back and forth if packets are processed + * out-of-order due to the different ACs, but then we have to + * just compute the P1K more often. + */ + if (ctx->p1k_iv32 != iv32 || ctx->state == TKIP_STATE_NOT_INIT) + tkip_mixing_phase1(tk, ctx, sdata->vif.addr, iv32); +} - tk = &key->conf.key[NL80211_TKIP_DATA_OFFSET_ENCR_KEY]; - ctx = &key->u.tkip.tx; +void ieee80211_get_tkip_p1k_iv(struct ieee80211_key_conf *keyconf, + u32 iv32, u16 *p1k) +{ + struct ieee80211_key *key = (struct ieee80211_key *) + container_of(keyconf, struct ieee80211_key, conf); + struct tkip_ctx *ctx = &key->u.tkip.tx; + unsigned long flags; -#ifdef CONFIG_MAC80211_TKIP_DEBUG - printk(KERN_DEBUG "TKIP encrypt: iv16 = 0x%04x, iv32 = 0x%08x\n", - iv16, iv32); - - if (iv32 != ctx->iv32) { - printk(KERN_DEBUG "skb: iv32 = 0x%08x key: iv32 = 0x%08x\n", - iv32, ctx->iv32); - printk(KERN_DEBUG "Wrap around of iv16 in the middle of a " - "fragmented packet\n"); - } -#endif + spin_lock_irqsave(&key->u.tkip.txlock, flags); + ieee80211_compute_tkip_p1k(key, iv32); + memcpy(p1k, ctx->p1k, sizeof(ctx->p1k)); + spin_unlock_irqrestore(&key->u.tkip.txlock, flags); +} +EXPORT_SYMBOL(ieee80211_get_tkip_p1k_iv); - /* Update the p1k only when the iv16 in the packet wraps around, this - * might occur after the wrap around of iv16 in the key in case of - * fragmented packets. */ - if (iv16 == 0 || ctx->state == TKIP_STATE_NOT_INIT) - tkip_mixing_phase1(tk, ctx, hdr->addr2, iv32); +void ieee80211_get_tkip_rx_p1k(struct ieee80211_key_conf *keyconf, + const u8 *ta, u32 iv32, u16 *p1k) +{ + const u8 *tk = &keyconf->key[NL80211_TKIP_DATA_OFFSET_ENCR_KEY]; + struct tkip_ctx ctx; - if (type == IEEE80211_TKIP_P1_KEY) { - memcpy(outkey, ctx->p1k, sizeof(u16) * 5); - return; - } + tkip_mixing_phase1(tk, &ctx, ta, iv32); + memcpy(p1k, ctx.p1k, sizeof(ctx.p1k)); +} +EXPORT_SYMBOL(ieee80211_get_tkip_rx_p1k); - tkip_mixing_phase2(tk, ctx, iv16, outkey); +void ieee80211_get_tkip_p2k(struct ieee80211_key_conf *keyconf, + struct sk_buff *skb, u8 *p2k) +{ + struct ieee80211_key *key = (struct ieee80211_key *) + container_of(keyconf, struct ieee80211_key, conf); + const u8 *tk = &key->conf.key[NL80211_TKIP_DATA_OFFSET_ENCR_KEY]; + struct tkip_ctx *ctx = &key->u.tkip.tx; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + const u8 *data = (u8 *)hdr + ieee80211_hdrlen(hdr->frame_control); + u32 iv32 = get_unaligned_le32(&data[4]); + u16 iv16 = data[2] | (data[0] << 8); + unsigned long flags; + + spin_lock_irqsave(&key->u.tkip.txlock, flags); + ieee80211_compute_tkip_p1k(key, iv32); + tkip_mixing_phase2(tk, ctx, iv16, p2k); + spin_unlock_irqrestore(&key->u.tkip.txlock, flags); } -EXPORT_SYMBOL(ieee80211_get_tkip_key); +EXPORT_SYMBOL(ieee80211_get_tkip_p2k); /* * Encrypt packet payload with TKIP using @key. @pos is a pointer to the @@ -204,19 +226,15 @@ EXPORT_SYMBOL(ieee80211_get_tkip_key); */ int ieee80211_tkip_encrypt_data(struct crypto_cipher *tfm, struct ieee80211_key *key, - u8 *pos, size_t payload_len, u8 *ta) + struct sk_buff *skb, + u8 *payload, size_t payload_len) { u8 rc4key[16]; - struct tkip_ctx *ctx = &key->u.tkip.tx; - const u8 *tk = &key->conf.key[NL80211_TKIP_DATA_OFFSET_ENCR_KEY]; - - /* Calculate per-packet key */ - if (ctx->iv16 == 0 || ctx->state == TKIP_STATE_NOT_INIT) - tkip_mixing_phase1(tk, ctx, ta, ctx->iv32); - tkip_mixing_phase2(tk, ctx, ctx->iv16, rc4key); + ieee80211_get_tkip_p2k(&key->conf, skb, rc4key); - return ieee80211_wep_encrypt_data(tfm, rc4key, 16, pos, payload_len); + return ieee80211_wep_encrypt_data(tfm, rc4key, 16, + payload, payload_len); } /* Decrypt packet payload with TKIP using @key. @pos is a pointer to the diff --git a/net/mac80211/tkip.h b/net/mac80211/tkip.h index 1cab9c8..e3ecb65 100644 --- a/net/mac80211/tkip.h +++ b/net/mac80211/tkip.h @@ -13,11 +13,13 @@ #include #include "key.h" -u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key, u16 iv16); +u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key); int ieee80211_tkip_encrypt_data(struct crypto_cipher *tfm, - struct ieee80211_key *key, - u8 *pos, size_t payload_len, u8 *ta); + struct ieee80211_key *key, + struct sk_buff *skb, + u8 *payload, size_t payload_len); + enum { TKIP_DECRYPT_OK = 0, TKIP_DECRYPT_NO_EXT_IV = -1, diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index da878c1..91826b6 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -253,13 +254,12 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); - u32 sta_flags; + bool assoc = false; if (unlikely(info->flags & IEEE80211_TX_CTL_INJECTED)) return TX_CONTINUE; - if (unlikely(test_bit(SCAN_SW_SCANNING, &tx->local->scanning)) && - test_bit(SDATA_STATE_OFFCHANNEL, &tx->sdata->state) && + if (unlikely(test_bit(SCAN_OFF_CHANNEL, &tx->local->scanning)) && !ieee80211_is_probe_req(hdr->frame_control) && !ieee80211_is_nullfunc(hdr->frame_control)) /* @@ -278,16 +278,14 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) if (tx->sdata->vif.type == NL80211_IFTYPE_WDS) return TX_CONTINUE; - if (tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT) - return TX_CONTINUE; - if (tx->flags & IEEE80211_TX_PS_BUFFERED) return TX_CONTINUE; - sta_flags = tx->sta ? get_sta_flags(tx->sta) : 0; + if (tx->sta) + assoc = test_sta_flag(tx->sta, WLAN_STA_ASSOC); if (likely(tx->flags & IEEE80211_TX_UNICAST)) { - if (unlikely(!(sta_flags & WLAN_STA_ASSOC) && + if (unlikely(!assoc && tx->sdata->vif.type != NL80211_IFTYPE_ADHOC && ieee80211_is_data(hdr->frame_control))) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG @@ -343,13 +341,22 @@ static void purge_old_ps_buffers(struct ieee80211_local *local) total += skb_queue_len(&ap->ps_bc_buf); } + /* + * Drop one frame from each station from the lowest-priority + * AC that has frames at all. + */ list_for_each_entry_rcu(sta, &local->sta_list, list) { - skb = skb_dequeue(&sta->ps_tx_buf); - if (skb) { - purged++; - dev_kfree_skb(skb); + int ac; + + for (ac = IEEE80211_AC_BK; ac >= IEEE80211_AC_VO; ac--) { + skb = skb_dequeue(&sta->ps_tx_buf[ac]); + total += skb_queue_len(&sta->ps_tx_buf[ac]); + if (skb) { + purged++; + dev_kfree_skb(skb); + break; + } } - total += skb_queue_len(&sta->ps_tx_buf); } rcu_read_unlock(); @@ -418,7 +425,7 @@ static int ieee80211_use_mfp(__le16 fc, struct sta_info *sta, if (!ieee80211_is_mgmt(fc)) return 0; - if (sta == NULL || !test_sta_flags(sta, WLAN_STA_MFP)) + if (sta == NULL || !test_sta_flag(sta, WLAN_STA_MFP)) return 0; if (!ieee80211_is_robust_mgmt_frame((struct ieee80211_hdr *) @@ -435,7 +442,6 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; struct ieee80211_local *local = tx->local; - u32 staflags; if (unlikely(!sta || ieee80211_is_probe_resp(hdr->frame_control) || @@ -444,57 +450,67 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) ieee80211_is_reassoc_resp(hdr->frame_control))) return TX_CONTINUE; - staflags = get_sta_flags(sta); + if (unlikely((test_sta_flag(sta, WLAN_STA_PS_STA) || + test_sta_flag(sta, WLAN_STA_PS_DRIVER)) && + !(info->flags & IEEE80211_TX_CTL_POLL_RESPONSE))) { + int ac = skb_get_queue_mapping(tx->skb); - if (unlikely((staflags & (WLAN_STA_PS_STA | WLAN_STA_PS_DRIVER)) && - !(info->flags & IEEE80211_TX_CTL_PSPOLL_RESPONSE))) { #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "STA %pM aid %d: PS buffer (entries " - "before %d)\n", - sta->sta.addr, sta->sta.aid, - skb_queue_len(&sta->ps_tx_buf)); + printk(KERN_DEBUG "STA %pM aid %d: PS buffer for AC %d\n", + sta->sta.addr, sta->sta.aid, ac); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) purge_old_ps_buffers(tx->local); - if (skb_queue_len(&sta->ps_tx_buf) >= STA_MAX_TX_BUFFER) { - struct sk_buff *old = skb_dequeue(&sta->ps_tx_buf); + + /* sync with ieee80211_sta_ps_deliver_wakeup */ + spin_lock(&sta->ps_lock); + /* + * STA woke up the meantime and all the frames on ps_tx_buf have + * been queued to pending queue. No reordering can happen, go + * ahead and Tx the packet. + */ + if (!test_sta_flag(sta, WLAN_STA_PS_STA) && + !test_sta_flag(sta, WLAN_STA_PS_DRIVER)) { + spin_unlock(&sta->ps_lock); + return TX_CONTINUE; + } + + if (skb_queue_len(&sta->ps_tx_buf[ac]) >= STA_MAX_TX_BUFFER) { + struct sk_buff *old = skb_dequeue(&sta->ps_tx_buf[ac]); #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: STA %pM TX " - "buffer full - dropping oldest frame\n", - tx->sdata->name, sta->sta.addr); - } + if (net_ratelimit()) + printk(KERN_DEBUG "%s: STA %pM TX buffer for " + "AC %d full - dropping oldest frame\n", + tx->sdata->name, sta->sta.addr, ac); #endif dev_kfree_skb(old); } else tx->local->total_ps_buffered++; - /* - * Queue frame to be sent after STA wakes up/polls, - * but don't set the TIM bit if the driver is blocking - * wakeup or poll response transmissions anyway. - */ - if (skb_queue_empty(&sta->ps_tx_buf) && - !(staflags & WLAN_STA_PS_DRIVER)) - sta_info_set_tim_bit(sta); - info->control.jiffies = jiffies; info->control.vif = &tx->sdata->vif; info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; - skb_queue_tail(&sta->ps_tx_buf, tx->skb); + skb_queue_tail(&sta->ps_tx_buf[ac], tx->skb); + spin_unlock(&sta->ps_lock); if (!timer_pending(&local->sta_cleanup)) mod_timer(&local->sta_cleanup, round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL)); + /* + * We queued up some frames, so the TIM bit might + * need to be set, recalculate it. + */ + sta_info_recalc_tim(sta); + return TX_QUEUED; } #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - else if (unlikely(staflags & WLAN_STA_PS_STA)) { - printk(KERN_DEBUG "%s: STA %pM in PS mode, but pspoll " - "set -> send frame\n", tx->sdata->name, - sta->sta.addr); + else if (unlikely(test_sta_flag(sta, WLAN_STA_PS_STA))) { + printk(KERN_DEBUG + "%s: STA %pM in PS mode, but polling/in SP -> send frame\n", + tx->sdata->name, sta->sta.addr); } #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ @@ -518,9 +534,11 @@ ieee80211_tx_h_check_control_port_protocol(struct ieee80211_tx_data *tx) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); - if (unlikely(tx->sdata->control_port_protocol == tx->skb->protocol && - tx->sdata->control_port_no_encrypt)) - info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + if (unlikely(tx->sdata->control_port_protocol == tx->skb->protocol)) { + if (tx->sdata->control_port_no_encrypt) + info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + info->flags |= IEEE80211_TX_CTL_USE_MINRATE; + } return TX_CONTINUE; } @@ -552,7 +570,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) !(info->flags & IEEE80211_TX_CTL_INJECTED) && (!ieee80211_is_robust_mgmt_frame(hdr) || (ieee80211_is_action(hdr->frame_control) && - tx->sta && test_sta_flags(tx->sta, WLAN_STA_MFP)))) { + tx->sta && test_sta_flag(tx->sta, WLAN_STA_MFP)))) { I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted); return TX_DROP; } else @@ -589,6 +607,9 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) break; } + if (unlikely(tx->key && tx->key->flags & KEY_FLAG_TAINTED)) + return TX_DROP; + if (!skip_hw && tx->key && tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) info->control.hw_key = &tx->key->conf; @@ -608,7 +629,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) u32 len; bool inval = false, rts = false, short_preamble = false; struct ieee80211_tx_rate_control txrc; - u32 sta_flags; + bool assoc = false; memset(&txrc, 0, sizeof(txrc)); @@ -644,17 +665,17 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) */ if (tx->sdata->vif.bss_conf.use_short_preamble && (ieee80211_is_data(hdr->frame_control) || - (tx->sta && test_sta_flags(tx->sta, WLAN_STA_SHORT_PREAMBLE)))) + (tx->sta && test_sta_flag(tx->sta, WLAN_STA_SHORT_PREAMBLE)))) txrc.short_preamble = short_preamble = true; - sta_flags = tx->sta ? get_sta_flags(tx->sta) : 0; + if (tx->sta) + assoc = test_sta_flag(tx->sta, WLAN_STA_ASSOC); /* * Lets not bother rate control if we're associated and cannot * talk to the sta. This should not happen. */ - if (WARN(test_bit(SCAN_SW_SCANNING, &tx->local->scanning) && - (sta_flags & WLAN_STA_ASSOC) && + if (WARN(test_bit(SCAN_SW_SCANNING, &tx->local->scanning) && assoc && !rate_usable_index_exists(sband, &tx->sta->sta), "%s: Dropped data frame as no usable bitrate found while " "scanning and associated. Target station: " @@ -797,6 +818,9 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) if (ieee80211_hdrlen(hdr->frame_control) < 24) return TX_CONTINUE; + if (ieee80211_is_qos_nullfunc(hdr->frame_control)) + return TX_CONTINUE; + /* * Anything but QoS data that has a sequence number field * (is long enough) gets a sequence number from the global @@ -874,7 +898,7 @@ static int ieee80211_fragment(struct ieee80211_local *local, pos += fraglen; } - skb->len = hdrlen + per_fragm; + skb_trim(skb, hdrlen + per_fragm); return 0; } @@ -888,7 +912,10 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) int hdrlen; int fragnum; - if (!(tx->flags & IEEE80211_TX_FRAGMENTED)) + if (info->flags & IEEE80211_TX_CTL_DONTFRAG) + return TX_CONTINUE; + + if (tx->local->ops->set_frag_threshold) return TX_CONTINUE; /* @@ -901,7 +928,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) hdrlen = ieee80211_hdrlen(hdr->frame_control); - /* internal error, why is TX_FRAGMENTED set? */ + /* internal error, why isn't DONTFRAG set? */ if (WARN_ON(skb->len + FCS_LEN <= frag_threshold)) return TX_DROP; @@ -1022,100 +1049,6 @@ ieee80211_tx_h_calculate_duration(struct ieee80211_tx_data *tx) /* actual transmit path */ -/* - * deal with packet injection down monitor interface - * with Radiotap Header -- only called for monitor mode interface - */ -static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, - struct sk_buff *skb) -{ - /* - * this is the moment to interpret and discard the radiotap header that - * must be at the start of the packet injected in Monitor mode - * - * Need to take some care with endian-ness since radiotap - * args are little-endian - */ - - struct ieee80211_radiotap_iterator iterator; - struct ieee80211_radiotap_header *rthdr = - (struct ieee80211_radiotap_header *) skb->data; - bool hw_frag; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len, - NULL); - - info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; - tx->flags &= ~IEEE80211_TX_FRAGMENTED; - - /* packet is fragmented in HW if we have a non-NULL driver callback */ - hw_frag = (tx->local->ops->set_frag_threshold != NULL); - - /* - * for every radiotap entry that is present - * (ieee80211_radiotap_iterator_next returns -ENOENT when no more - * entries present, or -EINVAL on error) - */ - - while (!ret) { - ret = ieee80211_radiotap_iterator_next(&iterator); - - if (ret) - continue; - - /* see if this argument is something we can use */ - switch (iterator.this_arg_index) { - /* - * You must take care when dereferencing iterator.this_arg - * for multibyte types... the pointer is not aligned. Use - * get_unaligned((type *)iterator.this_arg) to dereference - * iterator.this_arg for type "type" safely on all arches. - */ - case IEEE80211_RADIOTAP_FLAGS: - if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FCS) { - /* - * this indicates that the skb we have been - * handed has the 32-bit FCS CRC at the end... - * we should react to that by snipping it off - * because it will be recomputed and added - * on transmission - */ - if (skb->len < (iterator._max_length + FCS_LEN)) - return false; - - skb_trim(skb, skb->len - FCS_LEN); - } - if (*iterator.this_arg & IEEE80211_RADIOTAP_F_WEP) - info->flags &= ~IEEE80211_TX_INTFL_DONT_ENCRYPT; - if ((*iterator.this_arg & IEEE80211_RADIOTAP_F_FRAG) && - !hw_frag) - tx->flags |= IEEE80211_TX_FRAGMENTED; - break; - - /* - * Please update the file - * Documentation/networking/mac80211-injection.txt - * when parsing new fields here. - */ - - default: - break; - } - } - - if (ret != -ENOENT) /* ie, if we didn't simply run out of fields */ - return false; - - /* - * remove the radiotap header - * iterator->_max_length was sanity-checked against - * skb->len by iterator init - */ - skb_pull(skb, iterator._max_length); - - return true; -} - static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx, struct sk_buff *skb, struct ieee80211_tx_info *info, @@ -1180,7 +1113,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_hdr *hdr; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - int hdrlen, tid; + int tid; u8 *qc; memset(tx, 0, sizeof(*tx)); @@ -1188,26 +1121,6 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, tx->local = local; tx->sdata = sdata; tx->channel = local->hw.conf.channel; - /* - * Set this flag (used below to indicate "automatic fragmentation"), - * it will be cleared/left by radiotap as desired. - * Only valid when fragmentation is done by the stack. - */ - if (!local->ops->set_frag_threshold) - tx->flags |= IEEE80211_TX_FRAGMENTED; - - /* process and remove the injection radiotap header */ - if (unlikely(info->flags & IEEE80211_TX_INTFL_HAS_RADIOTAP)) { - if (!__ieee80211_parse_tx_radiotap(tx, skb)) - return TX_DROP; - - /* - * __ieee80211_parse_tx_radiotap has now removed - * the radiotap header that was present and pre-filled - * 'tx' with tx control information. - */ - info->flags &= ~IEEE80211_TX_INTFL_HAS_RADIOTAP; - } /* * If this flag is set to true anywhere, and we get here, @@ -1230,7 +1143,9 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, tx->sta = sta_info_get(sdata, hdr->addr1); if (tx->sta && ieee80211_is_data_qos(hdr->frame_control) && - (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION)) { + !ieee80211_is_qos_nullfunc(hdr->frame_control) && + (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION) && + !(local->hw.flags & IEEE80211_HW_TX_AMPDU_SETUP_IN_HW)) { struct tid_ampdu_tx *tid_tx; qc = ieee80211_get_qos_ctl(hdr); @@ -1255,29 +1170,25 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, tx->flags |= IEEE80211_TX_UNICAST; if (unlikely(local->wifi_wme_noack_test)) info->flags |= IEEE80211_TX_CTL_NO_ACK; - else - info->flags &= ~IEEE80211_TX_CTL_NO_ACK; + /* + * Flags are initialized to 0. Hence, no need to + * explicitly unset IEEE80211_TX_CTL_NO_ACK since + * it might already be set for injected frames. + */ } - if (tx->flags & IEEE80211_TX_FRAGMENTED) { - if ((tx->flags & IEEE80211_TX_UNICAST) && - skb->len + FCS_LEN > local->hw.wiphy->frag_threshold && - !(info->flags & IEEE80211_TX_CTL_AMPDU)) - tx->flags |= IEEE80211_TX_FRAGMENTED; - else - tx->flags &= ~IEEE80211_TX_FRAGMENTED; + if (!(info->flags & IEEE80211_TX_CTL_DONTFRAG)) { + if (!(tx->flags & IEEE80211_TX_UNICAST) || + skb->len + FCS_LEN <= local->hw.wiphy->frag_threshold || + info->flags & IEEE80211_TX_CTL_AMPDU) + info->flags |= IEEE80211_TX_CTL_DONTFRAG; } if (!tx->sta) info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; - else if (test_and_clear_sta_flags(tx->sta, WLAN_STA_CLEAR_PS_FILT)) + else if (test_and_clear_sta_flag(tx->sta, WLAN_STA_CLEAR_PS_FILT)) info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; - hdrlen = ieee80211_hdrlen(hdr->frame_control); - if (skb->len > hdrlen + sizeof(rfc1042_header) + 2) { - u8 *pos = &skb->data[hdrlen + sizeof(rfc1042_header)]; - tx->ethertype = (pos[0] << 8) | pos[1]; - } info->flags |= IEEE80211_TX_CTL_FIRST_FRAGMENT; return TX_CONTINUE; @@ -1475,28 +1386,19 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, /* device xmit handlers */ -static int ieee80211_skb_resize(struct ieee80211_local *local, +static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, int head_need, bool may_encrypt) { + struct ieee80211_local *local = sdata->local; int tail_need = 0; - /* - * This could be optimised, devices that do full hardware - * crypto (including TKIP MMIC) need no tailroom... But we - * have no drivers for such devices currently. - */ - if (may_encrypt) { + if (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt) { tail_need = IEEE80211_ENCRYPT_TAILROOM; tail_need -= skb_tailroom(skb); tail_need = max_t(int, tail_need, 0); } - if (head_need || tail_need) { - /* Sorry. Can't account for this any more */ - skb_orphan(skb); - } - if (skb_cloned(skb)) I802_DEBUG_INC(local->tx_expand_skb_head_cloned); else if (head_need || tail_need) @@ -1510,67 +1412,19 @@ static int ieee80211_skb_resize(struct ieee80211_local *local, return -ENOMEM; } - /* update truesize too */ - skb->truesize += head_need + tail_need; - return 0; } -static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb) +void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct ieee80211_sub_if_data *tmp_sdata; int headroom; bool may_encrypt; rcu_read_lock(); - if (unlikely(sdata->vif.type == NL80211_IFTYPE_MONITOR)) { - int hdrlen; - u16 len_rthdr; - - info->flags |= IEEE80211_TX_CTL_INJECTED | - IEEE80211_TX_INTFL_HAS_RADIOTAP; - - len_rthdr = ieee80211_get_radiotap_len(skb->data); - hdr = (struct ieee80211_hdr *)(skb->data + len_rthdr); - hdrlen = ieee80211_hdrlen(hdr->frame_control); - - /* check the header is complete in the frame */ - if (likely(skb->len >= len_rthdr + hdrlen)) { - /* - * We process outgoing injected frames that have a - * local address we handle as though they are our - * own frames. - * This code here isn't entirely correct, the local - * MAC address is not necessarily enough to find - * the interface to use; for that proper VLAN/WDS - * support we will need a different mechanism. - */ - - list_for_each_entry_rcu(tmp_sdata, &local->interfaces, - list) { - if (!ieee80211_sdata_running(tmp_sdata)) - continue; - if (tmp_sdata->vif.type == - NL80211_IFTYPE_MONITOR || - tmp_sdata->vif.type == - NL80211_IFTYPE_AP_VLAN || - tmp_sdata->vif.type == - NL80211_IFTYPE_WDS) - continue; - if (compare_ether_addr(tmp_sdata->vif.addr, - hdr->addr2) == 0) { - sdata = tmp_sdata; - break; - } - } - } - } - may_encrypt = !(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT); headroom = local->tx_headroom; @@ -1579,7 +1433,7 @@ static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, headroom -= skb_headroom(skb); headroom = max_t(int, 0, headroom); - if (ieee80211_skb_resize(local, skb, headroom, may_encrypt)) { + if (ieee80211_skb_resize(sdata, skb, headroom, may_encrypt)) { dev_kfree_skb(skb); rcu_read_unlock(); return; @@ -1597,11 +1451,94 @@ static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, return; } - ieee80211_set_qos_hdr(local, skb); + ieee80211_set_qos_hdr(sdata, skb); ieee80211_tx(sdata, skb, false); rcu_read_unlock(); } +static bool ieee80211_parse_tx_radiotap(struct sk_buff *skb) +{ + struct ieee80211_radiotap_iterator iterator; + struct ieee80211_radiotap_header *rthdr = + (struct ieee80211_radiotap_header *) skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len, + NULL); + u16 txflags; + + info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | + IEEE80211_TX_CTL_DONTFRAG; + + /* + * for every radiotap entry that is present + * (ieee80211_radiotap_iterator_next returns -ENOENT when no more + * entries present, or -EINVAL on error) + */ + + while (!ret) { + ret = ieee80211_radiotap_iterator_next(&iterator); + + if (ret) + continue; + + /* see if this argument is something we can use */ + switch (iterator.this_arg_index) { + /* + * You must take care when dereferencing iterator.this_arg + * for multibyte types... the pointer is not aligned. Use + * get_unaligned((type *)iterator.this_arg) to dereference + * iterator.this_arg for type "type" safely on all arches. + */ + case IEEE80211_RADIOTAP_FLAGS: + if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FCS) { + /* + * this indicates that the skb we have been + * handed has the 32-bit FCS CRC at the end... + * we should react to that by snipping it off + * because it will be recomputed and added + * on transmission + */ + if (skb->len < (iterator._max_length + FCS_LEN)) + return false; + + skb_trim(skb, skb->len - FCS_LEN); + } + if (*iterator.this_arg & IEEE80211_RADIOTAP_F_WEP) + info->flags &= ~IEEE80211_TX_INTFL_DONT_ENCRYPT; + if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FRAG) + info->flags &= ~IEEE80211_TX_CTL_DONTFRAG; + break; + + case IEEE80211_RADIOTAP_TX_FLAGS: + txflags = get_unaligned_le16(iterator.this_arg); + if (txflags & IEEE80211_RADIOTAP_F_TX_NOACK) + info->flags |= IEEE80211_TX_CTL_NO_ACK; + break; + + /* + * Please update the file + * Documentation/networking/mac80211-injection.txt + * when parsing new fields here. + */ + + default: + break; + } + } + + if (ret != -ENOENT) /* ie, if we didn't simply run out of fields */ + return false; + + /* + * remove the radiotap header + * iterator->_max_length was sanity-checked against + * skb->len by iterator init + */ + skb_pull(skb, iterator._max_length); + + return true; +} + netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -1610,7 +1547,10 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, struct ieee80211_radiotap_header *prthdr = (struct ieee80211_radiotap_header *)skb->data; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr; + struct ieee80211_sub_if_data *tmp_sdata, *sdata; u16 len_rthdr; + int hdrlen; /* * Frame injection is not allowed if beaconing is not allowed @@ -1661,12 +1601,65 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, skb_set_network_header(skb, len_rthdr); skb_set_transport_header(skb, len_rthdr); + if (skb->len < len_rthdr + 2) + goto fail; + + hdr = (struct ieee80211_hdr *)(skb->data + len_rthdr); + hdrlen = ieee80211_hdrlen(hdr->frame_control); + + if (skb->len < len_rthdr + hdrlen) + goto fail; + + /* + * Initialize skb->protocol if the injected frame is a data frame + * carrying a rfc1042 header + */ + if (ieee80211_is_data(hdr->frame_control) && + skb->len >= len_rthdr + hdrlen + sizeof(rfc1042_header) + 2) { + u8 *payload = (u8 *)hdr + hdrlen; + + if (compare_ether_addr(payload, rfc1042_header) == 0) + skb->protocol = cpu_to_be16((payload[6] << 8) | + payload[7]); + } + memset(info, 0, sizeof(*info)); - info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + info->flags = IEEE80211_TX_CTL_REQ_TX_STATUS | + IEEE80211_TX_CTL_INJECTED; + + /* process and remove the injection radiotap header */ + if (!ieee80211_parse_tx_radiotap(skb)) + goto fail; + + rcu_read_lock(); + + /* + * We process outgoing injected frames that have a local address + * we handle as though they are non-injected frames. + * This code here isn't entirely correct, the local MAC address + * isn't always enough to find the interface to use; for proper + * VLAN/WDS support we will need a different mechanism (which + * likely isn't going to be monitor interfaces). + */ + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + list_for_each_entry_rcu(tmp_sdata, &local->interfaces, list) { + if (!ieee80211_sdata_running(tmp_sdata)) + continue; + if (tmp_sdata->vif.type == NL80211_IFTYPE_MONITOR || + tmp_sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + tmp_sdata->vif.type == NL80211_IFTYPE_WDS) + continue; + if (compare_ether_addr(tmp_sdata->vif.addr, hdr->addr2) == 0) { + sdata = tmp_sdata; + break; + } + } + + ieee80211_xmit(sdata, skb); + rcu_read_unlock(); - /* pass the radiotap header up to xmit */ - ieee80211_xmit(IEEE80211_DEV_TO_SUB_IF(dev), skb); return NETDEV_TX_OK; fail: @@ -1705,8 +1698,9 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, int encaps_len, skip_header_bytes; int nh_pos, h_pos; struct sta_info *sta = NULL; - u32 sta_flags = 0; + bool wme_sta = false, authorized = false, tdls_auth = false; struct sk_buff *tmp_skb; + bool tdls_direct = false; if (unlikely(skb->len < ETH_HLEN)) { ret = NETDEV_TX_OK; @@ -1730,7 +1724,8 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, memcpy(hdr.addr3, skb->data, ETH_ALEN); memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN); hdrlen = 30; - sta_flags = get_sta_flags(sta); + authorized = test_sta_flag(sta, WLAN_STA_AUTHORIZED); + wme_sta = test_sta_flag(sta, WLAN_STA_WME); } rcu_read_unlock(); if (sta) @@ -1818,11 +1813,50 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, break; #endif case NL80211_IFTYPE_STATION: - memcpy(hdr.addr1, sdata->u.mgd.bssid, ETH_ALEN); - if (sdata->u.mgd.use_4addr && - cpu_to_be16(ethertype) != sdata->control_port_protocol) { - fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); + if (sdata->wdev.wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS) { + bool tdls_peer = false; + + rcu_read_lock(); + sta = sta_info_get(sdata, skb->data); + if (sta) { + authorized = test_sta_flag(sta, + WLAN_STA_AUTHORIZED); + wme_sta = test_sta_flag(sta, WLAN_STA_WME); + tdls_peer = test_sta_flag(sta, + WLAN_STA_TDLS_PEER); + tdls_auth = test_sta_flag(sta, + WLAN_STA_TDLS_PEER_AUTH); + } + rcu_read_unlock(); + + /* + * If the TDLS link is enabled, send everything + * directly. Otherwise, allow TDLS setup frames + * to be transmitted indirectly. + */ + tdls_direct = tdls_peer && (tdls_auth || + !(ethertype == ETH_P_TDLS && skb->len > 14 && + skb->data[14] == WLAN_TDLS_SNAP_RFTYPE)); + } + + if (tdls_direct) { + /* link during setup - throw out frames to peer */ + if (!tdls_auth) { + ret = NETDEV_TX_OK; + goto fail; + } + + /* DA SA BSSID */ + memcpy(hdr.addr1, skb->data, ETH_ALEN); + memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); + memcpy(hdr.addr3, sdata->u.mgd.bssid, ETH_ALEN); + hdrlen = 24; + } else if (sdata->u.mgd.use_4addr && + cpu_to_be16(ethertype) != sdata->control_port_protocol) { + fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | + IEEE80211_FCTL_TODS); /* RA TA DA SA */ + memcpy(hdr.addr1, sdata->u.mgd.bssid, ETH_ALEN); memcpy(hdr.addr2, sdata->vif.addr, ETH_ALEN); memcpy(hdr.addr3, skb->data, ETH_ALEN); memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN); @@ -1830,6 +1864,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, } else { fc |= cpu_to_le16(IEEE80211_FCTL_TODS); /* BSSID SA DA */ + memcpy(hdr.addr1, sdata->u.mgd.bssid, ETH_ALEN); memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); memcpy(hdr.addr3, skb->data, ETH_ALEN); hdrlen = 24; @@ -1855,13 +1890,19 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, if (!is_multicast_ether_addr(hdr.addr1)) { rcu_read_lock(); sta = sta_info_get(sdata, hdr.addr1); - if (sta) - sta_flags = get_sta_flags(sta); + if (sta) { + authorized = test_sta_flag(sta, WLAN_STA_AUTHORIZED); + wme_sta = test_sta_flag(sta, WLAN_STA_WME); + } rcu_read_unlock(); } + /* For mesh, the use of the QoS header is mandatory */ + if (ieee80211_vif_is_mesh(&sdata->vif)) + wme_sta = true; + /* receiver and we are QoS enabled, use a QoS type frame */ - if ((sta_flags & WLAN_STA_WME) && local->hw.queues >= 4) { + if (wme_sta && local->hw.queues >= 4) { fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA); hdrlen += 2; } @@ -1870,12 +1911,10 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, * Drop unicast frames to unauthorised stations unless they are * EAPOL frames from the local station. */ - if (!ieee80211_vif_is_mesh(&sdata->vif) && - unlikely(!is_multicast_ether_addr(hdr.addr1) && - !(sta_flags & WLAN_STA_AUTHORIZED) && - !(cpu_to_be16(ethertype) == sdata->control_port_protocol && - compare_ether_addr(sdata->vif.addr, - skb->data + ETH_ALEN) == 0))) { + if (unlikely(!ieee80211_vif_is_mesh(&sdata->vif) && + !is_multicast_ether_addr(hdr.addr1) && !authorized && + (cpu_to_be16(ethertype) != sdata->control_port_protocol || + compare_ether_addr(sdata->vif.addr, skb->data + ETH_ALEN)))) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG if (net_ratelimit()) printk(KERN_DEBUG "%s: dropped frame to %pM" @@ -1946,7 +1985,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, head_need += IEEE80211_ENCRYPT_HEADROOM; head_need += local->tx_headroom; head_need = max_t(int, 0, head_need); - if (ieee80211_skb_resize(local, skb, head_need, true)) + if (ieee80211_skb_resize(sdata, skb, head_need, true)) goto fail; } @@ -2277,13 +2316,23 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); mgmt->u.beacon.beacon_int = cpu_to_le16(sdata->vif.bss_conf.beacon_int); - mgmt->u.beacon.capab_info = 0x0; /* 0x0 for MPs */ + mgmt->u.beacon.capab_info |= cpu_to_le16( + sdata->u.mesh.security ? WLAN_CAPABILITY_PRIVACY : 0); pos = skb_put(skb, 2); *pos++ = WLAN_EID_SSID; *pos++ = 0x0; - mesh_mgmt_ies_add(skb, sdata); + if (ieee80211_add_srates_ie(&sdata->vif, skb) || + mesh_add_ds_params_ie(skb, sdata) || + ieee80211_add_ext_srates_ie(&sdata->vif, skb) || + mesh_add_rsn_ie(skb, sdata) || + mesh_add_meshid_ie(skb, sdata) || + mesh_add_meshconf_ie(skb, sdata) || + mesh_add_vendor_ies(skb, sdata)) { + pr_err("o11s: couldn't add ies!\n"); + goto out; + } } else { WARN_ON(1); goto out; @@ -2337,11 +2386,9 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw, local = sdata->local; skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*pspoll)); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for " - "pspoll template\n", sdata->name); + if (!skb) return NULL; - } + skb_reserve(skb, local->hw.extra_tx_headroom); pspoll = (struct ieee80211_pspoll *) skb_put(skb, sizeof(*pspoll)); @@ -2377,11 +2424,9 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, local = sdata->local; skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*nullfunc)); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for nullfunc " - "template\n", sdata->name); + if (!skb) return NULL; - } + skb_reserve(skb, local->hw.extra_tx_headroom); nullfunc = (struct ieee80211_hdr_3addr *) skb_put(skb, @@ -2416,11 +2461,8 @@ struct sk_buff *ieee80211_probereq_get(struct ieee80211_hw *hw, skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*hdr) + ie_ssid_len + ie_len); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for probe " - "request template\n", sdata->name); + if (!skb) return NULL; - } skb_reserve(skb, local->hw.extra_tx_headroom); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 11d9d49..7095ae5 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -13,13 +13,13 @@ #include #include +#include #include #include #include #include #include #include -#include #include #include #include @@ -368,14 +368,14 @@ void ieee80211_add_pending_skb(struct ieee80211_local *local, spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); } -int ieee80211_add_pending_skbs_fn(struct ieee80211_local *local, - struct sk_buff_head *skbs, - void (*fn)(void *data), void *data) +void ieee80211_add_pending_skbs_fn(struct ieee80211_local *local, + struct sk_buff_head *skbs, + void (*fn)(void *data), void *data) { struct ieee80211_hw *hw = &local->hw; struct sk_buff *skb; unsigned long flags; - int queue, ret = 0, i; + int queue, i; spin_lock_irqsave(&local->queue_stop_reason_lock, flags); for (i = 0; i < hw->queues; i++) @@ -390,7 +390,6 @@ int ieee80211_add_pending_skbs_fn(struct ieee80211_local *local, continue; } - ret++; queue = skb_get_queue_mapping(skb); __skb_queue_tail(&local->pending[queue], skb); } @@ -402,14 +401,12 @@ int ieee80211_add_pending_skbs_fn(struct ieee80211_local *local, __ieee80211_wake_queue(hw, i, IEEE80211_QUEUE_STOP_REASON_SKB_ADD); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); - - return ret; } -int ieee80211_add_pending_skbs(struct ieee80211_local *local, - struct sk_buff_head *skbs) +void ieee80211_add_pending_skbs(struct ieee80211_local *local, + struct sk_buff_head *skbs) { - return ieee80211_add_pending_skbs_fn(local, skbs, NULL, NULL); + ieee80211_add_pending_skbs_fn(local, skbs, NULL, NULL); } void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, @@ -573,172 +570,6 @@ void ieee802_11_parse_elems(u8 *start, size_t len, ieee802_11_parse_elems_crc(start, len, elems, 0, 0); } -u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, - struct ieee802_11_elems *elems, - u64 filter, u32 crc) -{ - size_t left = len; - u8 *pos = start; - bool calc_crc = filter != 0; - - memset(elems, 0, sizeof(*elems)); - elems->ie_start = start; - elems->total_len = len; - - while (left >= 2) { - u8 id, elen; - - id = *pos++; - elen = *pos++; - left -= 2; - - if (elen > left) - break; - - if (calc_crc && id < 64 && (filter & (1ULL << id))) - crc = crc32_be(crc, pos - 2, elen + 2); - - switch (id) { - case WLAN_EID_SSID: - elems->ssid = pos; - elems->ssid_len = elen; - break; - case WLAN_EID_SUPP_RATES: - elems->supp_rates = pos; - elems->supp_rates_len = elen; - break; - case WLAN_EID_FH_PARAMS: - elems->fh_params = pos; - elems->fh_params_len = elen; - break; - case WLAN_EID_DS_PARAMS: - elems->ds_params = pos; - elems->ds_params_len = elen; - break; - case WLAN_EID_CF_PARAMS: - elems->cf_params = pos; - elems->cf_params_len = elen; - break; - case WLAN_EID_TIM: - if (elen >= sizeof(struct ieee80211_tim_ie)) { - elems->tim = (void *)pos; - elems->tim_len = elen; - } - break; - case WLAN_EID_IBSS_PARAMS: - elems->ibss_params = pos; - elems->ibss_params_len = elen; - break; - case WLAN_EID_CHALLENGE: - elems->challenge = pos; - elems->challenge_len = elen; - break; - case WLAN_EID_VENDOR_SPECIFIC: - if (elen >= 4 && pos[0] == 0x00 && pos[1] == 0x50 && - pos[2] == 0xf2) { - /* Microsoft OUI (00:50:F2) */ - - if (calc_crc) - crc = crc32_be(crc, pos - 2, elen + 2); - - if (pos[3] == 1) { - /* OUI Type 1 - WPA IE */ - elems->wpa = pos; - elems->wpa_len = elen; - } else if (elen >= 5 && pos[3] == 2) { - /* OUI Type 2 - WMM IE */ - if (pos[4] == 0) { - elems->wmm_info = pos; - elems->wmm_info_len = elen; - } else if (pos[4] == 1) { - elems->wmm_param = pos; - elems->wmm_param_len = elen; - } - } - } - break; - case WLAN_EID_RSN: - elems->rsn = pos; - elems->rsn_len = elen; - break; - case WLAN_EID_ERP_INFO: - elems->erp_info = pos; - elems->erp_info_len = elen; - break; - case WLAN_EID_EXT_SUPP_RATES: - elems->ext_supp_rates = pos; - elems->ext_supp_rates_len = elen; - break; - case WLAN_EID_HT_CAPABILITY: - if (elen >= sizeof(struct ieee80211_ht_cap)) - elems->ht_cap_elem = (void *)pos; - break; - case WLAN_EID_HT_INFORMATION: - if (elen >= sizeof(struct ieee80211_ht_info)) - elems->ht_info_elem = (void *)pos; - break; - case WLAN_EID_MESH_ID: - elems->mesh_id = pos; - elems->mesh_id_len = elen; - break; - case WLAN_EID_MESH_CONFIG: - if (elen >= sizeof(struct ieee80211_meshconf_ie)) - elems->mesh_config = (void *)pos; - break; - case WLAN_EID_PEER_LINK: - elems->peer_link = pos; - elems->peer_link_len = elen; - break; - case WLAN_EID_PREQ: - elems->preq = pos; - elems->preq_len = elen; - break; - case WLAN_EID_PREP: - elems->prep = pos; - elems->prep_len = elen; - break; - case WLAN_EID_PERR: - elems->perr = pos; - elems->perr_len = elen; - break; - case WLAN_EID_RANN: - if (elen >= sizeof(struct ieee80211_rann_ie)) - elems->rann = (void *)pos; - break; - case WLAN_EID_CHANNEL_SWITCH: - elems->ch_switch_elem = pos; - elems->ch_switch_elem_len = elen; - break; - case WLAN_EID_QUIET: - if (!elems->quiet_elem) { - elems->quiet_elem = pos; - elems->quiet_elem_len = elen; - } - elems->num_of_quiet_elem++; - break; - case WLAN_EID_COUNTRY: - elems->country_elem = pos; - elems->country_elem_len = elen; - break; - case WLAN_EID_PWR_CONSTRAINT: - elems->pwr_constr_elem = pos; - elems->pwr_constr_elem_len = elen; - break; - case WLAN_EID_TIMEOUT_INTERVAL: - elems->timeout_int = pos; - elems->timeout_int_len = elen; - break; - default: - break; - } - - left -= elen; - pos += elen; - } - - return crc; -} - void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; @@ -799,7 +630,8 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata) qparam.uapsd = false; - drv_conf_tx(local, queue, &qparam); + sdata->tx_conf[queue] = qparam; + drv_conf_tx(local, sdata, queue, &qparam); } /* after reinitialize QoS TX queues setting to default, @@ -873,11 +705,9 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 6 + extra_len); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for auth " - "frame\n", sdata->name); + if (!skb) return; - } + skb_reserve(skb, local->hw.extra_tx_headroom); mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + 6); @@ -1016,9 +846,10 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, } struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, - u8 *dst, + u8 *dst, u32 ratemask, const u8 *ssid, size_t ssid_len, - const u8 *ie, size_t ie_len) + const u8 *ie, size_t ie_len, + bool directed) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; @@ -1029,20 +860,23 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, /* FIXME: come up with a proper value */ buf = kmalloc(200 + ie_len, GFP_KERNEL); - if (!buf) { - printk(KERN_DEBUG "%s: failed to allocate temporary IE " - "buffer\n", sdata->name); + if (!buf) return NULL; - } - chan = ieee80211_frequency_to_channel( - local->hw.conf.channel->center_freq); + /* + * Do not send DS Channel parameter for directed probe requests + * in order to maximize the chance that we get a response. Some + * badly-behaved APs don't respond when this parameter is included. + */ + if (directed) + chan = 0; + else + chan = ieee80211_frequency_to_channel( + local->hw.conf.channel->center_freq); buf_len = ieee80211_build_preq_ies(local, buf, ie, ie_len, local->hw.conf.channel->band, - sdata->rc_rateidx_mask - [local->hw.conf.channel->band], - chan); + ratemask, chan); skb = ieee80211_probereq_get(&local->hw, &sdata->vif, ssid, ssid_len, @@ -1066,13 +900,19 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, const u8 *ssid, size_t ssid_len, - const u8 *ie, size_t ie_len) + const u8 *ie, size_t ie_len, + u32 ratemask, bool directed, bool no_cck) { struct sk_buff *skb; - skb = ieee80211_build_probe_req(sdata, dst, ssid, ssid_len, ie, ie_len); - if (skb) + skb = ieee80211_build_probe_req(sdata, dst, ratemask, ssid, ssid_len, + ie, ie_len, directed); + if (skb) { + if (no_cck) + IEEE80211_SKB_CB(skb)->flags |= + IEEE80211_TX_CTL_NO_CCK_RATE; ieee80211_tx_skb(sdata, skb); + } } u32 ieee80211_sta_get_rates(struct ieee80211_local *local, @@ -1127,7 +967,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) struct ieee80211_hw *hw = &local->hw; struct ieee80211_sub_if_data *sdata; struct sta_info *sta; - int res; + int res, i; #ifdef CONFIG_PM if (local->suspended) @@ -1150,27 +990,37 @@ int ieee80211_reconfig(struct ieee80211_local *local) } #endif - /* restart hardware */ - if (local->open_count) { - /* - * Upon resume hardware can sometimes be goofy due to - * various platform / driver / bus issues, so restarting - * the device may at times not work immediately. Propagate - * the error. - */ - res = drv_start(local); - if (res) { - WARN(local->suspended, "Hardware became unavailable " - "upon resume. This could be a software issue " - "prior to suspend or a hardware issue.\n"); - return res; - } + /* setup fragmentation threshold */ + drv_set_frag_threshold(local, hw->wiphy->frag_threshold); + + /* setup RTS threshold */ + drv_set_rts_threshold(local, hw->wiphy->rts_threshold); - ieee80211_led_radio(local, true); - ieee80211_mod_tpt_led_trig(local, - IEEE80211_TPT_LEDTRIG_FL_RADIO, 0); + /* reset coverage class */ + drv_set_coverage_class(local, hw->wiphy->coverage_class); + + /* everything else happens only if HW was up & running */ + if (!local->open_count) + goto wake_up; + + /* + * Upon resume hardware can sometimes be goofy due to + * various platform / driver / bus issues, so restarting + * the device may at times not work immediately. Propagate + * the error. + */ + res = drv_start(local); + if (res) { + WARN(local->suspended, "Hardware became unavailable " + "upon resume. This could be a software issue " + "prior to suspend or a hardware issue.\n"); + return res; } + ieee80211_led_radio(local, true); + ieee80211_mod_tpt_led_trig(local, + IEEE80211_TPT_LEDTRIG_FL_RADIO, 0); + /* add interfaces */ list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN && @@ -1194,11 +1044,16 @@ int ieee80211_reconfig(struct ieee80211_local *local) } mutex_unlock(&local->sta_mtx); - /* setup fragmentation threshold */ - drv_set_frag_threshold(local, hw->wiphy->frag_threshold); + /* reconfigure tx conf */ + list_for_each_entry(sdata, &local->interfaces, list) { + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + sdata->vif.type == NL80211_IFTYPE_MONITOR || + !ieee80211_sdata_running(sdata)) + continue; - /* setup RTS threshold */ - drv_set_rts_threshold(local, hw->wiphy->rts_threshold); + for (i = 0; i < hw->queues; i++) + drv_conf_tx(local, sdata, i, &sdata->tx_conf[i]); + } /* reconfigure hardware */ ieee80211_hw_config(local, ~0); @@ -1234,6 +1089,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) changed |= BSS_CHANGED_IBSS; /* fall through */ case NL80211_IFTYPE_AP: + changed |= BSS_CHANGED_SSID; + /* fall through */ case NL80211_IFTYPE_MESH_POINT: changed |= BSS_CHANGED_BEACON | BSS_CHANGED_BEACON_ENABLED; @@ -1275,7 +1132,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) list_for_each_entry(sta, &local->sta_list, list) { ieee80211_sta_tear_down_BA_sessions(sta, true); - clear_sta_flags(sta, WLAN_STA_BLOCK_BA); + clear_sta_flag(sta, WLAN_STA_BLOCK_BA); } mutex_unlock(&local->sta_mtx); @@ -1325,6 +1182,33 @@ int ieee80211_reconfig(struct ieee80211_local *local) return 0; } +void ieee80211_resume_disconnect(struct ieee80211_vif *vif) +{ + struct ieee80211_sub_if_data *sdata; + struct ieee80211_local *local; + struct ieee80211_key *key; + + if (WARN_ON(!vif)) + return; + + sdata = vif_to_sdata(vif); + local = sdata->local; + + if (WARN_ON(!local->resuming)) + return; + + if (WARN_ON(vif->type != NL80211_IFTYPE_STATION)) + return; + + sdata->flags |= IEEE80211_SDATA_DISCONNECT_RESUME; + + mutex_lock(&local->key_mtx); + list_for_each_entry(key, &sdata->key_list, list) + key->flags |= KEY_FLAG_TAINTED; + mutex_unlock(&local->key_mtx); +} +EXPORT_SYMBOL_GPL(ieee80211_resume_disconnect); + static int check_mgd_smps(struct ieee80211_if_managed *ifmgd, enum ieee80211_smps_mode *smps_mode) { @@ -1441,3 +1325,100 @@ size_t ieee80211_ie_split_vendor(const u8 *ies, size_t ielen, size_t offset) return pos; } + +static void _ieee80211_enable_rssi_reports(struct ieee80211_sub_if_data *sdata, + int rssi_min_thold, + int rssi_max_thold) +{ + trace_api_enable_rssi_reports(sdata, rssi_min_thold, rssi_max_thold); + + if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION)) + return; + + /* + * Scale up threshold values before storing it, as the RSSI averaging + * algorithm uses a scaled up value as well. Change this scaling + * factor if the RSSI averaging algorithm changes. + */ + sdata->u.mgd.rssi_min_thold = rssi_min_thold*16; + sdata->u.mgd.rssi_max_thold = rssi_max_thold*16; +} + +void ieee80211_enable_rssi_reports(struct ieee80211_vif *vif, + int rssi_min_thold, + int rssi_max_thold) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + WARN_ON(rssi_min_thold == rssi_max_thold || + rssi_min_thold > rssi_max_thold); + + _ieee80211_enable_rssi_reports(sdata, rssi_min_thold, + rssi_max_thold); +} +EXPORT_SYMBOL(ieee80211_enable_rssi_reports); + +void ieee80211_disable_rssi_reports(struct ieee80211_vif *vif) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + _ieee80211_enable_rssi_reports(sdata, 0, 0); +} +EXPORT_SYMBOL(ieee80211_disable_rssi_reports); + +int ieee80211_add_srates_ie(struct ieee80211_vif *vif, struct sk_buff *skb) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_local *local = sdata->local; + struct ieee80211_supported_band *sband; + int rate; + u8 i, rates, *pos; + + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + rates = sband->n_bitrates; + if (rates > 8) + rates = 8; + + if (skb_tailroom(skb) < rates + 2) + return -ENOMEM; + + pos = skb_put(skb, rates + 2); + *pos++ = WLAN_EID_SUPP_RATES; + *pos++ = rates; + for (i = 0; i < rates; i++) { + rate = sband->bitrates[i].bitrate; + *pos++ = (u8) (rate / 5); + } + + return 0; +} + +int ieee80211_add_ext_srates_ie(struct ieee80211_vif *vif, struct sk_buff *skb) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_local *local = sdata->local; + struct ieee80211_supported_band *sband; + int rate; + u8 i, exrates, *pos; + + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + exrates = sband->n_bitrates; + if (exrates > 8) + exrates -= 8; + else + exrates = 0; + + if (skb_tailroom(skb) < exrates + 2) + return -ENOMEM; + + if (exrates) { + pos = skb_put(skb, exrates + 2); + *pos++ = WLAN_EID_EXT_SUPP_RATES; + *pos++ = exrates; + for (i = 8; i < sband->n_bitrates; i++) { + rate = sband->bitrates[i].bitrate; + *pos++ = (u8) (rate / 5); + } + } + return 0; +} diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index a1c6bfd..34583c5 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -97,8 +97,7 @@ static u8 *ieee80211_wep_add_iv(struct ieee80211_local *local, hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); - if (WARN_ON(skb_tailroom(skb) < WEP_ICV_LEN || - skb_headroom(skb) < WEP_IV_LEN)) + if (WARN_ON(skb_headroom(skb) < WEP_IV_LEN)) return NULL; hdrlen = ieee80211_hdrlen(hdr->frame_control); @@ -160,6 +159,9 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local, size_t len; u8 rc4key[3 + WLAN_KEY_LEN_WEP104]; + if (WARN_ON(skb_tailroom(skb) < WEP_ICV_LEN)) + return -1; + iv = ieee80211_wep_add_iv(local, skb, keylen, keyidx); if (!iv) return -1; diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 28bc084..fd52e69 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -72,7 +72,7 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_AP_VLAN: sta = rcu_dereference(sdata->u.vlan.sta); if (sta) { - qos = get_sta_flags(sta) & WLAN_STA_WME; + qos = test_sta_flag(sta, WLAN_STA_WME); break; } case NL80211_IFTYPE_AP: @@ -83,11 +83,7 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, break; #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: - /* - * XXX: This is clearly broken ... but already was before, - * because ieee80211_fill_mesh_addresses() would clear A1 - * except for multicast addresses. - */ + ra = skb->data; break; #endif case NL80211_IFTYPE_STATION: @@ -103,7 +99,7 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, if (!sta && ra && !is_multicast_ether_addr(ra)) { sta = sta_info_get(sdata, ra); if (sta) - qos = get_sta_flags(sta) & WLAN_STA_WME; + qos = test_sta_flag(sta, WLAN_STA_WME); } rcu_read_unlock(); @@ -139,7 +135,8 @@ u16 ieee80211_downgrade_queue(struct ieee80211_local *local, return ieee802_1d_to_ac[skb->priority]; } -void ieee80211_set_qos_hdr(struct ieee80211_local *local, struct sk_buff *skb) +void ieee80211_set_qos_hdr(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct ieee80211_hdr *hdr = (void *)skb->data; @@ -150,11 +147,11 @@ void ieee80211_set_qos_hdr(struct ieee80211_local *local, struct sk_buff *skb) tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; - if (unlikely(local->wifi_wme_noack_test)) - ack_policy |= QOS_CONTROL_ACK_POLICY_NOACK << - QOS_CONTROL_ACK_POLICY_SHIFT; - /* qos header is 2 bytes, second reserved */ + if (unlikely(sdata->local->wifi_wme_noack_test)) + ack_policy |= IEEE80211_QOS_CTL_ACK_POLICY_NOACK; + /* qos header is 2 bytes */ *p++ = ack_policy | tid; - *p = 0; + *p = ieee80211_vif_is_mesh(&sdata->vif) ? + (IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT >> 8) : 0; } } diff --git a/net/mac80211/wme.h b/net/mac80211/wme.h index 6053b1c..34e166f 100644 --- a/net/mac80211/wme.h +++ b/net/mac80211/wme.h @@ -13,16 +13,12 @@ #include #include "ieee80211_i.h" -#define QOS_CONTROL_ACK_POLICY_NORMAL 0 -#define QOS_CONTROL_ACK_POLICY_NOACK 1 - -#define QOS_CONTROL_ACK_POLICY_SHIFT 5 - extern const int ieee802_1d_to_ac[8]; u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); -void ieee80211_set_qos_hdr(struct ieee80211_local *local, struct sk_buff *skb); +void ieee80211_set_qos_hdr(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); u16 ieee80211_downgrade_queue(struct ieee80211_local *local, struct sk_buff *skb); diff --git a/net/mac80211/work.c b/net/mac80211/work.c index c9acfda..99165ef 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -25,6 +25,7 @@ #include "ieee80211_i.h" #include "rate.h" +#include "driver-ops.h" #define IEEE80211_AUTH_TIMEOUT (HZ / 5) #define IEEE80211_AUTH_MAX_TRIES 3 @@ -228,11 +229,9 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, wk->ie_len + /* extra IEs */ 9, /* WMM */ GFP_KERNEL); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for assoc " - "frame\n", sdata->name); + if (!skb) return; - } + skb_reserve(skb, local->hw.extra_tx_headroom); capab = WLAN_CAPABILITY_ESS; @@ -427,6 +426,14 @@ ieee80211_direct_probe(struct ieee80211_work *wk) struct ieee80211_sub_if_data *sdata = wk->sdata; struct ieee80211_local *local = sdata->local; + if (!wk->probe_auth.synced) { + int ret = drv_tx_sync(local, sdata, wk->filter_ta, + IEEE80211_TX_SYNC_AUTH); + if (ret) + return WORK_ACT_TIMEOUT; + } + wk->probe_auth.synced = true; + wk->probe_auth.tries++; if (wk->probe_auth.tries > IEEE80211_AUTH_MAX_TRIES) { printk(KERN_DEBUG "%s: direct probe to %pM timed out\n", @@ -450,7 +457,8 @@ ieee80211_direct_probe(struct ieee80211_work *wk) * will not answer to direct packet in unassociated state. */ ieee80211_send_probe_req(sdata, NULL, wk->probe_auth.ssid, - wk->probe_auth.ssid_len, NULL, 0); + wk->probe_auth.ssid_len, NULL, 0, + (u32) -1, true, false); wk->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; run_again(local, wk->timeout); @@ -465,6 +473,14 @@ ieee80211_authenticate(struct ieee80211_work *wk) struct ieee80211_sub_if_data *sdata = wk->sdata; struct ieee80211_local *local = sdata->local; + if (!wk->probe_auth.synced) { + int ret = drv_tx_sync(local, sdata, wk->filter_ta, + IEEE80211_TX_SYNC_AUTH); + if (ret) + return WORK_ACT_TIMEOUT; + } + wk->probe_auth.synced = true; + wk->probe_auth.tries++; if (wk->probe_auth.tries > IEEE80211_AUTH_MAX_TRIES) { printk(KERN_DEBUG "%s: authentication with %pM" @@ -498,6 +514,14 @@ ieee80211_associate(struct ieee80211_work *wk) struct ieee80211_sub_if_data *sdata = wk->sdata; struct ieee80211_local *local = sdata->local; + if (!wk->assoc.synced) { + int ret = drv_tx_sync(local, sdata, wk->filter_ta, + IEEE80211_TX_SYNC_ASSOC); + if (ret) + return WORK_ACT_TIMEOUT; + } + wk->assoc.synced = true; + wk->assoc.tries++; if (wk->assoc.tries > IEEE80211_ASSOC_MAX_TRIES) { printk(KERN_DEBUG "%s: association with %pM" @@ -875,26 +899,6 @@ static bool ieee80211_work_ct_coexists(enum nl80211_channel_type wk_ct, return false; } -static enum nl80211_channel_type -ieee80211_calc_ct(enum nl80211_channel_type wk_ct, - enum nl80211_channel_type oper_ct) -{ - switch (wk_ct) { - case NL80211_CHAN_NO_HT: - return oper_ct; - case NL80211_CHAN_HT20: - if (oper_ct != NL80211_CHAN_NO_HT) - return oper_ct; - return wk_ct; - case NL80211_CHAN_HT40MINUS: - case NL80211_CHAN_HT40PLUS: - return wk_ct; - } - WARN_ON(1); /* shouldn't get here */ - return wk_ct; -} - - static void ieee80211_work_timer(unsigned long data) { struct ieee80211_local *local = (void *) data; @@ -945,50 +949,18 @@ static void ieee80211_work_work(struct work_struct *work) } if (!started && !local->tmp_channel) { - bool on_oper_chan; - bool tmp_chan_changed = false; - bool on_oper_chan2; - enum nl80211_channel_type wk_ct; - on_oper_chan = ieee80211_cfg_on_oper_channel(local); - - /* Work with existing channel type if possible. */ - wk_ct = wk->chan_type; - if (wk->chan == local->hw.conf.channel) - wk_ct = ieee80211_calc_ct(wk->chan_type, - local->hw.conf.channel_type); - - if (local->tmp_channel) - if ((local->tmp_channel != wk->chan) || - (local->tmp_channel_type != wk_ct)) - tmp_chan_changed = true; - - local->tmp_channel = wk->chan; - local->tmp_channel_type = wk_ct; /* - * Leave the station vifs in awake mode if they - * happen to be on the same channel as - * the requested channel. + * TODO: could optimize this by leaving the + * station vifs in awake mode if they + * happen to be on the same channel as + * the requested channel */ - on_oper_chan2 = ieee80211_cfg_on_oper_channel(local); - if (on_oper_chan != on_oper_chan2) { - if (on_oper_chan2) { - /* going off oper channel, PS too */ - ieee80211_offchannel_stop_vifs(local); - ieee80211_hw_config(local, 0); - } else { - /* going on channel, but leave PS - * off-channel. */ - ieee80211_hw_config(local, 0); - ieee80211_offchannel_return(local, - true); - } - } else if (tmp_chan_changed) - /* Still off-channel, but on some other - * channel, so update hardware. - * PS should already be off-channel. - */ - ieee80211_hw_config(local, 0); + ieee80211_offchannel_stop_beaconing(local); + ieee80211_offchannel_stop_station(local); + local->tmp_channel = wk->chan; + local->tmp_channel_type = wk->chan_type; + ieee80211_hw_config(local, 0); started = true; wk->timeout = jiffies; } @@ -1074,8 +1046,7 @@ static void ieee80211_work_work(struct work_struct *work) * we still need to do a hardware config. Currently, * we cannot be here while scanning, however. */ - if (!ieee80211_cfg_on_oper_channel(local)) - ieee80211_hw_config(local, 0); + ieee80211_hw_config(local, 0); /* At the least, we need to disable offchannel_ps, * so just go ahead and run the entire offchannel diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index d9e03cf..a582504 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "ieee80211_i.h" #include "michael.h" @@ -52,7 +53,8 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) } if (info->control.hw_key && - !(tx->flags & IEEE80211_TX_FRAGMENTED) && + (info->flags & IEEE80211_TX_CTL_DONTFRAG || + tx->local->ops->set_frag_threshold) && !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) { /* hwaccel - with no need for SW-generated MMIC */ return TX_CONTINUE; @@ -86,11 +88,6 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - int queue = rx->queue; - - /* otherwise, TKIP is vulnerable to TID 0 vs. non-QoS replays */ - if (rx->queue == NUM_RX_DATA_QUEUES - 1) - queue = 0; /* * it makes no sense to check for MIC errors on anything other @@ -154,8 +151,8 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) update_iv: /* update IV in key information to be able to detect replays */ - rx->key->u.tkip.rx[queue].iv32 = rx->tkip_iv32; - rx->key->u.tkip.rx[queue].iv16 = rx->tkip_iv16; + rx->key->u.tkip.rx[rx->security_idx].iv32 = rx->tkip_iv32; + rx->key->u.tkip.rx[rx->security_idx].iv16 = rx->tkip_iv16; return RX_CONTINUE; @@ -177,6 +174,7 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_key *key = tx->key; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + unsigned long flags; unsigned int hdrlen; int len, tail; u8 *pos; @@ -204,11 +202,12 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) pos += hdrlen; /* Increase IV for the frame */ + spin_lock_irqsave(&key->u.tkip.txlock, flags); key->u.tkip.tx.iv16++; if (key->u.tkip.tx.iv16 == 0) key->u.tkip.tx.iv32++; - - pos = ieee80211_tkip_add_iv(pos, key, key->u.tkip.tx.iv16); + pos = ieee80211_tkip_add_iv(pos, key); + spin_unlock_irqrestore(&key->u.tkip.txlock, flags); /* hwaccel - with software IV */ if (info->control.hw_key) @@ -217,9 +216,8 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) /* Add room for ICV */ skb_put(skb, TKIP_ICV_LEN); - hdr = (struct ieee80211_hdr *) skb->data; return ieee80211_tkip_encrypt_data(tx->local->wep_tx_tfm, - key, pos, len, hdr->addr2); + key, skb, pos, len); } @@ -247,11 +245,6 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) struct ieee80211_key *key = rx->key; struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - int queue = rx->queue; - - /* otherwise, TKIP is vulnerable to TID 0 vs. non-QoS replays */ - if (rx->queue == NUM_RX_DATA_QUEUES - 1) - queue = 0; hdrlen = ieee80211_hdrlen(hdr->frame_control); @@ -272,7 +265,7 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) res = ieee80211_tkip_decrypt_data(rx->local->wep_rx_tfm, key, skb->data + hdrlen, skb->len - hdrlen, rx->sta->sta.addr, - hdr->addr1, hwaccel, queue, + hdr->addr1, hwaccel, rx->security_idx, &rx->tkip_iv32, &rx->tkip_iv16); if (res != TKIP_DECRYPT_OK) @@ -300,8 +293,10 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *scratch, unsigned int hdrlen; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - b_0 = scratch + 3 * AES_BLOCK_LEN; - aad = scratch + 4 * AES_BLOCK_LEN; + memset(scratch, 0, 6 * AES_BLOCK_SIZE); + + b_0 = scratch + 3 * AES_BLOCK_SIZE; + aad = scratch + 4 * AES_BLOCK_SIZE; /* * Mask FC: zero subtype b4 b5 b6 (if not mgmt) @@ -390,8 +385,10 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) struct ieee80211_key *key = tx->key; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); int hdrlen, len, tail; - u8 *pos, *pn; - int i; + u8 *pos; + u8 pn[6]; + u64 pn64; + u8 scratch[6 * AES_BLOCK_SIZE]; if (info->control.hw_key && !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { @@ -419,14 +416,14 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) hdr = (struct ieee80211_hdr *) pos; pos += hdrlen; - /* PN = PN + 1 */ - pn = key->u.ccmp.tx_pn; + pn64 = atomic64_inc_return(&key->u.ccmp.tx_pn); - for (i = CCMP_PN_LEN - 1; i >= 0; i--) { - pn[i]++; - if (pn[i]) - break; - } + pn[5] = pn64; + pn[4] = pn64 >> 8; + pn[3] = pn64 >> 16; + pn[2] = pn64 >> 24; + pn[1] = pn64 >> 32; + pn[0] = pn64 >> 40; ccmp_pn2hdr(pos, pn, key->conf.keyidx); @@ -435,8 +432,8 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) return 0; pos += CCMP_HDR_LEN; - ccmp_special_blocks(skb, pn, key->u.ccmp.tx_crypto_buf, 0); - ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, key->u.ccmp.tx_crypto_buf, pos, len, + ccmp_special_blocks(skb, pn, scratch, 0); + ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, scratch, pos, len, pos, skb_put(skb, CCMP_MIC_LEN)); return 0; @@ -483,8 +480,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) ccmp_hdr2pn(pn, skb->data + hdrlen); - queue = ieee80211_is_mgmt(hdr->frame_control) ? - NUM_RX_DATA_QUEUES : rx->queue; + queue = rx->security_idx; if (memcmp(pn, key->u.ccmp.rx_pn[queue], CCMP_PN_LEN) <= 0) { key->u.ccmp.replays++; @@ -492,11 +488,12 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) } if (!(status->flag & RX_FLAG_DECRYPTED)) { + u8 scratch[6 * AES_BLOCK_SIZE]; /* hardware didn't decrypt/verify MIC */ - ccmp_special_blocks(skb, pn, key->u.ccmp.rx_crypto_buf, 1); + ccmp_special_blocks(skb, pn, scratch, 1); if (ieee80211_aes_ccm_decrypt( - key->u.ccmp.tfm, key->u.ccmp.rx_crypto_buf, + key->u.ccmp.tfm, scratch, skb->data + hdrlen + CCMP_HDR_LEN, data_len, skb->data + skb->len - CCMP_MIC_LEN, skb->data + hdrlen + CCMP_HDR_LEN)) @@ -527,6 +524,16 @@ static void bip_aad(struct sk_buff *skb, u8 *aad) } +static inline void bip_ipn_set64(u8 *d, u64 pn) +{ + *d++ = pn; + *d++ = pn >> 8; + *d++ = pn >> 16; + *d++ = pn >> 24; + *d++ = pn >> 32; + *d = pn >> 40; +} + static inline void bip_ipn_swap(u8 *d, const u8 *s) { *d++ = s[5]; @@ -545,8 +552,8 @@ ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx) struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_key *key = tx->key; struct ieee80211_mmie *mmie; - u8 *pn, aad[20]; - int i; + u8 aad[20]; + u64 pn64; if (info->control.hw_key) return 0; @@ -560,22 +567,17 @@ ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx) mmie->key_id = cpu_to_le16(key->conf.keyidx); /* PN = PN + 1 */ - pn = key->u.aes_cmac.tx_pn; + pn64 = atomic64_inc_return(&key->u.aes_cmac.tx_pn); - for (i = sizeof(key->u.aes_cmac.tx_pn) - 1; i >= 0; i--) { - pn[i]++; - if (pn[i]) - break; - } - bip_ipn_swap(mmie->sequence_number, pn); + bip_ipn_set64(mmie->sequence_number, pn64); bip_aad(skb, aad); /* * MIC = AES-128-CMAC(IGTK, AAD || Management Frame Body || MMIE, 64) */ - ieee80211_aes_cmac(key->u.aes_cmac.tfm, key->u.aes_cmac.tx_crypto_buf, - aad, skb->data + 24, skb->len - 24, mmie->mic); + ieee80211_aes_cmac(key->u.aes_cmac.tfm, aad, + skb->data + 24, skb->len - 24, mmie->mic); return TX_CONTINUE; } @@ -613,8 +615,7 @@ ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx) if (!(status->flag & RX_FLAG_DECRYPTED)) { /* hardware didn't decrypt/verify MIC */ bip_aad(skb, aad); - ieee80211_aes_cmac(key->u.aes_cmac.tfm, - key->u.aes_cmac.rx_crypto_buf, aad, + ieee80211_aes_cmac(key->u.aes_cmac.tfm, aad, skb->data + 24, skb->len - 24, mic); if (memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) { key->u.aes_cmac.icverrors++; diff --git a/net/netlabel/Makefile b/net/netlabel/Makefile index ea750e9..d2732fc 100644 --- a/net/netlabel/Makefile +++ b/net/netlabel/Makefile @@ -1,8 +1,6 @@ # # Makefile for the NetLabel subsystem. # -# Feb 9, 2006, Paul Moore -# # base objects obj-y := netlabel_user.o netlabel_kapi.o diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c index c051913..96b749d 100644 --- a/net/netlabel/netlabel_addrlist.c +++ b/net/netlabel/netlabel_addrlist.c @@ -6,7 +6,7 @@ * system manages static and dynamic label mappings for network protocols such * as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h index 2b9644e..fdbc1d2 100644 --- a/net/netlabel/netlabel_addrlist.h +++ b/net/netlabel/netlabel_addrlist.h @@ -6,7 +6,7 @@ * system manages static and dynamic label mappings for network protocols such * as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index bae5756..6bf8783 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include "netlabel_user.h" #include "netlabel_cipso_v4.h" diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h index af7f335..d24d774 100644 --- a/net/netlabel/netlabel_cipso_v4.h +++ b/net/netlabel/netlabel_cipso_v4.h @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 10b273a..bf99567 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -6,7 +6,7 @@ * system manages static and dynamic label mappings for network protocols such * as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ @@ -55,8 +55,7 @@ struct netlbl_domhsh_tbl { * should be okay */ static DEFINE_SPINLOCK(netlbl_domhsh_lock); #define netlbl_domhsh_rcu_deref(p) \ - rcu_dereference_check(p, rcu_read_lock_held() || \ - lockdep_is_held(&netlbl_domhsh_lock)) + rcu_dereference_check(p, lockdep_is_held(&netlbl_domhsh_lock)) static struct netlbl_domhsh_tbl *netlbl_domhsh = NULL; static struct netlbl_dom_map *netlbl_domhsh_def = NULL; @@ -258,7 +257,7 @@ static int netlbl_domhsh_validate(const struct netlbl_dom_map *entry) { struct netlbl_af4list *iter4; struct netlbl_domaddr4_map *map4; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) struct netlbl_af6list *iter6; struct netlbl_domaddr6_map *map6; #endif /* IPv6 */ @@ -292,7 +291,7 @@ static int netlbl_domhsh_validate(const struct netlbl_dom_map *entry) return -EINVAL; } } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach(iter6, &entry->type_def.addrsel->list6) { map6 = netlbl_domhsh_addr6_entry(iter6); switch (map6->type) { @@ -521,7 +520,7 @@ int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry, if (entry != rcu_dereference(netlbl_domhsh_def)) list_del_rcu(&entry->list); else - rcu_assign_pointer(netlbl_domhsh_def, NULL); + RCU_INIT_POINTER(netlbl_domhsh_def, NULL); } else ret_val = -ENOENT; spin_unlock(&netlbl_domhsh_lock); diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index 0261dda..bfcc0f7 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h @@ -6,7 +6,7 @@ * system manages static and dynamic label mappings for network protocols such * as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 1b83e00..bcecae0 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -5,7 +5,7 @@ * system manages static and dynamic label mappings for network protocols such * as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include "netlabel_domainhash.h" #include "netlabel_unlabeled.h" @@ -111,8 +111,6 @@ int netlbl_cfg_unlbl_map_add(const char *domain, struct netlbl_domaddr_map *addrmap = NULL; struct netlbl_domaddr4_map *map4 = NULL; struct netlbl_domaddr6_map *map6 = NULL; - const struct in_addr *addr4, *mask4; - const struct in6_addr *addr6, *mask6; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) @@ -133,9 +131,9 @@ int netlbl_cfg_unlbl_map_add(const char *domain, INIT_LIST_HEAD(&addrmap->list6); switch (family) { - case AF_INET: - addr4 = addr; - mask4 = mask; + case AF_INET: { + const struct in_addr *addr4 = addr; + const struct in_addr *mask4 = mask; map4 = kzalloc(sizeof(*map4), GFP_ATOMIC); if (map4 == NULL) goto cfg_unlbl_map_add_failure; @@ -148,9 +146,11 @@ int netlbl_cfg_unlbl_map_add(const char *domain, if (ret_val != 0) goto cfg_unlbl_map_add_failure; break; - case AF_INET6: - addr6 = addr; - mask6 = mask; + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: { + const struct in6_addr *addr6 = addr; + const struct in6_addr *mask6 = mask; map6 = kzalloc(sizeof(*map6), GFP_ATOMIC); if (map6 == NULL) goto cfg_unlbl_map_add_failure; @@ -162,11 +162,13 @@ int netlbl_cfg_unlbl_map_add(const char *domain, map6->list.addr.s6_addr32[3] &= mask6->s6_addr32[3]; ipv6_addr_copy(&map6->list.mask, mask6); map6->list.valid = 1; - ret_val = netlbl_af4list_add(&map4->list, - &addrmap->list4); + ret_val = netlbl_af6list_add(&map6->list, + &addrmap->list6); if (ret_val != 0) goto cfg_unlbl_map_add_failure; break; + } +#endif /* IPv6 */ default: goto cfg_unlbl_map_add_failure; break; @@ -225,9 +227,11 @@ int netlbl_cfg_unlbl_static_add(struct net *net, case AF_INET: addr_len = sizeof(struct in_addr); break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case AF_INET6: addr_len = sizeof(struct in6_addr); break; +#endif /* IPv6 */ default: return -EPFNOSUPPORT; } @@ -266,9 +270,11 @@ int netlbl_cfg_unlbl_static_del(struct net *net, case AF_INET: addr_len = sizeof(struct in_addr); break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case AF_INET6: addr_len = sizeof(struct in6_addr); break; +#endif /* IPv6 */ default: return -EPFNOSUPPORT; } @@ -341,11 +347,11 @@ int netlbl_cfg_cipsov4_map_add(u32 doi, entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) - return -ENOMEM; + goto out_entry; if (domain != NULL) { entry->domain = kstrdup(domain, GFP_ATOMIC); if (entry->domain == NULL) - goto cfg_cipsov4_map_add_failure; + goto out_domain; } if (addr == NULL && mask == NULL) { @@ -354,13 +360,13 @@ int netlbl_cfg_cipsov4_map_add(u32 doi, } else if (addr != NULL && mask != NULL) { addrmap = kzalloc(sizeof(*addrmap), GFP_ATOMIC); if (addrmap == NULL) - goto cfg_cipsov4_map_add_failure; + goto out_addrmap; INIT_LIST_HEAD(&addrmap->list4); INIT_LIST_HEAD(&addrmap->list6); addrinfo = kzalloc(sizeof(*addrinfo), GFP_ATOMIC); if (addrinfo == NULL) - goto cfg_cipsov4_map_add_failure; + goto out_addrinfo; addrinfo->type_def.cipsov4 = doi_def; addrinfo->type = NETLBL_NLTYPE_CIPSOV4; addrinfo->list.addr = addr->s_addr & mask->s_addr; @@ -374,7 +380,7 @@ int netlbl_cfg_cipsov4_map_add(u32 doi, entry->type = NETLBL_NLTYPE_ADDRSELECT; } else { ret_val = -EINVAL; - goto cfg_cipsov4_map_add_failure; + goto out_addrmap; } ret_val = netlbl_domhsh_add(entry, audit_info); @@ -384,11 +390,15 @@ int netlbl_cfg_cipsov4_map_add(u32 doi, return 0; cfg_cipsov4_map_add_failure: - cipso_v4_doi_putdef(doi_def); + kfree(addrinfo); +out_addrinfo: + kfree(addrmap); +out_addrmap: kfree(entry->domain); +out_domain: kfree(entry); - kfree(addrmap); - kfree(addrinfo); +out_entry: + cipso_v4_doi_putdef(doi_def); return ret_val; } @@ -513,7 +523,7 @@ int netlbl_secattr_catmap_walk_rng(struct netlbl_lsm_secattr_catmap *catmap, /** * netlbl_secattr_catmap_setbit - Set a bit in a LSM secattr catmap - * @catmap: the category bitmap + * @catmap: pointer to the category bitmap * @bit: the bit to set * @flags: memory allocation flags * @@ -522,18 +532,25 @@ int netlbl_secattr_catmap_walk_rng(struct netlbl_lsm_secattr_catmap *catmap, * negative values on failure. * */ -int netlbl_secattr_catmap_setbit(struct netlbl_lsm_secattr_catmap *catmap, +int netlbl_secattr_catmap_setbit(struct netlbl_lsm_secattr_catmap **catmap, u32 bit, gfp_t flags) { - struct netlbl_lsm_secattr_catmap *iter = catmap; + struct netlbl_lsm_secattr_catmap *iter = *catmap; u32 node_bit; u32 node_idx; while (iter->next != NULL && bit >= (iter->startbit + NETLBL_CATMAP_SIZE)) iter = iter->next; - if (bit >= (iter->startbit + NETLBL_CATMAP_SIZE)) { + if (bit < iter->startbit) { + iter = netlbl_secattr_catmap_alloc(flags); + if (iter == NULL) + return -ENOMEM; + iter->next = *catmap; + iter->startbit = bit & ~(NETLBL_CATMAP_SIZE - 1); + *catmap = iter; + } else if (bit >= (iter->startbit + NETLBL_CATMAP_SIZE)) { iter->next = netlbl_secattr_catmap_alloc(flags); if (iter->next == NULL) return -ENOMEM; @@ -551,7 +568,7 @@ int netlbl_secattr_catmap_setbit(struct netlbl_lsm_secattr_catmap *catmap, /** * netlbl_secattr_catmap_setrng - Set a range of bits in a LSM secattr catmap - * @catmap: the category bitmap + * @catmap: pointer to the category bitmap * @start: the starting bit * @end: the last bit in the string * @flags: memory allocation flags @@ -561,15 +578,16 @@ int netlbl_secattr_catmap_setbit(struct netlbl_lsm_secattr_catmap *catmap, * on success, negative values on failure. * */ -int netlbl_secattr_catmap_setrng(struct netlbl_lsm_secattr_catmap *catmap, +int netlbl_secattr_catmap_setrng(struct netlbl_lsm_secattr_catmap **catmap, u32 start, u32 end, gfp_t flags) { int ret_val = 0; - struct netlbl_lsm_secattr_catmap *iter = catmap; + struct netlbl_lsm_secattr_catmap *iter = *catmap; u32 iter_max_spot; u32 spot; + u32 orig_spot = iter->startbit; /* XXX - This could probably be made a bit faster by combining writes * to the catmap instead of setting a single bit each time, but for @@ -587,7 +605,9 @@ int netlbl_secattr_catmap_setrng(struct netlbl_lsm_secattr_catmap *catmap, iter = iter->next; iter_max_spot = iter->startbit + NETLBL_CATMAP_SIZE; } - ret_val = netlbl_secattr_catmap_setbit(iter, spot, GFP_ATOMIC); + ret_val = netlbl_secattr_catmap_setbit(&iter, spot, flags); + if (iter->startbit < orig_spot) + *catmap = iter; } return ret_val; diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 4f251b1..bfa5558 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include "netlabel_domainhash.h" #include "netlabel_user.h" diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h index 0a25838..5a9f31c 100644 --- a/net/netlabel/netlabel_mgmt.h +++ b/net/netlabel/netlabel_mgmt.h @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ @@ -32,7 +32,7 @@ #define _NETLABEL_MGMT_H #include -#include +#include /* * The following NetLabel payloads are supported by the management interface. diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 0f0e907..23267b3 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -5,7 +5,7 @@ * NetLabel system. The NetLabel system manages static and dynamic label * mappings for network protocols such as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ @@ -52,7 +52,7 @@ #include #include #include -#include +#include #include "netlabel_user.h" #include "netlabel_addrlist.h" @@ -116,8 +116,7 @@ struct netlbl_unlhsh_walk_arg { * hash table should be okay */ static DEFINE_SPINLOCK(netlbl_unlhsh_lock); #define netlbl_unlhsh_rcu_deref(p) \ - rcu_dereference_check(p, rcu_read_lock_held() || \ - lockdep_is_held(&netlbl_unlhsh_lock)) + rcu_dereference_check(p, lockdep_is_held(&netlbl_unlhsh_lock)) static struct netlbl_unlhsh_tbl *netlbl_unlhsh = NULL; static struct netlbl_unlhsh_iface *netlbl_unlhsh_def = NULL; @@ -426,10 +425,9 @@ int netlbl_unlhsh_add(struct net *net, audit_info); switch (addr_len) { case sizeof(struct in_addr): { - struct in_addr *addr4, *mask4; + const struct in_addr *addr4 = addr; + const struct in_addr *mask4 = mask; - addr4 = (struct in_addr *)addr; - mask4 = (struct in_addr *)mask; ret_val = netlbl_unlhsh_add_addr4(iface, addr4, mask4, secid); if (audit_buf != NULL) netlbl_af4list_audit_addr(audit_buf, 1, @@ -440,10 +438,9 @@ int netlbl_unlhsh_add(struct net *net, } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case sizeof(struct in6_addr): { - struct in6_addr *addr6, *mask6; + const struct in6_addr *addr6 = addr; + const struct in6_addr *mask6 = mask; - addr6 = (struct in6_addr *)addr; - mask6 = (struct in6_addr *)mask; ret_val = netlbl_unlhsh_add_addr6(iface, addr6, mask6, secid); if (audit_buf != NULL) netlbl_af6list_audit_addr(audit_buf, 1, @@ -624,7 +621,7 @@ static void netlbl_unlhsh_condremove_iface(struct netlbl_unlhsh_iface *iface) if (iface->ifindex > 0) list_del_rcu(&iface->list); else - rcu_assign_pointer(netlbl_unlhsh_def, NULL); + RCU_INIT_POINTER(netlbl_unlhsh_def, NULL); spin_unlock(&netlbl_unlhsh_lock); call_rcu(&iface->rcu, netlbl_unlhsh_free_iface); @@ -1445,11 +1442,9 @@ int __init netlbl_unlabel_init(u32 size) for (iter = 0; iter < hsh_tbl->size; iter++) INIT_LIST_HEAD(&hsh_tbl->tbl[iter]); - rcu_read_lock(); spin_lock(&netlbl_unlhsh_lock); rcu_assign_pointer(netlbl_unlhsh, hsh_tbl); spin_unlock(&netlbl_unlhsh_lock); - rcu_read_unlock(); register_netdevice_notifier(&netlbl_unlhsh_netdev_notifier); diff --git a/net/netlabel/netlabel_unlabeled.h b/net/netlabel/netlabel_unlabeled.h index 0bc8dc3..700af49 100644 --- a/net/netlabel/netlabel_unlabeled.h +++ b/net/netlabel/netlabel_unlabeled.h @@ -5,7 +5,7 @@ * NetLabel system. The NetLabel system manages static and dynamic label * mappings for network protocols such as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index a3fd75a..9fae63f 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h index f4fc4c9..8196978 100644 --- a/net/netlabel/netlabel_user.h +++ b/net/netlabel/netlabel_user.h @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 3df7c5a..b4d889b 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1182,10 +1182,9 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, sax->sax25_family = AF_NETROM; skb_copy_from_linear_data_offset(skb, 7, sax->sax25_call.ax25_call, AX25_ADDR_LEN); + msg->msg_namelen = sizeof(*sax); } - msg->msg_namelen = sizeof(*sax); - skb_free_datagram(sk, skb); release_sock(sk); diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 44059d0..915a87b 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -37,6 +37,7 @@ #include #include #include +#include static unsigned int nr_neigh_no = 1; @@ -257,9 +258,12 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, case 3: if (nr_node->routes[1].quality > nr_node->routes[0].quality) { switch (nr_node->which) { - case 0: nr_node->which = 1; break; - case 1: nr_node->which = 0; break; - default: break; + case 0: + nr_node->which = 1; + break; + case 1: + nr_node->which = 0; + break; } nr_route = nr_node->routes[0]; nr_node->routes[0] = nr_node->routes[1]; @@ -505,12 +509,13 @@ static int nr_dec_obs(void) s->count--; switch (i) { - case 0: - s->routes[0] = s->routes[1]; - case 1: - s->routes[1] = s->routes[2]; - case 2: - break; + case 0: + s->routes[0] = s->routes[1]; + /* Fallthrough */ + case 1: + s->routes[1] = s->routes[2]; + case 2: + break; } break; diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig index 8e12c8a..78efe89 100644 --- a/net/rfkill/Kconfig +++ b/net/rfkill/Kconfig @@ -10,11 +10,6 @@ menuconfig RFKILL To compile this driver as a module, choose M here: the module will be called rfkill. -config RFKILL_PM - bool "Power off on suspend" - depends on RFKILL && PM - default y - # LED trigger support config RFKILL_LEDS bool diff --git a/net/rfkill/core.c b/net/rfkill/core.c index df2dae6..5be1957 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -235,7 +235,7 @@ static bool __rfkill_set_hw_state(struct rfkill *rfkill, else rfkill->state &= ~RFKILL_BLOCK_HW; *change = prev != blocked; - any = rfkill->state & RFKILL_BLOCK_ANY; + any = !!(rfkill->state & RFKILL_BLOCK_ANY); spin_unlock_irqrestore(&rfkill->lock, flags); rfkill_led_trigger_event(rfkill); @@ -769,7 +769,6 @@ void rfkill_pause_polling(struct rfkill *rfkill) } EXPORT_SYMBOL(rfkill_pause_polling); -#ifdef CONFIG_RFKILL_PM void rfkill_resume_polling(struct rfkill *rfkill) { BUG_ON(!rfkill); @@ -804,17 +803,14 @@ static int rfkill_resume(struct device *dev) return 0; } -#endif static struct class rfkill_class = { .name = "rfkill", .dev_release = rfkill_release, .dev_attrs = rfkill_dev_attrs, .dev_uevent = rfkill_dev_uevent, -#ifdef CONFIG_RFKILL_PM .suspend = rfkill_suspend, .resume = rfkill_resume, -#endif }; bool rfkill_blocked(struct rfkill *rfkill) diff --git a/net/rfkill/input.c b/net/rfkill/input.c index 1bca6d4..24c55c5 100644 --- a/net/rfkill/input.c +++ b/net/rfkill/input.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index 256c5dd..128677d 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -101,6 +101,14 @@ static int rfkill_gpio_probe(struct platform_device *pdev) if (!rfkill) return -ENOMEM; + if (pdata->gpio_runtime_setup) { + ret = pdata->gpio_runtime_setup(pdev); + if (ret) { + pr_warn("%s: can't set up gpio\n", __func__); + return ret; + } + } + rfkill->pdata = pdata; len = strlen(pdata->name); @@ -182,7 +190,10 @@ fail_alloc: static int rfkill_gpio_remove(struct platform_device *pdev) { struct rfkill_gpio_data *rfkill = platform_get_drvdata(pdev); + struct rfkill_gpio_platform_data *pdata = pdev->dev.platform_data; + if (pdata->gpio_runtime_close) + pdata->gpio_runtime_close(pdev); rfkill_unregister(rfkill->rfkill_dev); rfkill_destroy(rfkill->rfkill_dev); if (gpio_is_valid(rfkill->pdata->shutdown_gpio)) diff --git a/net/rfkill/rfkill-regulator.c b/net/rfkill/rfkill-regulator.c index 18dc512..3ca7277 100644 --- a/net/rfkill/rfkill-regulator.c +++ b/net/rfkill/rfkill-regulator.c @@ -90,7 +90,6 @@ static int __devinit rfkill_regulator_probe(struct platform_device *pdev) pdata->type, &rfkill_regulator_ops, rfkill_data); if (rf_kill == NULL) { - dev_err(&pdev->dev, "Cannot alloc rfkill device\n"); ret = -ENOMEM; goto err_rfkill_alloc; } diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 1f96fb9..233dbe6 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -195,7 +195,8 @@ static void rose_kill_by_device(struct net_device *dev) if (rose->device == dev) { rose_disconnect(s, ENETUNREACH, ROSE_OUT_OF_ORDER, 0); - rose->neighbour->use--; + if (rose->neighbour) + rose->neighbour->use--; rose->device = NULL; } } @@ -1221,7 +1222,6 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, { struct sock *sk = sock->sk; struct rose_sock *rose = rose_sk(sk); - struct sockaddr_rose *srose = (struct sockaddr_rose *)msg->msg_name; size_t copied; unsigned char *asmptr; struct sk_buff *skb; @@ -1257,24 +1257,19 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); - if (srose != NULL) { - memset(srose, 0, msg->msg_namelen); + if (msg->msg_name) { + struct sockaddr_rose *srose; + struct full_sockaddr_rose *full_srose = msg->msg_name; + + memset(msg->msg_name, 0, sizeof(struct full_sockaddr_rose)); + srose = msg->msg_name; srose->srose_family = AF_ROSE; srose->srose_addr = rose->dest_addr; srose->srose_call = rose->dest_call; srose->srose_ndigis = rose->dest_ndigis; - if (msg->msg_namelen >= sizeof(struct full_sockaddr_rose)) { - struct full_sockaddr_rose *full_srose = (struct full_sockaddr_rose *)msg->msg_name; - for (n = 0 ; n < rose->dest_ndigis ; n++) - full_srose->srose_digis[n] = rose->dest_digis[n]; - msg->msg_namelen = sizeof(struct full_sockaddr_rose); - } else { - if (rose->dest_ndigis >= 1) { - srose->srose_ndigis = 1; - srose->srose_digi = rose->dest_digis[0]; - } - msg->msg_namelen = sizeof(struct sockaddr_rose); - } + for (n = 0 ; n < rose->dest_ndigis ; n++) + full_srose->srose_digis[n] = rose->dest_digis[n]; + msg->msg_namelen = sizeof(struct full_sockaddr_rose); } skb_free_datagram(sk, skb); diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c index fa5f564..7a02bd1 100644 --- a/net/rose/rose_link.c +++ b/net/rose/rose_link.c @@ -266,13 +266,6 @@ void rose_transmit_link(struct sk_buff *skb, struct rose_neigh *neigh) { unsigned char *dptr; -#if 0 - if (call_fw_firewall(PF_ROSE, skb->dev, skb->data, NULL, &skb) != FW_ACCEPT) { - kfree_skb(skb); - return; - } -#endif - if (neigh->loopback) { rose_loopback_queue(skb, neigh); return; diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 479cae5..cd9b7ee 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -36,6 +36,7 @@ #include #include #include +#include static unsigned int rose_neigh_no = 1; @@ -864,11 +865,6 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) int res = 0; char buf[11]; -#if 0 - if (call_in_firewall(PF_ROSE, skb->dev, skb->data, NULL, &skb) != FW_ACCEPT) - return res; -#endif - if (skb->len < ROSE_MIN_LEN) return res; frametype = skb->data[2]; diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index 5f22e26..338d793 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include "ar-internal.h" diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index 0c65013..5cc2da5 100644 --- a/net/rxrpc/ar-recvmsg.c +++ b/net/rxrpc/ar-recvmsg.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include "ar-internal.h" @@ -86,7 +87,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, if (!skb) { /* nothing remains on the queue */ if (copied && - (msg->msg_flags & MSG_PEEK || timeo == 0)) + (flags & MSG_PEEK || timeo == 0)) goto out; /* wait for a message to turn up */ @@ -142,10 +143,13 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, /* copy the peer address and timestamp */ if (!continue_call) { - if (msg->msg_name && msg->msg_namelen > 0) + if (msg->msg_name) { + size_t len = + sizeof(call->conn->trans->peer->srx); memcpy(msg->msg_name, - &call->conn->trans->peer->srx, - sizeof(call->conn->trans->peer->srx)); + &call->conn->trans->peer->srx, len); + msg->msg_namelen = len; + } sock_recv_ts_and_drops(msg, &rx->sk, skb); } diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 6c8c8da..d014b05 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -280,6 +280,9 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->peer.asconf_capable = 0; if (sctp_addip_noauth) asoc->peer.asconf_capable = 1; + asoc->asconf_addr_del_pending = NULL; + asoc->src_out_of_asoc_ok = 0; + asoc->new_transport = NULL; /* Create an input queue. */ sctp_inq_init(&asoc->base.inqueue); @@ -386,7 +389,7 @@ void sctp_association_free(struct sctp_association *asoc) /* Only real associations count against the endpoint, so * don't bother for if this is a temporary association. */ - if (!asoc->temp) { + if (!list_empty(&asoc->asocs)) { list_del(&asoc->asocs); /* Decrement the backlog value for a TCP-style listening @@ -446,6 +449,10 @@ void sctp_association_free(struct sctp_association *asoc) sctp_asconf_queue_teardown(asoc); + /* Free pending address space being deleted */ + if (asoc->asconf_addr_del_pending != NULL) + kfree(asoc->asconf_addr_del_pending); + /* AUTH - Free the endpoint shared keys */ sctp_auth_destroy_keys(&asoc->endpoint_shared_keys); @@ -1181,6 +1188,7 @@ void sctp_assoc_update(struct sctp_association *asoc, asoc->c = new->c; asoc->peer.rwnd = new->peer.rwnd; asoc->peer.sack_needed = new->peer.sack_needed; + asoc->peer.auth_capable = new->peer.auth_capable; asoc->peer.i = new->peer.i; sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, asoc->peer.i.initial_tsn, GFP_ATOMIC); @@ -1264,7 +1272,6 @@ void sctp_assoc_update(struct sctp_association *asoc, asoc->peer.peer_hmacs = new->peer.peer_hmacs; new->peer.peer_hmacs = NULL; - sctp_auth_key_put(asoc->asoc_shared_key); sctp_auth_asoc_init_active_key(asoc, GFP_ATOMIC); } @@ -1630,6 +1637,8 @@ struct sctp_chunk *sctp_assoc_lookup_asconf_ack( * ack chunk whose serial number matches that of the request. */ list_for_each_entry(ack, &asoc->asconf_ack_list, transmitted_list) { + if (sctp_chunk_pending(ack)) + continue; if (ack->subh.addip_hdr->serial == serial) { sctp_chunk_hold(ack); return ack; diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 418ebe4..53d455c 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -82,7 +82,7 @@ static struct sctp_auth_bytes *sctp_auth_create_key(__u32 key_len, gfp_t gfp) struct sctp_auth_bytes *key; /* Verify that we are not going to overflow INT_MAX */ - if ((INT_MAX - key_len) < sizeof(struct sctp_auth_bytes)) + if (key_len > (INT_MAX - sizeof(struct sctp_auth_bytes))) return NULL; /* Allocate the shared key */ @@ -866,8 +866,6 @@ int sctp_auth_set_key(struct sctp_endpoint *ep, list_add(&cur_key->key_list, sh_keys); cur_key->key = key; - sctp_auth_key_hold(key); - return 0; nomem: if (!replace) diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 83e3011..4ece451 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -430,7 +430,7 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp, list_for_each_entry(laddr, &bp->address_list, list) { addr_buf = (union sctp_addr *)addrs; for (i = 0; i < addrcnt; i++) { - addr = (union sctp_addr *)addr_buf; + addr = addr_buf; af = sctp_get_af_specific(addr->v4.sin_family); if (!af) break; @@ -534,6 +534,21 @@ int sctp_in_scope(const union sctp_addr *addr, sctp_scope_t scope) return 0; } +int sctp_is_ep_boundall(struct sock *sk) +{ + struct sctp_bind_addr *bp; + struct sctp_sockaddr_entry *addr; + + bp = &sctp_sk(sk)->ep->base.bind_addr; + if (sctp_list_single_entry(&bp->address_list)) { + addr = list_entry(bp->address_list.next, + struct sctp_sockaddr_entry, list); + if (sctp_is_any(sk, &addr->a)) + return 1; + } + return 0; +} + /******************************************************************** * 3rd Level Abstractions ********************************************************************/ diff --git a/net/sctp/input.c b/net/sctp/input.c index cd9eded..0fc18c7 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -510,8 +510,7 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb, * discard the packet. */ if (vtag == 0) { - chunkhdr = (struct sctp_init_chunk *)((void *)sctphdr - + sizeof(struct sctphdr)); + chunkhdr = (void *)sctphdr + sizeof(struct sctphdr); if (len < sizeof(struct sctphdr) + sizeof(sctp_chunkhdr_t) + sizeof(__be32) || chunkhdr->chunk_hdr.type != SCTP_CID_INIT || diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 397296f..32421ae 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -152,18 +152,9 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) } else { /* Nothing to do. Next chunk in the packet, please. */ ch = (sctp_chunkhdr_t *) chunk->chunk_end; - /* Force chunk->skb->data to chunk->chunk_end. */ - skb_pull(chunk->skb, - chunk->chunk_end - chunk->skb->data); - - /* Verify that we have at least chunk headers - * worth of buffer left. - */ - if (skb_headlen(chunk->skb) < sizeof(sctp_chunkhdr_t)) { - sctp_chunk_free(chunk); - chunk = queue->in_progress = NULL; - } + skb_pull(chunk->skb, chunk->chunk_end - chunk->skb->data); + /* We are guaranteed to pull a SCTP header. */ } } @@ -199,24 +190,14 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t)); chunk->subh.v = NULL; /* Subheader is no longer valid. */ - if (chunk->chunk_end < skb_tail_pointer(chunk->skb)) { + if (chunk->chunk_end + sizeof(sctp_chunkhdr_t) < + skb_tail_pointer(chunk->skb)) { /* This is not a singleton */ chunk->singleton = 0; } else if (chunk->chunk_end > skb_tail_pointer(chunk->skb)) { - /* RFC 2960, Section 6.10 Bundling - * - * Partial chunks MUST NOT be placed in an SCTP packet. - * If the receiver detects a partial chunk, it MUST drop - * the chunk. - * - * Since the end of the chunk is past the end of our buffer - * (which contains the whole packet, we can freely discard - * the whole packet. - */ - sctp_chunk_free(chunk); - chunk = queue->in_progress = NULL; - - return NULL; + /* Discard inside state machine. */ + chunk->pdiscard = 1; + chunk->chunk_end = skb_tail_pointer(chunk->skb); } else { /* We are at the end of the packet, so mark the chunk * in case we need to send a SACK. diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 609adfa..0b6a391 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -112,6 +112,7 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, addr->valid = 1; spin_lock_bh(&sctp_local_addr_lock); list_add_tail_rcu(&addr->list, &sctp_local_addr_list); + sctp_addr_wq_mgmt(addr, SCTP_ADDR_NEW); spin_unlock_bh(&sctp_local_addr_lock); } break; @@ -122,6 +123,7 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, if (addr->a.sa.sa_family == AF_INET6 && ipv6_addr_equal(&addr->a.v6.sin6_addr, &ifa->addr)) { + sctp_addr_wq_mgmt(addr, SCTP_ADDR_DEL); found = 1; addr->valid = 0; list_del_rcu(&addr->list); @@ -213,12 +215,14 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) __func__, skb, skb->len, &fl6->saddr, &fl6->daddr); + IP6_ECN_flow_xmit(sk, fl6->flowlabel); + if (!(transport->param_flags & SPP_PMTUD_ENABLE)) skb->local_df = 1; SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS); - return ip6_xmit(sk, skb, fl6, np->opt); + return ip6_xmit(sk, skb, fl6, np->opt, np->tclass); } /* Returns the dst cache entry for the given source and destination ip diff --git a/net/sctp/output.c b/net/sctp/output.c index 32ba8d0..c3b8549 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -384,12 +384,12 @@ int sctp_packet_transmit(struct sctp_packet *packet) sk = chunk->skb->sk; /* Allocate the new skb. */ - nskb = alloc_skb(packet->size + LL_MAX_HEADER, GFP_ATOMIC); + nskb = alloc_skb(packet->size + MAX_HEADER, GFP_ATOMIC); if (!nskb) goto nomem; /* Make sure the outbound skb has enough header room reserved. */ - skb_reserve(nskb, packet->overhead + LL_MAX_HEADER); + skb_reserve(nskb, packet->overhead + MAX_HEADER); /* Set the owning socket so that we know where to get the * destination IP address. @@ -518,7 +518,8 @@ int sctp_packet_transmit(struct sctp_packet *packet) * by CRC32-C as described in . */ if (!sctp_checksum_disable) { - if (!(dst->dev->features & NETIF_F_SCTP_CSUM)) { + if (!(dst->dev->features & NETIF_F_SCTP_CSUM) || + (dst_xfrm(dst) != NULL) || packet->ipfragok) { __u32 crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len); /* 3) Put the resultant value into the checksum field in the @@ -586,7 +587,7 @@ out: return err; no_route: kfree_skb(nskb); - IP_INC_STATS_BH(&init_net, IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(&init_net, IPSTATS_MIB_OUTNOROUTES); /* FIXME: Returning the 'err' will effect all the associations * associated with a socket, although only one of the paths of the diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 35e44e2..3dd7207 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -752,6 +752,16 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) */ list_for_each_entry_safe(chunk, tmp, &q->control_chunk_list, list) { + /* RFC 5061, 5.3 + * F1) This means that until such time as the ASCONF + * containing the add is acknowledged, the sender MUST + * NOT use the new IP address as a source for ANY SCTP + * packet except on carrying an ASCONF Chunk. + */ + if (asoc->src_out_of_asoc_ok && + chunk->chunk_hdr->type != SCTP_CID_ASCONF) + continue; + list_del_init(&chunk->list); /* Pick the right transport to use. */ @@ -879,6 +889,9 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) } } + if (q->asoc->src_out_of_asoc_ok) + goto sctp_flush_out; + /* Is it OK to send data chunks? */ switch (asoc->state) { case SCTP_STATE_COOKIE_ECHOED: @@ -902,6 +915,8 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) * current cwnd). */ if (!list_empty(&q->retransmit)) { + if (asoc->peer.retran_path->state == SCTP_UNCONFIRMED) + goto sctp_flush_out; if (transport == asoc->peer.retran_path) goto retran; @@ -974,6 +989,8 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) ((new_transport->state == SCTP_INACTIVE) || (new_transport->state == SCTP_UNCONFIRMED))) new_transport = asoc->peer.active_path; + if (new_transport->state == SCTP_UNCONFIRMED) + continue; /* Change packets if necessary. */ if (new_transport != transport) { diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 05a6ce2..1e2eee8 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include /* for snmp_fold_field */ diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 946afd6..de35e01 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -503,7 +503,9 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, sctp_v4_dst_saddr(&dst_saddr, fl4, htons(bp->port)); rcu_read_lock(); list_for_each_entry_rcu(laddr, &bp->address_list, list) { - if (!laddr->valid || (laddr->state != SCTP_ADDR_SRC)) + if (!laddr->valid || (laddr->state == SCTP_ADDR_DEL) || + (laddr->state != SCTP_ADDR_SRC && + !asoc->src_out_of_asoc_ok)) continue; if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a)) goto out_unlock; @@ -526,8 +528,13 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, continue; if ((laddr->state == SCTP_ADDR_SRC) && (AF_INET == laddr->a.sa.sa_family)) { - fl4->saddr = laddr->a.v4.sin_addr.s_addr; fl4->fl4_sport = laddr->a.v4.sin_port; + flowi4_update_output(fl4, + asoc->base.sk->sk_bound_dev_if, + RT_CONN_FLAGS(asoc->base.sk), + daddr->v4.sin_addr.s_addr, + laddr->a.v4.sin_addr.s_addr); + rt = ip_route_output_key(&init_net, fl4); if (!IS_ERR(rt)) { dst = &rt->dst; @@ -623,6 +630,143 @@ static void sctp_v4_ecn_capable(struct sock *sk) INET_ECN_xmit(sk); } +void sctp_addr_wq_timeout_handler(unsigned long arg) +{ + struct sctp_sockaddr_entry *addrw, *temp; + struct sctp_sock *sp; + + spin_lock_bh(&sctp_addr_wq_lock); + + list_for_each_entry_safe(addrw, temp, &sctp_addr_waitq, list) { + SCTP_DEBUG_PRINTK_IPADDR("sctp_addrwq_timo_handler: the first ent in wq %p is ", + " for cmd %d at entry %p\n", &sctp_addr_waitq, &addrw->a, addrw->state, + addrw); + +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + /* Now we send an ASCONF for each association */ + /* Note. we currently don't handle link local IPv6 addressees */ + if (addrw->a.sa.sa_family == AF_INET6) { + struct in6_addr *in6; + + if (ipv6_addr_type(&addrw->a.v6.sin6_addr) & + IPV6_ADDR_LINKLOCAL) + goto free_next; + + in6 = (struct in6_addr *)&addrw->a.v6.sin6_addr; + if (ipv6_chk_addr(&init_net, in6, NULL, 0) == 0 && + addrw->state == SCTP_ADDR_NEW) { + unsigned long timeo_val; + + SCTP_DEBUG_PRINTK("sctp_timo_handler: this is on DAD, trying %d sec later\n", + SCTP_ADDRESS_TICK_DELAY); + timeo_val = jiffies; + timeo_val += msecs_to_jiffies(SCTP_ADDRESS_TICK_DELAY); + mod_timer(&sctp_addr_wq_timer, timeo_val); + break; + } + } +#endif + list_for_each_entry(sp, &sctp_auto_asconf_splist, auto_asconf_list) { + struct sock *sk; + + sk = sctp_opt2sk(sp); + /* ignore bound-specific endpoints */ + if (!sctp_is_ep_boundall(sk)) + continue; + sctp_bh_lock_sock(sk); + if (sctp_asconf_mgmt(sp, addrw) < 0) + SCTP_DEBUG_PRINTK("sctp_addrwq_timo_handler: sctp_asconf_mgmt failed\n"); + sctp_bh_unlock_sock(sk); + } +free_next: + list_del(&addrw->list); + kfree(addrw); + } + spin_unlock_bh(&sctp_addr_wq_lock); +} + +static void sctp_free_addr_wq(void) +{ + struct sctp_sockaddr_entry *addrw; + struct sctp_sockaddr_entry *temp; + + spin_lock_bh(&sctp_addr_wq_lock); + del_timer(&sctp_addr_wq_timer); + list_for_each_entry_safe(addrw, temp, &sctp_addr_waitq, list) { + list_del(&addrw->list); + kfree(addrw); + } + spin_unlock_bh(&sctp_addr_wq_lock); +} + +/* lookup the entry for the same address in the addr_waitq + * sctp_addr_wq MUST be locked + */ +static struct sctp_sockaddr_entry *sctp_addr_wq_lookup(struct sctp_sockaddr_entry *addr) +{ + struct sctp_sockaddr_entry *addrw; + + list_for_each_entry(addrw, &sctp_addr_waitq, list) { + if (addrw->a.sa.sa_family != addr->a.sa.sa_family) + continue; + if (addrw->a.sa.sa_family == AF_INET) { + if (addrw->a.v4.sin_addr.s_addr == + addr->a.v4.sin_addr.s_addr) + return addrw; + } else if (addrw->a.sa.sa_family == AF_INET6) { + if (ipv6_addr_equal(&addrw->a.v6.sin6_addr, + &addr->a.v6.sin6_addr)) + return addrw; + } + } + return NULL; +} + +void sctp_addr_wq_mgmt(struct sctp_sockaddr_entry *addr, int cmd) +{ + struct sctp_sockaddr_entry *addrw; + unsigned long timeo_val; + + /* first, we check if an opposite message already exist in the queue. + * If we found such message, it is removed. + * This operation is a bit stupid, but the DHCP client attaches the + * new address after a couple of addition and deletion of that address + */ + + spin_lock_bh(&sctp_addr_wq_lock); + /* Offsets existing events in addr_wq */ + addrw = sctp_addr_wq_lookup(addr); + if (addrw) { + if (addrw->state != cmd) { + SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt offsets existing entry for %d ", + " in wq %p\n", addrw->state, &addrw->a, + &sctp_addr_waitq); + list_del(&addrw->list); + kfree(addrw); + } + spin_unlock_bh(&sctp_addr_wq_lock); + return; + } + + /* OK, we have to add the new address to the wait queue */ + addrw = kmemdup(addr, sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); + if (addrw == NULL) { + spin_unlock_bh(&sctp_addr_wq_lock); + return; + } + addrw->state = cmd; + list_add_tail(&addrw->list, &sctp_addr_waitq); + SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt add new entry for cmd:%d ", + " in wq %p\n", addrw->state, &addrw->a, &sctp_addr_waitq); + + if (!timer_pending(&sctp_addr_wq_timer)) { + timeo_val = jiffies; + timeo_val += msecs_to_jiffies(SCTP_ADDRESS_TICK_DELAY); + mod_timer(&sctp_addr_wq_timer, timeo_val); + } + spin_unlock_bh(&sctp_addr_wq_lock); +} + /* Event handler for inet address addition/deletion events. * The sctp_local_addr_list needs to be protocted by a spin lock since * multiple notifiers (say IPv4 and IPv6) may be running at the same @@ -650,6 +794,7 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, addr->valid = 1; spin_lock_bh(&sctp_local_addr_lock); list_add_tail_rcu(&addr->list, &sctp_local_addr_list); + sctp_addr_wq_mgmt(addr, SCTP_ADDR_NEW); spin_unlock_bh(&sctp_local_addr_lock); } break; @@ -660,6 +805,7 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, if (addr->a.sa.sa_family == AF_INET && addr->a.v4.sin_addr.s_addr == ifa->ifa_local) { + sctp_addr_wq_mgmt(addr, SCTP_ADDR_DEL); found = 1; addr->valid = 0; list_del_rcu(&addr->list); @@ -1161,7 +1307,7 @@ SCTP_STATIC __init int sctp_init(void) max_share = min(4UL*1024*1024, limit); sysctl_sctp_rmem[0] = SK_MEM_QUANTUM; /* give each asoc 1 page min */ - sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1)); + sysctl_sctp_rmem[1] = 1500 * SKB_TRUESIZE(1); sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share); sysctl_sctp_wmem[0] = SK_MEM_QUANTUM; @@ -1236,6 +1382,7 @@ SCTP_STATIC __init int sctp_init(void) /* Disable ADDIP by default. */ sctp_addip_enable = 0; sctp_addip_noauth = 0; + sctp_default_auto_asconf = 0; /* Enable PR-SCTP by default. */ sctp_prsctp_enable = 1; @@ -1260,6 +1407,13 @@ SCTP_STATIC __init int sctp_init(void) spin_lock_init(&sctp_local_addr_lock); sctp_get_local_addr_list(); + /* Initialize the address event list */ + INIT_LIST_HEAD(&sctp_addr_waitq); + INIT_LIST_HEAD(&sctp_auto_asconf_splist); + spin_lock_init(&sctp_addr_wq_lock); + sctp_addr_wq_timer.expires = 0; + setup_timer(&sctp_addr_wq_timer, sctp_addr_wq_timeout_handler, 0); + status = sctp_v4_protosw_init(); if (status) @@ -1331,6 +1485,7 @@ SCTP_STATIC __exit void sctp_exit(void) /* Unregister with inet6/inet layers. */ sctp_v6_del_protocol(); sctp_v4_del_protocol(); + sctp_free_addr_wq(); /* Free the control endpoint. */ inet_ctl_sock_destroy(sctp_ctl_sock); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 58eb27f..743a644 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1366,8 +1366,8 @@ static void sctp_chunk_destroy(struct sctp_chunk *chunk) BUG_ON(!list_empty(&chunk->list)); list_del_init(&chunk->transmitted_list); - /* Free the chunk skb data and the SCTP_chunk stub itself. */ - dev_kfree_skb(chunk->skb); + consume_skb(chunk->skb); + consume_skb(chunk->auth_chunk); SCTP_DBG_OBJCNT_DEC(chunk); kmem_cache_free(sctp_chunk_cachep, chunk); @@ -2569,7 +2569,10 @@ do_addr_param: addr_param = param.v + sizeof(sctp_addip_param_t); - af = sctp_get_af_specific(param_type2af(param.p->type)); + af = sctp_get_af_specific(param_type2af(addr_param->p.type)); + if (af == NULL) + break; + af->from_addr_param(&addr, addr_param, htons(asoc->peer.port), 0); @@ -2768,11 +2771,12 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, int addr_param_len = 0; int totallen = 0; int i; + int del_pickup = 0; /* Get total length of all the address parameters. */ addr_buf = addrs; for (i = 0; i < addrcnt; i++) { - addr = (union sctp_addr *)addr_buf; + addr = addr_buf; af = sctp_get_af_specific(addr->v4.sin_family); addr_param_len = af->to_addr_param(addr, &addr_param); @@ -2780,6 +2784,13 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, totallen += addr_param_len; addr_buf += af->sockaddr_len; + if (asoc->asconf_addr_del_pending && !del_pickup) { + /* reuse the parameter length from the same scope one */ + totallen += paramlen; + totallen += addr_param_len; + del_pickup = 1; + SCTP_DEBUG_PRINTK("mkasconf_update_ip: picked same-scope del_pending addr, totallen for all addresses is %d\n", totallen); + } } /* Create an asconf chunk with the required length. */ @@ -2790,7 +2801,7 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, /* Add the address parameters to the asconf chunk. */ addr_buf = addrs; for (i = 0; i < addrcnt; i++) { - addr = (union sctp_addr *)addr_buf; + addr = addr_buf; af = sctp_get_af_specific(addr->v4.sin_family); addr_param_len = af->to_addr_param(addr, &addr_param); param.param_hdr.type = flags; @@ -2802,6 +2813,17 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, addr_buf += af->sockaddr_len; } + if (flags == SCTP_PARAM_ADD_IP && del_pickup) { + addr = asoc->asconf_addr_del_pending; + af = sctp_get_af_specific(addr->v4.sin_family); + addr_param_len = af->to_addr_param(addr, &addr_param); + param.param_hdr.type = SCTP_PARAM_DEL_IP; + param.param_hdr.length = htons(paramlen + addr_param_len); + param.crr_id = i; + + sctp_addto_chunk(retval, paramlen, ¶m); + sctp_addto_chunk(retval, addr_param_len, &addr_param); + } return retval; } @@ -2939,8 +2961,7 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc, union sctp_addr addr; union sctp_addr_param *addr_param; - addr_param = (union sctp_addr_param *) - ((void *)asconf_param + sizeof(sctp_addip_param_t)); + addr_param = (void *)asconf_param + sizeof(sctp_addip_param_t); if (asconf_param->param_hdr.type != SCTP_PARAM_ADD_IP && asconf_param->param_hdr.type != SCTP_PARAM_DEL_IP && @@ -2997,6 +3018,7 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc, /* Start the heartbeat timer. */ if (!mod_timer(&peer->hb_timer, sctp_transport_timeout(peer))) sctp_transport_hold(peer); + asoc->new_transport = peer; break; case SCTP_PARAM_DEL_IP: /* ADDIP 4.3 D7) If a request is received to delete the @@ -3014,7 +3036,7 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc, * an Error Cause TLV set to the new error code 'Request to * Delete Source IP Address' */ - if (sctp_cmp_addr_exact(sctp_source(asconf), &addr)) + if (sctp_cmp_addr_exact(&asconf->source, &addr)) return SCTP_ERROR_DEL_SRC_IP; /* Section 4.2.2 @@ -3049,50 +3071,63 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc, return SCTP_ERROR_NO_ERROR; } -/* Verify the ASCONF packet before we process it. */ -int sctp_verify_asconf(const struct sctp_association *asoc, - struct sctp_paramhdr *param_hdr, void *chunk_end, - struct sctp_paramhdr **errp) { - sctp_addip_param_t *asconf_param; +/* Verify the ASCONF packet before we process it. */ +bool sctp_verify_asconf(const struct sctp_association *asoc, + struct sctp_chunk *chunk, bool addr_param_needed, + struct sctp_paramhdr **errp) +{ + sctp_addip_chunk_t *addip = (sctp_addip_chunk_t *) chunk->chunk_hdr; union sctp_params param; - int length, plen; + bool addr_param_seen = false; - param.v = (sctp_paramhdr_t *) param_hdr; - while (param.v <= chunk_end - sizeof(sctp_paramhdr_t)) { - length = ntohs(param.p->length); - *errp = param.p; - - if (param.v > chunk_end - length || - length < sizeof(sctp_paramhdr_t)) - return 0; + sctp_walk_params(param, addip, addip_hdr.params) { + size_t length = ntohs(param.p->length); + *errp = param.p; switch (param.p->type) { + case SCTP_PARAM_ERR_CAUSE: + break; + case SCTP_PARAM_IPV4_ADDRESS: + if (length != sizeof(sctp_ipv4addr_param_t)) + return false; + addr_param_seen = true; + break; + case SCTP_PARAM_IPV6_ADDRESS: + if (length != sizeof(sctp_ipv6addr_param_t)) + return false; + addr_param_seen = true; + break; case SCTP_PARAM_ADD_IP: case SCTP_PARAM_DEL_IP: case SCTP_PARAM_SET_PRIMARY: - asconf_param = (sctp_addip_param_t *)param.v; - plen = ntohs(asconf_param->param_hdr.length); - if (plen < sizeof(sctp_addip_param_t) + - sizeof(sctp_paramhdr_t)) - return 0; + /* In ASCONF chunks, these need to be first. */ + if (addr_param_needed && !addr_param_seen) + return false; + length = ntohs(param.addip->param_hdr.length); + if (length < sizeof(sctp_addip_param_t) + + sizeof(sctp_paramhdr_t)) + return false; break; case SCTP_PARAM_SUCCESS_REPORT: case SCTP_PARAM_ADAPTATION_LAYER_IND: if (length != sizeof(sctp_addip_param_t)) - return 0; - + return false; break; default: - break; + /* This is unkown to us, reject! */ + return false; } - - param.v += WORD_ROUND(length); } - if (param.v != chunk_end) - return 0; + /* Remaining sanity checks. */ + if (addr_param_needed && !addr_param_seen) + return false; + if (!addr_param_needed && addr_param_seen) + return false; + if (param.v != chunk->chunk_end) + return false; - return 1; + return true; } /* Process an incoming ASCONF chunk with the next expected serial no. and @@ -3101,16 +3136,17 @@ int sctp_verify_asconf(const struct sctp_association *asoc, struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, struct sctp_chunk *asconf) { + sctp_addip_chunk_t *addip = (sctp_addip_chunk_t *) asconf->chunk_hdr; + bool all_param_pass = true; + union sctp_params param; sctp_addiphdr_t *hdr; union sctp_addr_param *addr_param; sctp_addip_param_t *asconf_param; struct sctp_chunk *asconf_ack; - __be16 err_code; int length = 0; int chunk_len; __u32 serial; - int all_param_pass = 1; chunk_len = ntohs(asconf->chunk_hdr->length) - sizeof(sctp_chunkhdr_t); hdr = (sctp_addiphdr_t *)asconf->skb->data; @@ -3125,7 +3161,7 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, * asconf parameter. */ length = ntohs(addr_param->p.length); - asconf_param = (sctp_addip_param_t *)((void *)addr_param + length); + asconf_param = (void *)addr_param + length; chunk_len -= length; /* create an ASCONF_ACK chunk. @@ -3138,9 +3174,14 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, goto done; /* Process the TLVs contained within the ASCONF chunk. */ - while (chunk_len > 0) { + sctp_walk_params(param, addip, addip_hdr.params) { + /* Skip preceeding address parameters. */ + if (param.p->type == SCTP_PARAM_IPV4_ADDRESS || + param.p->type == SCTP_PARAM_IPV6_ADDRESS) + continue; + err_code = sctp_process_asconf_param(asoc, asconf, - asconf_param); + param.addip); /* ADDIP 4.1 A7) * If an error response is received for a TLV parameter, * all TLVs with no response before the failed TLV are @@ -3148,29 +3189,20 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, * the failed response are considered unsuccessful unless * a specific success indication is present for the parameter. */ - if (SCTP_ERROR_NO_ERROR != err_code) - all_param_pass = 0; - + if (err_code != SCTP_ERROR_NO_ERROR) + all_param_pass = false; if (!all_param_pass) - sctp_add_asconf_response(asconf_ack, - asconf_param->crr_id, err_code, - asconf_param); + sctp_add_asconf_response(asconf_ack, param.addip->crr_id, + err_code, param.addip); /* ADDIP 4.3 D11) When an endpoint receiving an ASCONF to add * an IP address sends an 'Out of Resource' in its response, it * MUST also fail any subsequent add or delete requests bundled * in the ASCONF. */ - if (SCTP_ERROR_RSRC_LOW == err_code) + if (err_code == SCTP_ERROR_RSRC_LOW) goto done; - - /* Move to the next ASCONF param. */ - length = ntohs(asconf_param->param_hdr.length); - asconf_param = (sctp_addip_param_t *)((void *)asconf_param + - length); - chunk_len -= length; } - done: asoc->peer.addip_serial++; @@ -3197,8 +3229,7 @@ static void sctp_asconf_param_success(struct sctp_association *asoc, struct sctp_transport *transport; struct sctp_sockaddr_entry *saddr; - addr_param = (union sctp_addr_param *) - ((void *)asconf_param + sizeof(sctp_addip_param_t)); + addr_param = (void *)asconf_param + sizeof(sctp_addip_param_t); /* We have checked the packet before, so we do not check again. */ af = sctp_get_af_specific(param_type2af(addr_param->p.type)); @@ -3224,6 +3255,11 @@ static void sctp_asconf_param_success(struct sctp_association *asoc, case SCTP_PARAM_DEL_IP: local_bh_disable(); sctp_del_bind_addr(bp, &addr); + if (asoc->asconf_addr_del_pending != NULL && + sctp_cmp_addr_exact(asoc->asconf_addr_del_pending, &addr)) { + kfree(asoc->asconf_addr_del_pending); + asoc->asconf_addr_del_pending = NULL; + } local_bh_enable(); list_for_each_entry(transport, &asoc->peer.transport_addr_list, transports) { @@ -3278,8 +3314,7 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack, return SCTP_ERROR_NO_ERROR; case SCTP_PARAM_ERR_CAUSE: length = sizeof(sctp_addip_param_t); - err_param = (sctp_errhdr_t *) - ((void *)asconf_ack_param + length); + err_param = (void *)asconf_ack_param + length; asconf_ack_len -= length; if (asconf_ack_len > 0) return err_param->cause; @@ -3292,8 +3327,7 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack, } length = ntohs(asconf_ack_param->param_hdr.length); - asconf_ack_param = (sctp_addip_param_t *) - ((void *)asconf_ack_param + length); + asconf_ack_param = (void *)asconf_ack_param + length; asconf_ack_len -= length; } @@ -3325,7 +3359,7 @@ int sctp_process_asconf_ack(struct sctp_association *asoc, * pointer to the first asconf parameter. */ length = ntohs(addr_param->p.length); - asconf_param = (sctp_addip_param_t *)((void *)addr_param + length); + asconf_param = (void *)addr_param + length; asconf_len -= length; /* ADDIP 4.1 @@ -3376,11 +3410,13 @@ int sctp_process_asconf_ack(struct sctp_association *asoc, * one. */ length = ntohs(asconf_param->param_hdr.length); - asconf_param = (sctp_addip_param_t *)((void *)asconf_param + - length); + asconf_param = (void *)asconf_param + length; asconf_len -= length; } + if (no_err && asoc->src_out_of_asoc_ok) + asoc->src_out_of_asoc_ok = 0; + /* Free the cached last sent asconf chunk. */ list_del_init(&asconf->transmitted_list); sctp_chunk_free(asconf); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 6e0f882..581c06a 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -681,7 +681,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, * outstanding data and rely on the retransmission limit be reached * to shutdown the association. */ - if (t->asoc->state != SCTP_STATE_SHUTDOWN_PENDING) + if (t->asoc->state < SCTP_STATE_SHUTDOWN_PENDING) t->asoc->overall_error_count = 0; /* Clear the hb_sent flag to signal that we had a good @@ -1210,7 +1210,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, int local_cork = 0; if (SCTP_EVENT_T_TIMEOUT != event_type) - chunk = (struct sctp_chunk *) event_arg; + chunk = event_arg; /* Note: This whole file is a huge candidate for rework. * For example, each command could either have its own handler, so @@ -1689,6 +1689,11 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_PURGE_ASCONF_QUEUE: sctp_asconf_queue_teardown(asoc); break; + + case SCTP_CMD_SET_ASOC: + asoc = cmd->obj.asoc; + break; + default: pr_warn("Impossible command: %u, %p\n", cmd->verb, cmd->obj.ptr); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 2461171..d02dd3c 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -163,6 +163,9 @@ sctp_chunk_length_valid(struct sctp_chunk *chunk, { __u16 chunk_length = ntohs(chunk->chunk_hdr->length); + /* Previously already marked? */ + if (unlikely(chunk->pdiscard)) + return 0; if (unlikely(chunk_length < required_length)) return 0; @@ -747,6 +750,12 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, struct sctp_chunk auth; sctp_ierror_t ret; + /* Make sure that we and the peer are AUTH capable */ + if (!sctp_auth_enable || !new_asoc->peer.auth_capable) { + sctp_association_free(new_asoc); + return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + } + /* set-up our fake chunk so that we can process it */ auth.skb = chunk->auth_chunk; auth.asoc = chunk->asoc; @@ -757,10 +766,6 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, auth.transport = chunk->transport; ret = sctp_sf_authenticate(ep, new_asoc, type, &auth); - - /* We can now safely free the auth_chunk clone */ - kfree_skb(chunk->auth_chunk); - if (ret != SCTP_IERROR_NO_ERROR) { sctp_association_free(new_asoc); return sctp_sf_pdiscard(ep, asoc, type, arg, commands); @@ -2044,9 +2049,15 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep, } /* Delete the tempory new association. */ - sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); + sctp_add_cmd_sf(commands, SCTP_CMD_SET_ASOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); + /* Restore association pointer to provide SCTP command interpeter + * with a valid context in case it needs to manipulate + * the queues */ + sctp_add_cmd_sf(commands, SCTP_CMD_SET_ASOC, + SCTP_ASOC((struct sctp_association *)asoc)); + return retval; nomem: @@ -3508,9 +3519,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, struct sctp_chunk *asconf_ack = NULL; struct sctp_paramhdr *err_param = NULL; sctp_addiphdr_t *hdr; - union sctp_addr_param *addr_param; __u32 serial; - int length; if (!sctp_vtag_verify(chunk, asoc)) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, @@ -3535,17 +3544,8 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, hdr = (sctp_addiphdr_t *)chunk->skb->data; serial = ntohl(hdr->serial); - addr_param = (union sctp_addr_param *)hdr->params; - length = ntohs(addr_param->p.length); - if (length < sizeof(sctp_paramhdr_t)) - return sctp_sf_violation_paramlen(ep, asoc, type, arg, - (void *)addr_param, commands); - /* Verify the ASCONF chunk before processing it. */ - if (!sctp_verify_asconf(asoc, - (sctp_paramhdr_t *)((void *)addr_param + length), - (void *)chunk->chunk_end, - &err_param)) + if (!sctp_verify_asconf(asoc, chunk, true, &err_param)) return sctp_sf_violation_paramlen(ep, asoc, type, arg, (void *)err_param, commands); @@ -3612,6 +3612,11 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, */ asconf_ack->dest = chunk->source; sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(asconf_ack)); + if (asoc->new_transport) { + sctp_sf_heartbeat(ep, asoc, type, asoc->new_transport, + commands); + ((struct sctp_association *)asoc)->new_transport = NULL; + } return SCTP_DISPOSITION_CONSUME; } @@ -3657,10 +3662,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, rcvd_serial = ntohl(addip_hdr->serial); /* Verify the ASCONF-ACK chunk before processing it. */ - if (!sctp_verify_asconf(asoc, - (sctp_paramhdr_t *)addip_hdr->params, - (void *)asconf_ack->chunk_end, - &err_param)) + if (!sctp_verify_asconf(asoc, asconf_ack, false, &err_param)) return sctp_sf_violation_paramlen(ep, asoc, type, arg, (void *)err_param, commands); @@ -4008,31 +4010,32 @@ sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep, auth_hdr = (struct sctp_authhdr *)chunk->skb->data; error = sctp_sf_authenticate(ep, asoc, type, chunk); switch (error) { - case SCTP_IERROR_AUTH_BAD_HMAC: - /* Generate the ERROR chunk and discard the rest - * of the packet - */ - err_chunk = sctp_make_op_error(asoc, chunk, - SCTP_ERROR_UNSUP_HMAC, - &auth_hdr->hmac_id, - sizeof(__u16), 0); - if (err_chunk) { - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, - SCTP_CHUNK(err_chunk)); - } - /* Fall Through */ - case SCTP_IERROR_AUTH_BAD_KEYID: - case SCTP_IERROR_BAD_SIG: - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); - break; - case SCTP_IERROR_PROTO_VIOLATION: - return sctp_sf_violation_chunklen(ep, asoc, type, arg, - commands); - break; - case SCTP_IERROR_NOMEM: - return SCTP_DISPOSITION_NOMEM; - default: - break; + case SCTP_IERROR_AUTH_BAD_HMAC: + /* Generate the ERROR chunk and discard the rest + * of the packet + */ + err_chunk = sctp_make_op_error(asoc, chunk, + SCTP_ERROR_UNSUP_HMAC, + &auth_hdr->hmac_id, + sizeof(__u16), 0); + if (err_chunk) { + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, + SCTP_CHUNK(err_chunk)); + } + /* Fall Through */ + case SCTP_IERROR_AUTH_BAD_KEYID: + case SCTP_IERROR_BAD_SIG: + return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + + case SCTP_IERROR_PROTO_VIOLATION: + return sctp_sf_violation_chunklen(ep, asoc, type, arg, + commands); + + case SCTP_IERROR_NOMEM: + return SCTP_DISPOSITION_NOMEM; + + default: /* Prevent gcc warnings */ + break; } if (asoc->active_key_id != ntohs(auth_hdr->shkey_id)) { diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d0a8a77..24e88af 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include @@ -78,6 +79,7 @@ #include #include /* for sa_family_t */ +#include #include #include #include @@ -476,7 +478,7 @@ static int sctp_bindx_add(struct sock *sk, struct sockaddr *addrs, int addrcnt) /* The list may contain either IPv4 or IPv6 address; * determine the address length for walking thru the list. */ - sa_addr = (struct sockaddr *)addr_buf; + sa_addr = addr_buf; af = sctp_get_af_specific(sa_addr->sa_family); if (!af) { retval = -EINVAL; @@ -555,7 +557,7 @@ static int sctp_send_asconf_add_ip(struct sock *sk, */ addr_buf = addrs; for (i = 0; i < addrcnt; i++) { - addr = (union sctp_addr *)addr_buf; + addr = addr_buf; af = sctp_get_af_specific(addr->v4.sin_family); if (!af) { retval = -EINVAL; @@ -583,22 +585,35 @@ static int sctp_send_asconf_add_ip(struct sock *sk, goto out; } - retval = sctp_send_asconf(asoc, chunk); - if (retval) - goto out; - /* Add the new addresses to the bind address list with * use_as_src set to 0. */ addr_buf = addrs; for (i = 0; i < addrcnt; i++) { - addr = (union sctp_addr *)addr_buf; + addr = addr_buf; af = sctp_get_af_specific(addr->v4.sin_family); memcpy(&saveaddr, addr, af->sockaddr_len); retval = sctp_add_bind_addr(bp, &saveaddr, SCTP_ADDR_NEW, GFP_ATOMIC); addr_buf += af->sockaddr_len; } + if (asoc->src_out_of_asoc_ok) { + struct sctp_transport *trans; + + list_for_each_entry(trans, + &asoc->peer.transport_addr_list, transports) { + /* Clear the source and route cache */ + dst_release(trans->dst); + trans->cwnd = min(4*asoc->pathmtu, max_t(__u32, + 2*asoc->pathmtu, 4380)); + trans->ssthresh = asoc->peer.i.a_rwnd; + trans->rto = asoc->rto_initial; + trans->rtt = trans->srtt = trans->rttvar = 0; + sctp_transport_route(trans, NULL, + sctp_sk(asoc->base.sk)); + } + } + retval = sctp_send_asconf(asoc, chunk); } out: @@ -646,7 +661,7 @@ static int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt) goto err_bindx_rem; } - sa_addr = (union sctp_addr *)addr_buf; + sa_addr = addr_buf; af = sctp_get_af_specific(sa_addr->sa.sa_family); if (!af) { retval = -EINVAL; @@ -715,7 +730,9 @@ static int sctp_send_asconf_del_ip(struct sock *sk, struct sctp_sockaddr_entry *saddr; int i; int retval = 0; + int stored = 0; + chunk = NULL; if (!sctp_addip_enable) return retval; @@ -743,7 +760,7 @@ static int sctp_send_asconf_del_ip(struct sock *sk, */ addr_buf = addrs; for (i = 0; i < addrcnt; i++) { - laddr = (union sctp_addr *)addr_buf; + laddr = addr_buf; af = sctp_get_af_specific(laddr->v4.sin_family); if (!af) { retval = -EINVAL; @@ -766,8 +783,40 @@ static int sctp_send_asconf_del_ip(struct sock *sk, bp = &asoc->base.bind_addr; laddr = sctp_find_unmatch_addr(bp, (union sctp_addr *)addrs, addrcnt, sp); - if (!laddr) - continue; + if ((laddr == NULL) && (addrcnt == 1)) { + if (asoc->asconf_addr_del_pending) + continue; + asoc->asconf_addr_del_pending = + kzalloc(sizeof(union sctp_addr), GFP_ATOMIC); + if (asoc->asconf_addr_del_pending == NULL) { + retval = -ENOMEM; + goto out; + } + asoc->asconf_addr_del_pending->sa.sa_family = + addrs->sa_family; + asoc->asconf_addr_del_pending->v4.sin_port = + htons(bp->port); + if (addrs->sa_family == AF_INET) { + struct sockaddr_in *sin; + + sin = (struct sockaddr_in *)addrs; + asoc->asconf_addr_del_pending->v4.sin_addr.s_addr = sin->sin_addr.s_addr; + } else if (addrs->sa_family == AF_INET6) { + struct sockaddr_in6 *sin6; + + sin6 = (struct sockaddr_in6 *)addrs; + ipv6_addr_copy(&asoc->asconf_addr_del_pending->v6.sin6_addr, &sin6->sin6_addr); + } + SCTP_DEBUG_PRINTK_IPADDR("send_asconf_del_ip: keep the last address asoc: %p ", + " at %p\n", asoc, asoc->asconf_addr_del_pending, + asoc->asconf_addr_del_pending); + asoc->src_out_of_asoc_ok = 1; + stored = 1; + goto skip_mkasconf; + } + + if (laddr == NULL) + return -EINVAL; /* We do not need RCU protection throughout this loop * because this is done under a socket lock from the @@ -780,12 +829,13 @@ static int sctp_send_asconf_del_ip(struct sock *sk, goto out; } +skip_mkasconf: /* Reset use_as_src flag for the addresses in the bind address * list that are to be deleted. */ addr_buf = addrs; for (i = 0; i < addrcnt; i++) { - laddr = (union sctp_addr *)addr_buf; + laddr = addr_buf; af = sctp_get_af_specific(laddr->v4.sin_family); list_for_each_entry(saddr, &bp->address_list, list) { if (sctp_cmp_addr_exact(&saddr->a, laddr)) @@ -805,12 +855,37 @@ static int sctp_send_asconf_del_ip(struct sock *sk, sctp_sk(asoc->base.sk)); } + if (stored) + /* We don't need to transmit ASCONF */ + continue; retval = sctp_send_asconf(asoc, chunk); } out: return retval; } +/* set addr events to assocs in the endpoint. ep and addr_wq must be locked */ +int sctp_asconf_mgmt(struct sctp_sock *sp, struct sctp_sockaddr_entry *addrw) +{ + struct sock *sk = sctp_opt2sk(sp); + union sctp_addr *addr; + struct sctp_af *af; + + /* It is safe to write port space in caller. */ + addr = &addrw->a; + addr->v4.sin_port = htons(sp->ep->base.bind_addr.port); + af = sctp_get_af_specific(addr->sa.sa_family); + if (!af) + return -EINVAL; + if (sctp_verify_addr(sk, addr, af->sockaddr_len)) + return -EINVAL; + + if (addrw->state == SCTP_ADDR_NEW) + return sctp_send_asconf_add_ip(sk, (struct sockaddr *)addr, 1); + else + return sctp_send_asconf_del_ip(sk, (struct sockaddr *)addr, 1); +} + /* Helper for tunneling sctp_bindx() requests through sctp_setsockopt() * * API 8.1 @@ -927,7 +1002,7 @@ SCTP_STATIC int sctp_setsockopt_bindx(struct sock* sk, return -EINVAL; } - sa_addr = (struct sockaddr *)addr_buf; + sa_addr = addr_buf; af = sctp_get_af_specific(sa_addr->sa_family); /* If the address family is not supported or if this address @@ -1018,7 +1093,7 @@ static int __sctp_connect(struct sock* sk, goto out_free; } - sa_addr = (union sctp_addr *)addr_buf; + sa_addr = addr_buf; af = sctp_get_af_specific(sa_addr->sa.sa_family); /* If the address family is not supported or if this address @@ -1302,11 +1377,19 @@ SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk, /* * New (hopefully final) interface for the API. * We use the sctp_getaddrs_old structure so that use-space library - * can avoid any unnecessary allocations. The only defferent part + * can avoid any unnecessary allocations. The only different part * is that we store the actual length of the address buffer into the - * addrs_num structure member. That way we can re-use the existing + * addrs_num structure member. That way we can re-use the existing * code. */ +#ifdef CONFIG_COMPAT +struct compat_sctp_getaddrs_old { + sctp_assoc_t assoc_id; + s32 addr_num; + compat_uptr_t addrs; /* struct sockaddr * */ +}; +#endif + SCTP_STATIC int sctp_getsockopt_connectx3(struct sock* sk, int len, char __user *optval, int __user *optlen) @@ -1315,16 +1398,30 @@ SCTP_STATIC int sctp_getsockopt_connectx3(struct sock* sk, int len, sctp_assoc_t assoc_id = 0; int err = 0; - if (len < sizeof(param)) - return -EINVAL; +#ifdef CONFIG_COMPAT + if (is_compat_task()) { + struct compat_sctp_getaddrs_old param32; - if (copy_from_user(¶m, optval, sizeof(param))) - return -EFAULT; + if (len < sizeof(param32)) + return -EINVAL; + if (copy_from_user(¶m32, optval, sizeof(param32))) + return -EFAULT; - err = __sctp_setsockopt_connectx(sk, - (struct sockaddr __user *)param.addrs, - param.addr_num, &assoc_id); + param.assoc_id = param32.assoc_id; + param.addr_num = param32.addr_num; + param.addrs = compat_ptr(param32.addrs); + } else +#endif + { + if (len < sizeof(param)) + return -EINVAL; + if (copy_from_user(¶m, optval, sizeof(param))) + return -EFAULT; + } + err = __sctp_setsockopt_connectx(sk, (struct sockaddr __user *) + param.addrs, param.addr_num, + &assoc_id); if (err == 0 || err == -EINPROGRESS) { if (copy_to_user(optval, &assoc_id, sizeof(assoc_id))) return -EFAULT; @@ -1442,8 +1539,10 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout) /* Supposedly, no process has access to the socket, but * the net layers still may. + * Also, sctp_destroy_sock() needs to be called with addr_wq_lock + * held and that should be grabbed before socket lock. */ - sctp_local_bh_disable(); + spin_lock_bh(&sctp_globals.addr_wq_lock); sctp_bh_lock_sock(sk); /* Hold the sock, since sk_common_release() will put sock_put() @@ -1453,7 +1552,7 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout) sk_common_release(sk); sctp_bh_unlock_sock(sk); - sctp_local_bh_enable(); + spin_unlock_bh(&sctp_globals.addr_wq_lock); sock_put(sk); @@ -1514,6 +1613,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, sctp_scope_t scope; long timeo; __u16 sinfo_flags = 0; + bool wait_connect = false; struct sctp_datamsg *datamsg; int msg_flags = msg->msg_flags; @@ -1832,6 +1932,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, err = sctp_primitive_ASSOCIATE(asoc, NULL); if (err < 0) goto out_free; + wait_connect = true; SCTP_DEBUG_PRINTK("We associated primitively.\n"); } @@ -1871,6 +1972,11 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, else err = msg_len; + if (unlikely(wait_connect)) { + timeo = sock_sndtimeo(sk, msg_flags & MSG_DONTWAIT); + sctp_wait_for_connect(asoc, &timeo); + } + /* If we are already past ASSOCIATE, the lower * layers are responsible for association cleanup. */ @@ -3219,11 +3325,11 @@ static int sctp_setsockopt_auth_chunk(struct sock *sk, return -EFAULT; switch (val.sauth_chunk) { - case SCTP_CID_INIT: - case SCTP_CID_INIT_ACK: - case SCTP_CID_SHUTDOWN_COMPLETE: - case SCTP_CID_AUTH: - return -EINVAL; + case SCTP_CID_INIT: + case SCTP_CID_INIT_ACK: + case SCTP_CID_SHUTDOWN_COMPLETE: + case SCTP_CID_AUTH: + return -EINVAL; } /* add this chunk id to the endpoint */ @@ -3366,6 +3472,48 @@ static int sctp_setsockopt_del_key(struct sock *sk, } +/* + * 8.1.23 SCTP_AUTO_ASCONF + * + * This option will enable or disable the use of the automatic generation of + * ASCONF chunks to add and delete addresses to an existing association. Note + * that this option has two caveats namely: a) it only affects sockets that + * are bound to all addresses available to the SCTP stack, and b) the system + * administrator may have an overriding control that turns the ASCONF feature + * off no matter what setting the socket option may have. + * This option expects an integer boolean flag, where a non-zero value turns on + * the option, and a zero value turns off the option. + * Note. In this implementation, socket operation overrides default parameter + * being set by sysctl as well as FreeBSD implementation + */ +static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval, + unsigned int optlen) +{ + int val; + struct sctp_sock *sp = sctp_sk(sk); + + if (optlen < sizeof(int)) + return -EINVAL; + if (get_user(val, (int __user *)optval)) + return -EFAULT; + if (!sctp_is_ep_boundall(sk) && val) + return -EINVAL; + if ((val && sp->do_auto_asconf) || (!val && !sp->do_auto_asconf)) + return 0; + + spin_lock_bh(&sctp_globals.addr_wq_lock); + if (val == 0 && sp->do_auto_asconf) { + list_del(&sp->auto_asconf_list); + sp->do_auto_asconf = 0; + } else if (val && !sp->do_auto_asconf) { + list_add_tail(&sp->auto_asconf_list, + &sctp_auto_asconf_splist); + sp->do_auto_asconf = 1; + } + spin_unlock_bh(&sctp_globals.addr_wq_lock); + return 0; +} + /* API 6.2 setsockopt(), getsockopt() * @@ -3513,6 +3661,9 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_AUTH_DELETE_KEY: retval = sctp_setsockopt_del_key(sk, optval, optlen); break; + case SCTP_AUTO_ASCONF: + retval = sctp_setsockopt_auto_asconf(sk, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -3795,27 +3946,47 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) local_bh_disable(); percpu_counter_inc(&sctp_sockets_allocated); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + + /* Nothing can fail after this block, otherwise + * sctp_destroy_sock() will be called without addr_wq_lock held + */ + if (sctp_default_auto_asconf) { + spin_lock(&sctp_globals.addr_wq_lock); + list_add_tail(&sp->auto_asconf_list, + &sctp_auto_asconf_splist); + sp->do_auto_asconf = 1; + spin_unlock(&sctp_globals.addr_wq_lock); + } else { + sp->do_auto_asconf = 0; + } + local_bh_enable(); return 0; } -/* Cleanup any SCTP per socket resources. */ +/* Cleanup any SCTP per socket resources. Must be called with + * sctp_globals.addr_wq_lock held if sp->do_auto_asconf is true + */ SCTP_STATIC void sctp_destroy_sock(struct sock *sk) { - struct sctp_endpoint *ep; + struct sctp_sock *sp; SCTP_DEBUG_PRINTK("sctp_destroy_sock(sk: %p)\n", sk); /* Release our hold on the endpoint. */ - ep = sctp_sk(sk)->ep; + sp = sctp_sk(sk); /* This could happen during socket init, thus we bail out * early, since the rest of the below is not setup either. */ - if (ep == NULL) + if (sp->ep == NULL) return; - sctp_endpoint_free(ep); + if (sp->do_auto_asconf) { + sp->do_auto_asconf = 0; + list_del(&sp->auto_asconf_list); + } + sctp_endpoint_free(sp->ep); local_bh_disable(); percpu_counter_dec(&sctp_sockets_allocated); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); @@ -5316,6 +5487,28 @@ static int sctp_getsockopt_assoc_number(struct sock *sk, int len, } /* + * 8.1.23 SCTP_AUTO_ASCONF + * See the corresponding setsockopt entry as description + */ +static int sctp_getsockopt_auto_asconf(struct sock *sk, int len, + char __user *optval, int __user *optlen) +{ + int val = 0; + + if (len < sizeof(int)) + return -EINVAL; + + len = sizeof(int); + if (sctp_sk(sk)->do_auto_asconf && sctp_is_ep_boundall(sk)) + val = 1; + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &val, len)) + return -EFAULT; + return 0; +} + +/* * 8.2.6. Get the Current Identifiers of Associations * (SCTP_GET_ASSOC_ID_LIST) * @@ -5499,6 +5692,9 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_GET_ASSOC_ID_LIST: retval = sctp_getsockopt_assoc_ids(sk, len, optval, optlen); break; + case SCTP_AUTO_ASCONF: + retval = sctp_getsockopt_auto_asconf(sk, len, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -6537,6 +6733,19 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newinet->mc_list = NULL; } +static inline void sctp_copy_descendant(struct sock *sk_to, + const struct sock *sk_from) +{ + int ancestor_size = sizeof(struct inet_sock) + + sizeof(struct sctp_sock) - + offsetof(struct sctp_sock, auto_asconf_list); + + if (sk_from->sk_family == PF_INET6) + ancestor_size += sizeof(struct ipv6_pinfo); + + __inet_sk_copy_descendant(sk_to, sk_from, ancestor_size); +} + /* Populate the fields of the newsk from the oldsk and migrate the assoc * and its messages to the newsk. */ @@ -6558,7 +6767,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, newsk->sk_sndbuf = oldsk->sk_sndbuf; newsk->sk_rcvbuf = oldsk->sk_rcvbuf; /* Brute force copy old sctp opt. */ - inet_sk_copy_descendant(newsk, oldsk); + sctp_copy_descendant(newsk, oldsk); /* Restore the ep value that was overwritten with the above structure * copy. diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 6752f48..60ffbd0 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -187,6 +187,13 @@ static ctl_table sctp_table[] = { .proc_handler = proc_dointvec, }, { + .procname = "default_auto_asconf", + .data = &sctp_default_auto_asconf, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { .procname = "prsctp_enable", .data = &sctp_prsctp_enable, .maxlen = sizeof(int), diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 8a84017..57da447 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -373,9 +373,10 @@ fail: * specification [SCTP] and any extensions for a list of possible * error formats. */ -struct sctp_ulpevent *sctp_ulpevent_make_remote_error( - const struct sctp_association *asoc, struct sctp_chunk *chunk, - __u16 flags, gfp_t gfp) +struct sctp_ulpevent * +sctp_ulpevent_make_remote_error(const struct sctp_association *asoc, + struct sctp_chunk *chunk, __u16 flags, + gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_remote_error *sre; @@ -394,8 +395,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_remote_error( /* Copy the skb to a new skb with room for us to prepend * notification with. */ - skb = skb_copy_expand(chunk->skb, sizeof(struct sctp_remote_error), - 0, gfp); + skb = skb_copy_expand(chunk->skb, sizeof(*sre), 0, gfp); /* Pull off the rest of the cause TLV from the chunk. */ skb_pull(chunk->skb, elen); @@ -406,62 +406,21 @@ struct sctp_ulpevent *sctp_ulpevent_make_remote_error( event = sctp_skb2event(skb); sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize); - sre = (struct sctp_remote_error *) - skb_push(skb, sizeof(struct sctp_remote_error)); + sre = (struct sctp_remote_error *) skb_push(skb, sizeof(*sre)); /* Trim the buffer to the right length. */ - skb_trim(skb, sizeof(struct sctp_remote_error) + elen); + skb_trim(skb, sizeof(*sre) + elen); - /* Socket Extensions for SCTP - * 5.3.1.3 SCTP_REMOTE_ERROR - * - * sre_type: - * It should be SCTP_REMOTE_ERROR. - */ + /* RFC6458, Section 6.1.3. SCTP_REMOTE_ERROR */ + memset(sre, 0, sizeof(*sre)); sre->sre_type = SCTP_REMOTE_ERROR; - - /* - * Socket Extensions for SCTP - * 5.3.1.3 SCTP_REMOTE_ERROR - * - * sre_flags: 16 bits (unsigned integer) - * Currently unused. - */ sre->sre_flags = 0; - - /* Socket Extensions for SCTP - * 5.3.1.3 SCTP_REMOTE_ERROR - * - * sre_length: sizeof (__u32) - * - * This field is the total length of the notification data, - * including the notification header. - */ sre->sre_length = skb->len; - - /* Socket Extensions for SCTP - * 5.3.1.3 SCTP_REMOTE_ERROR - * - * sre_error: 16 bits (unsigned integer) - * This value represents one of the Operational Error causes defined in - * the SCTP specification, in network byte order. - */ sre->sre_error = cause; - - /* Socket Extensions for SCTP - * 5.3.1.3 SCTP_REMOTE_ERROR - * - * sre_assoc_id: sizeof (sctp_assoc_t) - * - * The association id field, holds the identifier for the association. - * All notifications for a given association have the same association - * identifier. For TCP style socket, this field is ignored. - */ sctp_ulpevent_set_owner(event, asoc); sre->sre_assoc_id = sctp_assoc2id(asoc); return event; - fail: return NULL; } @@ -904,7 +863,9 @@ __u16 sctp_ulpevent_get_notification_type(const struct sctp_ulpevent *event) return notification->sn_header.sn_type; } -/* Copy out the sndrcvinfo into a msghdr. */ +/* RFC6458, Section 5.3.2. SCTP Header Information Structure + * (SCTP_SNDRCV, DEPRECATED) + */ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event, struct msghdr *msghdr) { @@ -913,74 +874,21 @@ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event, if (sctp_ulpevent_is_notification(event)) return; - /* Sockets API Extensions for SCTP - * Section 5.2.2 SCTP Header Information Structure (SCTP_SNDRCV) - * - * sinfo_stream: 16 bits (unsigned integer) - * - * For recvmsg() the SCTP stack places the message's stream number in - * this value. - */ + memset(&sinfo, 0, sizeof(sinfo)); sinfo.sinfo_stream = event->stream; - /* sinfo_ssn: 16 bits (unsigned integer) - * - * For recvmsg() this value contains the stream sequence number that - * the remote endpoint placed in the DATA chunk. For fragmented - * messages this is the same number for all deliveries of the message - * (if more than one recvmsg() is needed to read the message). - */ sinfo.sinfo_ssn = event->ssn; - /* sinfo_ppid: 32 bits (unsigned integer) - * - * In recvmsg() this value is - * the same information that was passed by the upper layer in the peer - * application. Please note that byte order issues are NOT accounted - * for and this information is passed opaquely by the SCTP stack from - * one end to the other. - */ sinfo.sinfo_ppid = event->ppid; - /* sinfo_flags: 16 bits (unsigned integer) - * - * This field may contain any of the following flags and is composed of - * a bitwise OR of these values. - * - * recvmsg() flags: - * - * SCTP_UNORDERED - This flag is present when the message was sent - * non-ordered. - */ sinfo.sinfo_flags = event->flags; - /* sinfo_tsn: 32 bit (unsigned integer) - * - * For the receiving side, this field holds a TSN that was - * assigned to one of the SCTP Data Chunks. - */ sinfo.sinfo_tsn = event->tsn; - /* sinfo_cumtsn: 32 bit (unsigned integer) - * - * This field will hold the current cumulative TSN as - * known by the underlying SCTP layer. Note this field is - * ignored when sending and only valid for a receive - * operation when sinfo_flags are set to SCTP_UNORDERED. - */ sinfo.sinfo_cumtsn = event->cumtsn; - /* sinfo_assoc_id: sizeof (sctp_assoc_t) - * - * The association handle field, sinfo_assoc_id, holds the identifier - * for the association announced in the COMMUNICATION_UP notification. - * All notifications for a given association have the same identifier. - * Ignored for one-to-one style sockets. - */ sinfo.sinfo_assoc_id = sctp_assoc2id(event->asoc); - - /* context value that is set via SCTP_CONTEXT socket option. */ + /* Context value that is set via SCTP_CONTEXT socket option. */ sinfo.sinfo_context = event->asoc->default_rcv_context; - /* These fields are not used while receiving. */ sinfo.sinfo_timetolive = 0; put_cmsg(msghdr, IPPROTO_SCTP, SCTP_SNDRCV, - sizeof(struct sctp_sndrcvinfo), (void *)&sinfo); + sizeof(sinfo), &sinfo); } /* Do accounting for bytes received and hold a reference to the association diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 5867429..07b9973 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -237,21 +237,21 @@ static int x25_device_event(struct notifier_block *this, unsigned long event, #endif ) { switch (event) { - case NETDEV_UP: - x25_link_device_up(dev); - break; - case NETDEV_GOING_DOWN: - nb = x25_get_neigh(dev); - if (nb) { - x25_terminate_link(nb); - x25_neigh_put(nb); - } - break; - case NETDEV_DOWN: - x25_kill_by_device(dev); - x25_route_device_down(dev); - x25_link_device_down(dev); - break; + case NETDEV_UP: + x25_link_device_up(dev); + break; + case NETDEV_GOING_DOWN: + nb = x25_get_neigh(dev); + if (nb) { + x25_terminate_link(nb); + x25_neigh_put(nb); + } + break; + case NETDEV_DOWN: + x25_kill_by_device(dev); + x25_route_device_down(dev); + x25_link_device_down(dev); + break; } } @@ -1261,14 +1261,19 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, struct x25_sock *x25 = x25_sk(sk); struct sockaddr_x25 *sx25 = (struct sockaddr_x25 *)msg->msg_name; size_t copied; - int qbit, header_len = x25->neighbour->extended ? - X25_EXT_MIN_LEN : X25_STD_MIN_LEN; - + int qbit, header_len; struct sk_buff *skb; unsigned char *asmptr; int rc = -ENOTCONN; lock_sock(sk); + + if (x25->neighbour == NULL) + goto out; + + header_len = x25->neighbour->extended ? + X25_EXT_MIN_LEN : X25_STD_MIN_LEN; + /* * This works for seqpacket too. The receiver has ordered the queue for * us! We do one quick check first though @@ -1338,10 +1343,9 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, if (sx25) { sx25->sx25_family = AF_X25; sx25->sx25_addr = x25->dest_addr; + msg->msg_namelen = sizeof(*sx25); } - msg->msg_namelen = sizeof(struct sockaddr_x25); - x25_check_rbuf(sk); rc = copied; out_free_dgram: @@ -1360,257 +1364,254 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) int rc; switch (cmd) { - case TIOCOUTQ: { - int amount; + case TIOCOUTQ: { + int amount; - amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); - if (amount < 0) - amount = 0; - rc = put_user(amount, (unsigned int __user *)argp); - break; - } + amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); + if (amount < 0) + amount = 0; + rc = put_user(amount, (unsigned int __user *)argp); + break; + } - case TIOCINQ: { - struct sk_buff *skb; - int amount = 0; - /* - * These two are safe on a single CPU system as - * only user tasks fiddle here - */ - lock_sock(sk); - if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) - amount = skb->len; - release_sock(sk); - rc = put_user(amount, (unsigned int __user *)argp); - break; - } + case TIOCINQ: { + struct sk_buff *skb; + int amount = 0; + /* + * These two are safe on a single CPU system as + * only user tasks fiddle here + */ + lock_sock(sk); + if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) + amount = skb->len; + release_sock(sk); + rc = put_user(amount, (unsigned int __user *)argp); + break; + } - case SIOCGSTAMP: - rc = -EINVAL; - if (sk) - rc = sock_get_timestamp(sk, + case SIOCGSTAMP: + rc = -EINVAL; + if (sk) + rc = sock_get_timestamp(sk, (struct timeval __user *)argp); + break; + case SIOCGSTAMPNS: + rc = -EINVAL; + if (sk) + rc = sock_get_timestampns(sk, + (struct timespec __user *)argp); + break; + case SIOCGIFADDR: + case SIOCSIFADDR: + case SIOCGIFDSTADDR: + case SIOCSIFDSTADDR: + case SIOCGIFBRDADDR: + case SIOCSIFBRDADDR: + case SIOCGIFNETMASK: + case SIOCSIFNETMASK: + case SIOCGIFMETRIC: + case SIOCSIFMETRIC: + rc = -EINVAL; + break; + case SIOCADDRT: + case SIOCDELRT: + rc = -EPERM; + if (!capable(CAP_NET_ADMIN)) break; - case SIOCGSTAMPNS: - rc = -EINVAL; - if (sk) - rc = sock_get_timestampns(sk, - (struct timespec __user *)argp); - break; - case SIOCGIFADDR: - case SIOCSIFADDR: - case SIOCGIFDSTADDR: - case SIOCSIFDSTADDR: - case SIOCGIFBRDADDR: - case SIOCSIFBRDADDR: - case SIOCGIFNETMASK: - case SIOCSIFNETMASK: - case SIOCGIFMETRIC: - case SIOCSIFMETRIC: - rc = -EINVAL; - break; - case SIOCADDRT: - case SIOCDELRT: - rc = -EPERM; - if (!capable(CAP_NET_ADMIN)) - break; - rc = x25_route_ioctl(cmd, argp); - break; - case SIOCX25GSUBSCRIP: - rc = x25_subscr_ioctl(cmd, argp); - break; - case SIOCX25SSUBSCRIP: - rc = -EPERM; - if (!capable(CAP_NET_ADMIN)) - break; - rc = x25_subscr_ioctl(cmd, argp); - break; - case SIOCX25GFACILITIES: { - lock_sock(sk); - rc = copy_to_user(argp, &x25->facilities, - sizeof(x25->facilities)) - ? -EFAULT : 0; - release_sock(sk); + rc = x25_route_ioctl(cmd, argp); + break; + case SIOCX25GSUBSCRIP: + rc = x25_subscr_ioctl(cmd, argp); + break; + case SIOCX25SSUBSCRIP: + rc = -EPERM; + if (!capable(CAP_NET_ADMIN)) break; - } + rc = x25_subscr_ioctl(cmd, argp); + break; + case SIOCX25GFACILITIES: { + lock_sock(sk); + rc = copy_to_user(argp, &x25->facilities, + sizeof(x25->facilities)) + ? -EFAULT : 0; + release_sock(sk); + break; + } - case SIOCX25SFACILITIES: { - struct x25_facilities facilities; - rc = -EFAULT; - if (copy_from_user(&facilities, argp, - sizeof(facilities))) - break; - rc = -EINVAL; - lock_sock(sk); - if (sk->sk_state != TCP_LISTEN && - sk->sk_state != TCP_CLOSE) - goto out_fac_release; - if (facilities.pacsize_in < X25_PS16 || - facilities.pacsize_in > X25_PS4096) - goto out_fac_release; - if (facilities.pacsize_out < X25_PS16 || - facilities.pacsize_out > X25_PS4096) - goto out_fac_release; - if (facilities.winsize_in < 1 || - facilities.winsize_in > 127) + case SIOCX25SFACILITIES: { + struct x25_facilities facilities; + rc = -EFAULT; + if (copy_from_user(&facilities, argp, sizeof(facilities))) + break; + rc = -EINVAL; + lock_sock(sk); + if (sk->sk_state != TCP_LISTEN && + sk->sk_state != TCP_CLOSE) + goto out_fac_release; + if (facilities.pacsize_in < X25_PS16 || + facilities.pacsize_in > X25_PS4096) + goto out_fac_release; + if (facilities.pacsize_out < X25_PS16 || + facilities.pacsize_out > X25_PS4096) + goto out_fac_release; + if (facilities.winsize_in < 1 || + facilities.winsize_in > 127) + goto out_fac_release; + if (facilities.throughput) { + int out = facilities.throughput & 0xf0; + int in = facilities.throughput & 0x0f; + if (!out) + facilities.throughput |= + X25_DEFAULT_THROUGHPUT << 4; + else if (out < 0x30 || out > 0xD0) goto out_fac_release; - if (facilities.throughput) { - int out = facilities.throughput & 0xf0; - int in = facilities.throughput & 0x0f; - if (!out) - facilities.throughput |= - X25_DEFAULT_THROUGHPUT << 4; - else if (out < 0x30 || out > 0xD0) - goto out_fac_release; - if (!in) - facilities.throughput |= - X25_DEFAULT_THROUGHPUT; - else if (in < 0x03 || in > 0x0D) - goto out_fac_release; - } - if (facilities.reverse && - (facilities.reverse & 0x81) != 0x81) + if (!in) + facilities.throughput |= + X25_DEFAULT_THROUGHPUT; + else if (in < 0x03 || in > 0x0D) goto out_fac_release; - x25->facilities = facilities; - rc = 0; -out_fac_release: - release_sock(sk); - break; - } - - case SIOCX25GDTEFACILITIES: { - lock_sock(sk); - rc = copy_to_user(argp, &x25->dte_facilities, - sizeof(x25->dte_facilities)); - release_sock(sk); - if (rc) - rc = -EFAULT; - break; } + if (facilities.reverse && + (facilities.reverse & 0x81) != 0x81) + goto out_fac_release; + x25->facilities = facilities; + rc = 0; +out_fac_release: + release_sock(sk); + break; + } - case SIOCX25SDTEFACILITIES: { - struct x25_dte_facilities dtefacs; + case SIOCX25GDTEFACILITIES: { + lock_sock(sk); + rc = copy_to_user(argp, &x25->dte_facilities, + sizeof(x25->dte_facilities)); + release_sock(sk); + if (rc) rc = -EFAULT; - if (copy_from_user(&dtefacs, argp, sizeof(dtefacs))) - break; - rc = -EINVAL; - lock_sock(sk); - if (sk->sk_state != TCP_LISTEN && - sk->sk_state != TCP_CLOSE) - goto out_dtefac_release; - if (dtefacs.calling_len > X25_MAX_AE_LEN) - goto out_dtefac_release; - if (dtefacs.calling_ae == NULL) - goto out_dtefac_release; - if (dtefacs.called_len > X25_MAX_AE_LEN) - goto out_dtefac_release; - if (dtefacs.called_ae == NULL) - goto out_dtefac_release; - x25->dte_facilities = dtefacs; - rc = 0; -out_dtefac_release: - release_sock(sk); - break; - } + break; + } - case SIOCX25GCALLUSERDATA: { - lock_sock(sk); - rc = copy_to_user(argp, &x25->calluserdata, - sizeof(x25->calluserdata)) - ? -EFAULT : 0; - release_sock(sk); + case SIOCX25SDTEFACILITIES: { + struct x25_dte_facilities dtefacs; + rc = -EFAULT; + if (copy_from_user(&dtefacs, argp, sizeof(dtefacs))) break; - } + rc = -EINVAL; + lock_sock(sk); + if (sk->sk_state != TCP_LISTEN && + sk->sk_state != TCP_CLOSE) + goto out_dtefac_release; + if (dtefacs.calling_len > X25_MAX_AE_LEN) + goto out_dtefac_release; + if (dtefacs.calling_ae == NULL) + goto out_dtefac_release; + if (dtefacs.called_len > X25_MAX_AE_LEN) + goto out_dtefac_release; + if (dtefacs.called_ae == NULL) + goto out_dtefac_release; + x25->dte_facilities = dtefacs; + rc = 0; +out_dtefac_release: + release_sock(sk); + break; + } - case SIOCX25SCALLUSERDATA: { - struct x25_calluserdata calluserdata; + case SIOCX25GCALLUSERDATA: { + lock_sock(sk); + rc = copy_to_user(argp, &x25->calluserdata, + sizeof(x25->calluserdata)) + ? -EFAULT : 0; + release_sock(sk); + break; + } - rc = -EFAULT; - if (copy_from_user(&calluserdata, argp, - sizeof(calluserdata))) - break; - rc = -EINVAL; - if (calluserdata.cudlength > X25_MAX_CUD_LEN) - break; - lock_sock(sk); - x25->calluserdata = calluserdata; - release_sock(sk); - rc = 0; - break; - } + case SIOCX25SCALLUSERDATA: { + struct x25_calluserdata calluserdata; - case SIOCX25GCAUSEDIAG: { - lock_sock(sk); - rc = copy_to_user(argp, &x25->causediag, - sizeof(x25->causediag)) - ? -EFAULT : 0; - release_sock(sk); + rc = -EFAULT; + if (copy_from_user(&calluserdata, argp, sizeof(calluserdata))) break; - } + rc = -EINVAL; + if (calluserdata.cudlength > X25_MAX_CUD_LEN) + break; + lock_sock(sk); + x25->calluserdata = calluserdata; + release_sock(sk); + rc = 0; + break; + } - case SIOCX25SCAUSEDIAG: { - struct x25_causediag causediag; - rc = -EFAULT; - if (copy_from_user(&causediag, argp, sizeof(causediag))) - break; - lock_sock(sk); - x25->causediag = causediag; - release_sock(sk); - rc = 0; + case SIOCX25GCAUSEDIAG: { + lock_sock(sk); + rc = copy_to_user(argp, &x25->causediag, sizeof(x25->causediag)) + ? -EFAULT : 0; + release_sock(sk); + break; + } + + case SIOCX25SCAUSEDIAG: { + struct x25_causediag causediag; + rc = -EFAULT; + if (copy_from_user(&causediag, argp, sizeof(causediag))) break; + lock_sock(sk); + x25->causediag = causediag; + release_sock(sk); + rc = 0; + break; - } + } - case SIOCX25SCUDMATCHLEN: { - struct x25_subaddr sub_addr; - rc = -EINVAL; - lock_sock(sk); - if(sk->sk_state != TCP_CLOSE) - goto out_cud_release; - rc = -EFAULT; - if (copy_from_user(&sub_addr, argp, - sizeof(sub_addr))) - goto out_cud_release; - rc = -EINVAL; - if(sub_addr.cudmatchlength > X25_MAX_CUD_LEN) - goto out_cud_release; - x25->cudmatchlength = sub_addr.cudmatchlength; - rc = 0; + case SIOCX25SCUDMATCHLEN: { + struct x25_subaddr sub_addr; + rc = -EINVAL; + lock_sock(sk); + if(sk->sk_state != TCP_CLOSE) + goto out_cud_release; + rc = -EFAULT; + if (copy_from_user(&sub_addr, argp, + sizeof(sub_addr))) + goto out_cud_release; + rc = -EINVAL; + if (sub_addr.cudmatchlength > X25_MAX_CUD_LEN) + goto out_cud_release; + x25->cudmatchlength = sub_addr.cudmatchlength; + rc = 0; out_cud_release: - release_sock(sk); - break; - } + release_sock(sk); + break; + } - case SIOCX25CALLACCPTAPPRV: { - rc = -EINVAL; - lock_sock(sk); - if (sk->sk_state == TCP_CLOSE) { - clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags); - rc = 0; - } - release_sock(sk); - break; + case SIOCX25CALLACCPTAPPRV: { + rc = -EINVAL; + lock_sock(sk); + if (sk->sk_state == TCP_CLOSE) { + clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags); + rc = 0; } + release_sock(sk); + break; + } - case SIOCX25SENDCALLACCPT: { - rc = -EINVAL; - lock_sock(sk); - if (sk->sk_state != TCP_ESTABLISHED) - goto out_sendcallaccpt_release; - /* must call accptapprv above */ - if (test_bit(X25_ACCPT_APPRV_FLAG, &x25->flags)) - goto out_sendcallaccpt_release; - x25_write_internal(sk, X25_CALL_ACCEPTED); - x25->state = X25_STATE_3; - rc = 0; + case SIOCX25SENDCALLACCPT: { + rc = -EINVAL; + lock_sock(sk); + if (sk->sk_state != TCP_ESTABLISHED) + goto out_sendcallaccpt_release; + /* must call accptapprv above */ + if (test_bit(X25_ACCPT_APPRV_FLAG, &x25->flags)) + goto out_sendcallaccpt_release; + x25_write_internal(sk, X25_CALL_ACCEPTED); + x25->state = X25_STATE_3; + rc = 0; out_sendcallaccpt_release: - release_sock(sk); - break; - } + release_sock(sk); + break; + } - default: - rc = -ENOIOCTLCMD; - break; + default: + rc = -ENOIOCTLCMD; + break; } return rc; diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index 60749c5..fa2b418 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -152,21 +152,21 @@ void x25_establish_link(struct x25_neigh *nb) unsigned char *ptr; switch (nb->dev->type) { - case ARPHRD_X25: - if ((skb = alloc_skb(1, GFP_ATOMIC)) == NULL) { - printk(KERN_ERR "x25_dev: out of memory\n"); - return; - } - ptr = skb_put(skb, 1); - *ptr = X25_IFACE_CONNECT; - break; + case ARPHRD_X25: + if ((skb = alloc_skb(1, GFP_ATOMIC)) == NULL) { + printk(KERN_ERR "x25_dev: out of memory\n"); + return; + } + ptr = skb_put(skb, 1); + *ptr = X25_IFACE_CONNECT; + break; #if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE) - case ARPHRD_ETHER: - return; + case ARPHRD_ETHER: + return; #endif - default: - return; + default: + return; } skb->protocol = htons(ETH_P_X25); @@ -208,19 +208,19 @@ void x25_send_frame(struct sk_buff *skb, struct x25_neigh *nb) skb_reset_network_header(skb); switch (nb->dev->type) { - case ARPHRD_X25: - dptr = skb_push(skb, 1); - *dptr = X25_IFACE_DATA; - break; + case ARPHRD_X25: + dptr = skb_push(skb, 1); + *dptr = X25_IFACE_DATA; + break; #if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE) - case ARPHRD_ETHER: - kfree_skb(skb); - return; + case ARPHRD_ETHER: + kfree_skb(skb); + return; #endif - default: - kfree_skb(skb); - return; + default: + kfree_skb(skb); + return; } skb->protocol = htons(ETH_P_X25); diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index 36ab913..a49cd4e 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -94,62 +94,62 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp struct x25_sock *x25 = x25_sk(sk); switch (frametype) { - case X25_CALL_ACCEPTED: { - - x25_stop_timer(sk); - x25->condition = 0x00; - x25->vs = 0; - x25->va = 0; - x25->vr = 0; - x25->vl = 0; - x25->state = X25_STATE_3; - sk->sk_state = TCP_ESTABLISHED; - /* - * Parse the data in the frame. - */ - if (!pskb_may_pull(skb, X25_STD_MIN_LEN)) - goto out_clear; - skb_pull(skb, X25_STD_MIN_LEN); - - len = x25_parse_address_block(skb, &source_addr, - &dest_addr); - if (len > 0) - skb_pull(skb, len); - else if (len < 0) + case X25_CALL_ACCEPTED: { + + x25_stop_timer(sk); + x25->condition = 0x00; + x25->vs = 0; + x25->va = 0; + x25->vr = 0; + x25->vl = 0; + x25->state = X25_STATE_3; + sk->sk_state = TCP_ESTABLISHED; + /* + * Parse the data in the frame. + */ + if (!pskb_may_pull(skb, X25_STD_MIN_LEN)) + goto out_clear; + skb_pull(skb, X25_STD_MIN_LEN); + + len = x25_parse_address_block(skb, &source_addr, + &dest_addr); + if (len > 0) + skb_pull(skb, len); + else if (len < 0) + goto out_clear; + + len = x25_parse_facilities(skb, &x25->facilities, + &x25->dte_facilities, + &x25->vc_facil_mask); + if (len > 0) + skb_pull(skb, len); + else if (len < 0) + goto out_clear; + /* + * Copy any Call User Data. + */ + if (skb->len > 0) { + if (skb->len > X25_MAX_CUD_LEN) goto out_clear; - len = x25_parse_facilities(skb, &x25->facilities, - &x25->dte_facilities, - &x25->vc_facil_mask); - if (len > 0) - skb_pull(skb, len); - else if (len < 0) - goto out_clear; - /* - * Copy any Call User Data. - */ - if (skb->len > 0) { - if (skb->len > X25_MAX_CUD_LEN) - goto out_clear; - - skb_copy_bits(skb, 0, x25->calluserdata.cuddata, - skb->len); - x25->calluserdata.cudlength = skb->len; - } - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_state_change(sk); - break; + skb_copy_bits(skb, 0, x25->calluserdata.cuddata, + skb->len); + x25->calluserdata.cudlength = skb->len; } - case X25_CLEAR_REQUEST: - if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 2)) - goto out_clear; + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_state_change(sk); + break; + } + case X25_CLEAR_REQUEST: + if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 2)) + goto out_clear; - x25_write_internal(sk, X25_CLEAR_CONFIRMATION); - x25_disconnect(sk, ECONNREFUSED, skb->data[3], skb->data[4]); - break; + x25_write_internal(sk, X25_CLEAR_CONFIRMATION); + x25_disconnect(sk, ECONNREFUSED, skb->data[3], skb->data[4]); + break; - default: - break; + default: + break; } return 0; @@ -387,18 +387,18 @@ int x25_process_rx_frame(struct sock *sk, struct sk_buff *skb) frametype = x25_decode(sk, skb, &ns, &nr, &q, &d, &m); switch (x25->state) { - case X25_STATE_1: - queued = x25_state1_machine(sk, skb, frametype); - break; - case X25_STATE_2: - queued = x25_state2_machine(sk, skb, frametype); - break; - case X25_STATE_3: - queued = x25_state3_machine(sk, skb, frametype, ns, nr, q, d, m); - break; - case X25_STATE_4: - queued = x25_state4_machine(sk, skb, frametype); - break; + case X25_STATE_1: + queued = x25_state1_machine(sk, skb, frametype); + break; + case X25_STATE_2: + queued = x25_state2_machine(sk, skb, frametype); + break; + case X25_STATE_3: + queued = x25_state3_machine(sk, skb, frametype, ns, nr, q, d, m); + break; + case X25_STATE_4: + queued = x25_state4_machine(sk, skb, frametype); + break; } x25_kick(sk); diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c index 0a9e074..4acacf3 100644 --- a/net/x25/x25_link.c +++ b/net/x25/x25_link.c @@ -76,33 +76,32 @@ void x25_link_control(struct sk_buff *skb, struct x25_neigh *nb, int confirm; switch (frametype) { - case X25_RESTART_REQUEST: - confirm = !x25_t20timer_pending(nb); - x25_stop_t20timer(nb); - nb->state = X25_LINK_STATE_3; - if (confirm) - x25_transmit_restart_confirmation(nb); + case X25_RESTART_REQUEST: + confirm = !x25_t20timer_pending(nb); + x25_stop_t20timer(nb); + nb->state = X25_LINK_STATE_3; + if (confirm) + x25_transmit_restart_confirmation(nb); + break; + + case X25_RESTART_CONFIRMATION: + x25_stop_t20timer(nb); + nb->state = X25_LINK_STATE_3; + break; + + case X25_DIAGNOSTIC: + if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 4)) break; - case X25_RESTART_CONFIRMATION: - x25_stop_t20timer(nb); - nb->state = X25_LINK_STATE_3; - break; - - case X25_DIAGNOSTIC: - if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 4)) - break; + printk(KERN_WARNING "x25: diagnostic #%d - %02X %02X %02X\n", + skb->data[3], skb->data[4], + skb->data[5], skb->data[6]); + break; - printk(KERN_WARNING "x25: diagnostic #%d - " - "%02X %02X %02X\n", - skb->data[3], skb->data[4], - skb->data[5], skb->data[6]); - break; - - default: - printk(KERN_WARNING "x25: received unknown %02X " - "with LCI 000\n", frametype); - break; + default: + printk(KERN_WARNING "x25: received unknown %02X with LCI 000\n", + frametype); + break; } if (nb->state == X25_LINK_STATE_3) @@ -196,18 +195,18 @@ void x25_transmit_clear_request(struct x25_neigh *nb, unsigned int lci, void x25_transmit_link(struct sk_buff *skb, struct x25_neigh *nb) { switch (nb->state) { - case X25_LINK_STATE_0: - skb_queue_tail(&nb->queue, skb); - nb->state = X25_LINK_STATE_1; - x25_establish_link(nb); - break; - case X25_LINK_STATE_1: - case X25_LINK_STATE_2: - skb_queue_tail(&nb->queue, skb); - break; - case X25_LINK_STATE_3: - x25_send_frame(skb, nb); - break; + case X25_LINK_STATE_0: + skb_queue_tail(&nb->queue, skb); + nb->state = X25_LINK_STATE_1; + x25_establish_link(nb); + break; + case X25_LINK_STATE_1: + case X25_LINK_STATE_2: + skb_queue_tail(&nb->queue, skb); + break; + case X25_LINK_STATE_3: + x25_send_frame(skb, nb); + break; } } @@ -217,14 +216,14 @@ void x25_transmit_link(struct sk_buff *skb, struct x25_neigh *nb) void x25_link_established(struct x25_neigh *nb) { switch (nb->state) { - case X25_LINK_STATE_0: - nb->state = X25_LINK_STATE_2; - break; - case X25_LINK_STATE_1: - x25_transmit_restart_request(nb); - nb->state = X25_LINK_STATE_2; - x25_start_t20timer(nb); - break; + case X25_LINK_STATE_0: + nb->state = X25_LINK_STATE_2; + break; + case X25_LINK_STATE_1: + x25_transmit_restart_request(nb); + nb->state = X25_LINK_STATE_2; + x25_start_t20timer(nb); + break; } } diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c index 7ff3737..2ffde46 100644 --- a/net/x25/x25_proc.c +++ b/net/x25/x25_proc.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c index faf98d8..5170d52 100644 --- a/net/x25/x25_subr.c +++ b/net/x25/x25_subr.c @@ -126,32 +126,30 @@ void x25_write_internal(struct sock *sk, int frametype) * Adjust frame size. */ switch (frametype) { - case X25_CALL_REQUEST: - len += 1 + X25_ADDR_LEN + X25_MAX_FAC_LEN + - X25_MAX_CUD_LEN; - break; - case X25_CALL_ACCEPTED: /* fast sel with no restr on resp */ - if(x25->facilities.reverse & 0x80) { - len += 1 + X25_MAX_FAC_LEN + X25_MAX_CUD_LEN; - } else { - len += 1 + X25_MAX_FAC_LEN; - } - break; - case X25_CLEAR_REQUEST: - case X25_RESET_REQUEST: - len += 2; - break; - case X25_RR: - case X25_RNR: - case X25_REJ: - case X25_CLEAR_CONFIRMATION: - case X25_INTERRUPT_CONFIRMATION: - case X25_RESET_CONFIRMATION: - break; - default: - printk(KERN_ERR "X.25: invalid frame type %02X\n", - frametype); - return; + case X25_CALL_REQUEST: + len += 1 + X25_ADDR_LEN + X25_MAX_FAC_LEN + X25_MAX_CUD_LEN; + break; + case X25_CALL_ACCEPTED: /* fast sel with no restr on resp */ + if (x25->facilities.reverse & 0x80) { + len += 1 + X25_MAX_FAC_LEN + X25_MAX_CUD_LEN; + } else { + len += 1 + X25_MAX_FAC_LEN; + } + break; + case X25_CLEAR_REQUEST: + case X25_RESET_REQUEST: + len += 2; + break; + case X25_RR: + case X25_RNR: + case X25_REJ: + case X25_CLEAR_CONFIRMATION: + case X25_INTERRUPT_CONFIRMATION: + case X25_RESET_CONFIRMATION: + break; + default: + printk(KERN_ERR "X.25: invalid frame type %02X\n", frametype); + return; } if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL) @@ -280,20 +278,20 @@ int x25_decode(struct sock *sk, struct sk_buff *skb, int *ns, int *nr, int *q, *ns = *nr = *q = *d = *m = 0; switch (frame[2]) { - case X25_CALL_REQUEST: - case X25_CALL_ACCEPTED: - case X25_CLEAR_REQUEST: - case X25_CLEAR_CONFIRMATION: - case X25_INTERRUPT: - case X25_INTERRUPT_CONFIRMATION: - case X25_RESET_REQUEST: - case X25_RESET_CONFIRMATION: - case X25_RESTART_REQUEST: - case X25_RESTART_CONFIRMATION: - case X25_REGISTRATION_REQUEST: - case X25_REGISTRATION_CONFIRMATION: - case X25_DIAGNOSTIC: - return frame[2]; + case X25_CALL_REQUEST: + case X25_CALL_ACCEPTED: + case X25_CLEAR_REQUEST: + case X25_CLEAR_CONFIRMATION: + case X25_INTERRUPT: + case X25_INTERRUPT_CONFIRMATION: + case X25_RESET_REQUEST: + case X25_RESET_CONFIRMATION: + case X25_RESTART_REQUEST: + case X25_RESTART_CONFIRMATION: + case X25_REGISTRATION_REQUEST: + case X25_REGISTRATION_CONFIRMATION: + case X25_DIAGNOSTIC: + return frame[2]; } if (x25->neighbour->extended) { diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c index 291228e..cb1f50c 100644 --- a/scripts/basic/fixdep.c +++ b/scripts/basic/fixdep.c @@ -345,6 +345,7 @@ static void parse_dep_file(void *map, size_t len) memcpy(s, m, p-m); s[p-m] = 0; if (strrcmp(s, "include/generated/autoconf.h") && strrcmp(s, "arch/um/include/uml-config.h") && + strrcmp(s, "include/linux/kconfig.h") && strrcmp(s, ".ver")) { /* * Do not list the source file as dependency, so that diff --git a/scripts/dtc/Makefile b/scripts/dtc/Makefile index 04a31c1..6d1c6bb 100644 --- a/scripts/dtc/Makefile +++ b/scripts/dtc/Makefile @@ -25,31 +25,5 @@ HOSTCFLAGS_dtc-lexer.lex.o := $(HOSTCFLAGS_DTC) HOSTCFLAGS_dtc-parser.tab.o := $(HOSTCFLAGS_DTC) # dependencies on generated files need to be listed explicitly -$(obj)/dtc-parser.tab.o: $(obj)/dtc-parser.tab.c $(obj)/dtc-parser.tab.h -$(obj)/dtc-lexer.lex.o: $(obj)/dtc-lexer.lex.c $(obj)/dtc-parser.tab.h +$(obj)/dtc-lexer.lex.o: $(obj)/dtc-parser.tab.h -targets += dtc-parser.tab.c dtc-lexer.lex.c - -clean-files += dtc-parser.tab.h - -# GENERATE_PARSER := 1 # Uncomment to rebuild flex/bison output - -ifdef GENERATE_PARSER - -BISON = bison -FLEX = flex - -quiet_cmd_bison = BISON $@ - cmd_bison = $(BISON) -o$@ -d $<; cp $@ $@_shipped -quiet_cmd_flex = FLEX $@ - cmd_flex = $(FLEX) -o$@ $<; cp $@ $@_shipped - -$(obj)/dtc-parser.tab.c: $(src)/dtc-parser.y FORCE - $(call if_changed,bison) - -$(obj)/dtc-parser.tab.h: $(obj)/dtc-parser.tab.c - -$(obj)/dtc-lexer.lex.c: $(src)/dtc-lexer.l FORCE - $(call if_changed,flex) - -endif diff --git a/scripts/dtc/dtc-lexer.lex.c_shipped b/scripts/dtc/dtc-lexer.lex.c_shipped index 50c4420..8bbe128 100644 --- a/scripts/dtc/dtc-lexer.lex.c_shipped +++ b/scripts/dtc/dtc-lexer.lex.c_shipped @@ -1,6 +1,5 @@ -#line 2 "dtc-lexer.lex.c" -#line 4 "dtc-lexer.lex.c" +#line 3 "scripts/dtc/dtc-lexer.lex.c_shipped" #define YY_INT_ALIGNED short int @@ -54,6 +53,7 @@ typedef int flex_int32_t; typedef unsigned char flex_uint8_t; typedef unsigned short int flex_uint16_t; typedef unsigned int flex_uint32_t; +#endif /* ! C99 */ /* Limits of integral types. */ #ifndef INT8_MIN @@ -84,8 +84,6 @@ typedef unsigned int flex_uint32_t; #define UINT32_MAX (4294967295U) #endif -#endif /* ! C99 */ - #endif /* ! FLEXINT_H */ #ifdef __cplusplus @@ -142,15 +140,7 @@ typedef unsigned int flex_uint32_t; /* Size of default input buffer. */ #ifndef YY_BUF_SIZE -#ifdef __ia64__ -/* On IA-64, the buffer size is 16k, not 8k. - * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case. - * Ditto for the __ia64__ case accordingly. - */ -#define YY_BUF_SIZE 32768 -#else #define YY_BUF_SIZE 16384 -#endif /* __ia64__ */ #endif /* The state buf must be large enough to hold one state per character in the main buffer. @@ -550,7 +540,6 @@ int yy_flex_debug = 0; #define YY_MORE_ADJ 0 #define YY_RESTORE_YY_MORE_OFFSET char *yytext; -#line 1 "dtc-lexer.l" /* * (C) Copyright David Gibson , IBM Corporation. 2005. * @@ -572,10 +561,6 @@ char *yytext; */ #define YY_NO_INPUT 1 - - - -#line 37 "dtc-lexer.l" #include "dtc.h" #include "srcpos.h" #include "dtc-parser.tab.h" @@ -603,7 +588,6 @@ static int dts_version = 1; static void push_input_file(const char *filename); static int pop_input_file(void); -#line 607 "dtc-lexer.lex.c" #define INITIAL 0 #define INCLUDE 1 @@ -686,12 +670,7 @@ static int input (void ); /* Amount of stuff to slurp up with each read. */ #ifndef YY_READ_BUF_SIZE -#ifdef __ia64__ -/* On IA-64, the buffer size is 16k, not 8k */ -#define YY_READ_BUF_SIZE 16384 -#else #define YY_READ_BUF_SIZE 8192 -#endif /* __ia64__ */ #endif /* Copy whatever the last rule matched to the standard output. */ @@ -710,7 +689,7 @@ static int input (void ); if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \ { \ int c = '*'; \ - size_t n; \ + unsigned n; \ for ( n = 0; n < max_size && \ (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ buf[n] = (char) c; \ @@ -792,10 +771,6 @@ YY_DECL register char *yy_cp, *yy_bp; register int yy_act; -#line 66 "dtc-lexer.l" - -#line 798 "dtc-lexer.lex.c" - if ( !(yy_init) ) { (yy_init) = 1; @@ -876,7 +851,6 @@ do_action: /* This label is used only to access EOF actions. */ case 1: /* rule 1 can match eol */ YY_RULE_SETUP -#line 67 "dtc-lexer.l" { char *name = strchr(yytext, '\"') + 1; yytext[yyleng-1] = '\0'; @@ -888,7 +862,6 @@ case YY_STATE_EOF(INCLUDE): case YY_STATE_EOF(BYTESTRING): case YY_STATE_EOF(PROPNODENAME): case YY_STATE_EOF(V1): -#line 73 "dtc-lexer.l" { if (!pop_input_file()) { yyterminate(); @@ -898,7 +871,6 @@ case YY_STATE_EOF(V1): case 2: /* rule 2 can match eol */ YY_RULE_SETUP -#line 79 "dtc-lexer.l" { DPRINT("String: %s\n", yytext); yylval.data = data_copy_escape_string(yytext+1, @@ -908,7 +880,6 @@ YY_RULE_SETUP YY_BREAK case 3: YY_RULE_SETUP -#line 86 "dtc-lexer.l" { DPRINT("Keyword: /dts-v1/\n"); dts_version = 1; @@ -918,7 +889,6 @@ YY_RULE_SETUP YY_BREAK case 4: YY_RULE_SETUP -#line 93 "dtc-lexer.l" { DPRINT("Keyword: /memreserve/\n"); BEGIN_DEFAULT(); @@ -927,7 +897,6 @@ YY_RULE_SETUP YY_BREAK case 5: YY_RULE_SETUP -#line 99 "dtc-lexer.l" { DPRINT("Label: %s\n", yytext); yylval.labelref = xstrdup(yytext); @@ -937,7 +906,6 @@ YY_RULE_SETUP YY_BREAK case 6: YY_RULE_SETUP -#line 106 "dtc-lexer.l" { yylval.literal = xstrdup(yytext); DPRINT("Literal: '%s'\n", yylval.literal); @@ -946,7 +914,6 @@ YY_RULE_SETUP YY_BREAK case 7: YY_RULE_SETUP -#line 112 "dtc-lexer.l" { /* label reference */ DPRINT("Ref: %s\n", yytext+1); yylval.labelref = xstrdup(yytext+1); @@ -955,7 +922,6 @@ YY_RULE_SETUP YY_BREAK case 8: YY_RULE_SETUP -#line 118 "dtc-lexer.l" { /* new-style path reference */ yytext[yyleng-1] = '\0'; DPRINT("Ref: %s\n", yytext+2); @@ -965,7 +931,6 @@ YY_RULE_SETUP YY_BREAK case 9: YY_RULE_SETUP -#line 125 "dtc-lexer.l" { yylval.byte = strtol(yytext, NULL, 16); DPRINT("Byte: %02x\n", (int)yylval.byte); @@ -974,7 +939,6 @@ YY_RULE_SETUP YY_BREAK case 10: YY_RULE_SETUP -#line 131 "dtc-lexer.l" { DPRINT("/BYTESTRING\n"); BEGIN_DEFAULT(); @@ -983,7 +947,6 @@ YY_RULE_SETUP YY_BREAK case 11: YY_RULE_SETUP -#line 137 "dtc-lexer.l" { DPRINT("PropNodeName: %s\n", yytext); yylval.propnodename = xstrdup(yytext); @@ -993,7 +956,6 @@ YY_RULE_SETUP YY_BREAK case 12: YY_RULE_SETUP -#line 144 "dtc-lexer.l" { DPRINT("Binary Include\n"); return DT_INCBIN; @@ -1002,24 +964,20 @@ YY_RULE_SETUP case 13: /* rule 13 can match eol */ YY_RULE_SETUP -#line 149 "dtc-lexer.l" /* eat whitespace */ YY_BREAK case 14: /* rule 14 can match eol */ YY_RULE_SETUP -#line 150 "dtc-lexer.l" /* eat C-style comments */ YY_BREAK case 15: /* rule 15 can match eol */ YY_RULE_SETUP -#line 151 "dtc-lexer.l" /* eat C++-style comments */ YY_BREAK case 16: YY_RULE_SETUP -#line 153 "dtc-lexer.l" { DPRINT("Char: %c (\\x%02x)\n", yytext[0], (unsigned)yytext[0]); @@ -1037,10 +995,8 @@ YY_RULE_SETUP YY_BREAK case 17: YY_RULE_SETUP -#line 168 "dtc-lexer.l" ECHO; YY_BREAK -#line 1044 "dtc-lexer.lex.c" case YY_END_OF_BUFFER: { @@ -1756,8 +1712,8 @@ YY_BUFFER_STATE yy_scan_string (yyconst char * yystr ) /** Setup the input buffer state to scan the given bytes. The next call to yylex() will * scan from a @e copy of @a bytes. - * @param yybytes the byte buffer to scan - * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes. + * @param bytes the byte buffer to scan + * @param len the number of bytes in the buffer pointed to by @a bytes. * * @return the newly allocated buffer state object. */ @@ -1996,10 +1952,6 @@ void yyfree (void * ptr ) #define YYTABLES_NAME "yytables" -#line 168 "dtc-lexer.l" - - - static void push_input_file(const char *filename) { assert(filename); @@ -2011,7 +1963,6 @@ static void push_input_file(const char *filename) yypush_buffer_state(yy_create_buffer(yyin,YY_BUF_SIZE)); } - static int pop_input_file(void) { if (srcfile_pop() == 0) diff --git a/scripts/dtc/dtc-parser.tab.c_shipped b/scripts/dtc/dtc-parser.tab.c_shipped index 9be2eea..b05921e 100644 --- a/scripts/dtc/dtc-parser.tab.c_shipped +++ b/scripts/dtc/dtc-parser.tab.c_shipped @@ -1,10 +1,9 @@ - -/* A Bison parser, made by GNU Bison 2.4.1. */ +/* A Bison parser, made by GNU Bison 2.4.3. */ /* Skeleton implementation for Bison's Yacc-like parsers in C - Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006 - Free Software Foundation, Inc. + Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2009, 2010 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -46,7 +45,7 @@ #define YYBISON 1 /* Bison version. */ -#define YYBISON_VERSION "2.4.1" +#define YYBISON_VERSION "2.4.3" /* Skeleton name. */ #define YYSKELETON_NAME "yacc.c" @@ -67,8 +66,6 @@ /* Copy the first part of user declarations. */ -/* Line 189 of yacc.c */ -#line 21 "dtc-parser.y" #include @@ -87,12 +84,10 @@ extern int treesource_error; static unsigned long long eval_literal(const char *s, int base, int bits); -/* Line 189 of yacc.c */ -#line 92 "dtc-parser.tab.c" /* Enabling traces. */ #ifndef YYDEBUG -# define YYDEBUG 0 +# define YYDEBUG 1 #endif /* Enabling verbose error messages. */ @@ -134,8 +129,6 @@ static unsigned long long eval_literal(const char *s, int base, int bits); typedef union YYSTYPE { -/* Line 214 of yacc.c */ -#line 39 "dtc-parser.y" char *propnodename; char *literal; @@ -154,8 +147,6 @@ typedef union YYSTYPE -/* Line 214 of yacc.c */ -#line 159 "dtc-parser.tab.c" } YYSTYPE; # define YYSTYPE_IS_TRIVIAL 1 # define yystype YYSTYPE /* obsolescent; will be withdrawn */ @@ -166,8 +157,6 @@ typedef union YYSTYPE /* Copy the second part of user declarations. */ -/* Line 264 of yacc.c */ -#line 171 "dtc-parser.tab.c" #ifdef short # undef short @@ -217,7 +206,7 @@ typedef short int yytype_int16; #define YYSIZE_MAXIMUM ((YYSIZE_T) -1) #ifndef YY_ -# if YYENABLE_NLS +# if defined YYENABLE_NLS && YYENABLE_NLS # if ENABLE_NLS # include /* INFRINGES ON USER NAME SPACE */ # define YY_(msgid) dgettext ("bison-runtime", msgid) @@ -607,9 +596,18 @@ static const yytype_uint8 yystos[] = /* Like YYERROR except do call yyerror. This remains here temporarily to ease the transition to the new meaning of YYERROR, for GCC. - Once GCC version 2 has supplanted version 1, this can go. */ + Once GCC version 2 has supplanted version 1, this can go. However, + YYFAIL appears to be in use. Nevertheless, it is formally deprecated + in Bison 2.4.2's NEWS entry, where a plan to phase it out is + discussed. */ #define YYFAIL goto yyerrlab +#if defined YYFAIL + /* This is here to suppress warnings from the GCC cpp's + -Wunused-macros. Normally we don't worry about that warning, but + some users do, and we want to make it easy for users to remove + YYFAIL uses, which will produce warnings from Bison 2.5. */ +#endif #define YYRECOVERING() (!!yyerrstatus) @@ -666,7 +664,7 @@ while (YYID (0)) we won't break user code: when these are the locations we know. */ #ifndef YY_LOCATION_PRINT -# if YYLTYPE_IS_TRIVIAL +# if defined YYLTYPE_IS_TRIVIAL && YYLTYPE_IS_TRIVIAL # define YY_LOCATION_PRINT(File, Loc) \ fprintf (File, "%d.%d-%d.%d", \ (Loc).first_line, (Loc).first_column, \ @@ -1405,8 +1403,6 @@ yyreduce: { case 2: -/* Line 1455 of yacc.c */ -#line 87 "dtc-parser.y" { the_boot_info = build_boot_info((yyvsp[(3) - (4)].re), (yyvsp[(4) - (4)].node), guess_boot_cpuid((yyvsp[(4) - (4)].node))); @@ -1415,8 +1411,6 @@ yyreduce: case 3: -/* Line 1455 of yacc.c */ -#line 95 "dtc-parser.y" { (yyval.re) = NULL; ;} @@ -1424,8 +1418,6 @@ yyreduce: case 4: -/* Line 1455 of yacc.c */ -#line 99 "dtc-parser.y" { (yyval.re) = chain_reserve_entry((yyvsp[(1) - (2)].re), (yyvsp[(2) - (2)].re)); ;} @@ -1433,8 +1425,6 @@ yyreduce: case 5: -/* Line 1455 of yacc.c */ -#line 106 "dtc-parser.y" { (yyval.re) = build_reserve_entry((yyvsp[(2) - (4)].addr), (yyvsp[(3) - (4)].addr)); ;} @@ -1442,8 +1432,6 @@ yyreduce: case 6: -/* Line 1455 of yacc.c */ -#line 110 "dtc-parser.y" { add_label(&(yyvsp[(2) - (2)].re)->labels, (yyvsp[(1) - (2)].labelref)); (yyval.re) = (yyvsp[(2) - (2)].re); @@ -1452,8 +1440,6 @@ yyreduce: case 7: -/* Line 1455 of yacc.c */ -#line 118 "dtc-parser.y" { (yyval.addr) = eval_literal((yyvsp[(1) - (1)].literal), 0, 64); ;} @@ -1461,8 +1447,6 @@ yyreduce: case 8: -/* Line 1455 of yacc.c */ -#line 125 "dtc-parser.y" { (yyval.node) = name_node((yyvsp[(2) - (2)].node), ""); ;} @@ -1470,8 +1454,6 @@ yyreduce: case 9: -/* Line 1455 of yacc.c */ -#line 129 "dtc-parser.y" { (yyval.node) = merge_nodes((yyvsp[(1) - (3)].node), (yyvsp[(3) - (3)].node)); ;} @@ -1479,8 +1461,6 @@ yyreduce: case 10: -/* Line 1455 of yacc.c */ -#line 133 "dtc-parser.y" { struct node *target = get_node_by_ref((yyvsp[(1) - (3)].node), (yyvsp[(2) - (3)].labelref)); @@ -1494,8 +1474,6 @@ yyreduce: case 11: -/* Line 1455 of yacc.c */ -#line 146 "dtc-parser.y" { (yyval.node) = build_node((yyvsp[(2) - (5)].proplist), (yyvsp[(3) - (5)].nodelist)); ;} @@ -1503,8 +1481,6 @@ yyreduce: case 12: -/* Line 1455 of yacc.c */ -#line 153 "dtc-parser.y" { (yyval.proplist) = NULL; ;} @@ -1512,8 +1488,6 @@ yyreduce: case 13: -/* Line 1455 of yacc.c */ -#line 157 "dtc-parser.y" { (yyval.proplist) = chain_property((yyvsp[(2) - (2)].prop), (yyvsp[(1) - (2)].proplist)); ;} @@ -1521,8 +1495,6 @@ yyreduce: case 14: -/* Line 1455 of yacc.c */ -#line 164 "dtc-parser.y" { (yyval.prop) = build_property((yyvsp[(1) - (4)].propnodename), (yyvsp[(3) - (4)].data)); ;} @@ -1530,8 +1502,6 @@ yyreduce: case 15: -/* Line 1455 of yacc.c */ -#line 168 "dtc-parser.y" { (yyval.prop) = build_property((yyvsp[(1) - (2)].propnodename), empty_data); ;} @@ -1539,8 +1509,6 @@ yyreduce: case 16: -/* Line 1455 of yacc.c */ -#line 172 "dtc-parser.y" { add_label(&(yyvsp[(2) - (2)].prop)->labels, (yyvsp[(1) - (2)].labelref)); (yyval.prop) = (yyvsp[(2) - (2)].prop); @@ -1549,8 +1517,6 @@ yyreduce: case 17: -/* Line 1455 of yacc.c */ -#line 180 "dtc-parser.y" { (yyval.data) = data_merge((yyvsp[(1) - (2)].data), (yyvsp[(2) - (2)].data)); ;} @@ -1558,8 +1524,6 @@ yyreduce: case 18: -/* Line 1455 of yacc.c */ -#line 184 "dtc-parser.y" { (yyval.data) = data_merge((yyvsp[(1) - (4)].data), (yyvsp[(3) - (4)].data)); ;} @@ -1567,8 +1531,6 @@ yyreduce: case 19: -/* Line 1455 of yacc.c */ -#line 188 "dtc-parser.y" { (yyval.data) = data_merge((yyvsp[(1) - (4)].data), (yyvsp[(3) - (4)].data)); ;} @@ -1576,8 +1538,6 @@ yyreduce: case 20: -/* Line 1455 of yacc.c */ -#line 192 "dtc-parser.y" { (yyval.data) = data_add_marker((yyvsp[(1) - (2)].data), REF_PATH, (yyvsp[(2) - (2)].labelref)); ;} @@ -1585,8 +1545,6 @@ yyreduce: case 21: -/* Line 1455 of yacc.c */ -#line 196 "dtc-parser.y" { FILE *f = srcfile_relative_open((yyvsp[(4) - (9)].data).val, NULL); struct data d; @@ -1607,8 +1565,6 @@ yyreduce: case 22: -/* Line 1455 of yacc.c */ -#line 213 "dtc-parser.y" { FILE *f = srcfile_relative_open((yyvsp[(4) - (5)].data).val, NULL); struct data d = empty_data; @@ -1622,8 +1578,6 @@ yyreduce: case 23: -/* Line 1455 of yacc.c */ -#line 223 "dtc-parser.y" { (yyval.data) = data_add_marker((yyvsp[(1) - (2)].data), LABEL, (yyvsp[(2) - (2)].labelref)); ;} @@ -1631,8 +1585,6 @@ yyreduce: case 24: -/* Line 1455 of yacc.c */ -#line 230 "dtc-parser.y" { (yyval.data) = empty_data; ;} @@ -1640,8 +1592,6 @@ yyreduce: case 25: -/* Line 1455 of yacc.c */ -#line 234 "dtc-parser.y" { (yyval.data) = (yyvsp[(1) - (2)].data); ;} @@ -1649,8 +1599,6 @@ yyreduce: case 26: -/* Line 1455 of yacc.c */ -#line 238 "dtc-parser.y" { (yyval.data) = data_add_marker((yyvsp[(1) - (2)].data), LABEL, (yyvsp[(2) - (2)].labelref)); ;} @@ -1658,8 +1606,6 @@ yyreduce: case 27: -/* Line 1455 of yacc.c */ -#line 245 "dtc-parser.y" { (yyval.data) = empty_data; ;} @@ -1667,8 +1613,6 @@ yyreduce: case 28: -/* Line 1455 of yacc.c */ -#line 249 "dtc-parser.y" { (yyval.data) = data_append_cell((yyvsp[(1) - (2)].data), (yyvsp[(2) - (2)].cell)); ;} @@ -1676,8 +1620,6 @@ yyreduce: case 29: -/* Line 1455 of yacc.c */ -#line 253 "dtc-parser.y" { (yyval.data) = data_append_cell(data_add_marker((yyvsp[(1) - (2)].data), REF_PHANDLE, (yyvsp[(2) - (2)].labelref)), -1); @@ -1686,8 +1628,6 @@ yyreduce: case 30: -/* Line 1455 of yacc.c */ -#line 258 "dtc-parser.y" { (yyval.data) = data_add_marker((yyvsp[(1) - (2)].data), LABEL, (yyvsp[(2) - (2)].labelref)); ;} @@ -1695,8 +1635,6 @@ yyreduce: case 31: -/* Line 1455 of yacc.c */ -#line 265 "dtc-parser.y" { (yyval.cell) = eval_literal((yyvsp[(1) - (1)].literal), 0, 32); ;} @@ -1704,8 +1642,6 @@ yyreduce: case 32: -/* Line 1455 of yacc.c */ -#line 272 "dtc-parser.y" { (yyval.data) = empty_data; ;} @@ -1713,8 +1649,6 @@ yyreduce: case 33: -/* Line 1455 of yacc.c */ -#line 276 "dtc-parser.y" { (yyval.data) = data_append_byte((yyvsp[(1) - (2)].data), (yyvsp[(2) - (2)].byte)); ;} @@ -1722,8 +1656,6 @@ yyreduce: case 34: -/* Line 1455 of yacc.c */ -#line 280 "dtc-parser.y" { (yyval.data) = data_add_marker((yyvsp[(1) - (2)].data), LABEL, (yyvsp[(2) - (2)].labelref)); ;} @@ -1731,8 +1663,6 @@ yyreduce: case 35: -/* Line 1455 of yacc.c */ -#line 287 "dtc-parser.y" { (yyval.nodelist) = NULL; ;} @@ -1740,8 +1670,6 @@ yyreduce: case 36: -/* Line 1455 of yacc.c */ -#line 291 "dtc-parser.y" { (yyval.nodelist) = chain_node((yyvsp[(1) - (2)].node), (yyvsp[(2) - (2)].nodelist)); ;} @@ -1749,8 +1677,6 @@ yyreduce: case 37: -/* Line 1455 of yacc.c */ -#line 295 "dtc-parser.y" { print_error("syntax error: properties must precede subnodes"); YYERROR; @@ -1759,8 +1685,6 @@ yyreduce: case 38: -/* Line 1455 of yacc.c */ -#line 303 "dtc-parser.y" { (yyval.node) = name_node((yyvsp[(2) - (2)].node), (yyvsp[(1) - (2)].propnodename)); ;} @@ -1768,8 +1692,6 @@ yyreduce: case 39: -/* Line 1455 of yacc.c */ -#line 307 "dtc-parser.y" { add_label(&(yyvsp[(2) - (2)].node)->labels, (yyvsp[(1) - (2)].labelref)); (yyval.node) = (yyvsp[(2) - (2)].node); @@ -1778,8 +1700,6 @@ yyreduce: -/* Line 1455 of yacc.c */ -#line 1783 "dtc-parser.tab.c" default: break; } YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc); @@ -1990,8 +1910,6 @@ yyreturn: -/* Line 1675 of yacc.c */ -#line 313 "dtc-parser.y" void print_error(char const *fmt, ...) diff --git a/scripts/dtc/dtc-parser.tab.h_shipped b/scripts/dtc/dtc-parser.tab.h_shipped index 95c9547..4ee682b 100644 --- a/scripts/dtc/dtc-parser.tab.h_shipped +++ b/scripts/dtc/dtc-parser.tab.h_shipped @@ -1,10 +1,9 @@ - -/* A Bison parser, made by GNU Bison 2.4.1. */ +/* A Bison parser, made by GNU Bison 2.4.3. */ /* Skeleton interface for Bison's Yacc-like parsers in C - Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006 - Free Software Foundation, Inc. + Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2009, 2010 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -58,8 +57,6 @@ typedef union YYSTYPE { -/* Line 1676 of yacc.c */ -#line 39 "dtc-parser.y" char *propnodename; char *literal; @@ -78,8 +75,6 @@ typedef union YYSTYPE -/* Line 1676 of yacc.c */ -#line 83 "dtc-parser.tab.h" } YYSTYPE; # define YYSTYPE_IS_TRIVIAL 1 # define yystype YYSTYPE /* obsolescent; will be withdrawn */ diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 0848292..69ddb47 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -200,7 +200,7 @@ void __init aa_destroy_aafs(void) * * Returns: error on failure */ -int __init aa_create_aafs(void) +static int __init aa_create_aafs(void) { int error; diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 78adc43..c1e18ba 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -67,7 +67,7 @@ static int may_change_ptraced_domain(struct task_struct *task, int error = 0; rcu_read_lock(); - tracer = tracehook_tracer_task(task); + tracer = ptrace_parent(task); if (tracer) { /* released below */ cred = get_task_cred(tracer); diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c index 649fad8..7ee05c6 100644 --- a/security/apparmor/ipc.c +++ b/security/apparmor/ipc.c @@ -19,6 +19,7 @@ #include "include/capability.h" #include "include/context.h" #include "include/policy.h" +#include "include/ipc.h" /* call back to audit ptrace fields */ static void audit_cb(struct audit_buffer *ab, void *va) diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 506d2ba..9516948 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -12,11 +12,13 @@ * License. */ +#include #include #include #include #include "include/audit.h" +#include "include/apparmor.h" /** diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index d6d9a57..741dd13 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -381,11 +381,11 @@ static bool unpack_trans_table(struct aa_ext *e, struct aa_profile *profile) profile->file.trans.size = size; for (i = 0; i < size; i++) { char *str; - int c, j, size = unpack_strdup(e, &str, NULL); + int c, j, size2 = unpack_strdup(e, &str, NULL); /* unpack_strdup verifies that the last character is * null termination byte. */ - if (!size) + if (!size2) goto fail; profile->file.trans.table[i] = str; /* verify that name doesn't start with space */ @@ -393,7 +393,7 @@ static bool unpack_trans_table(struct aa_ext *e, struct aa_profile *profile) goto fail; /* count internal # of internal \0 */ - for (c = j = 0; j < size - 2; j++) { + for (c = j = 0; j < size2 - 2; j++) { if (!str[j]) c++; } @@ -440,11 +440,11 @@ static bool unpack_rlimits(struct aa_ext *e, struct aa_profile *profile) if (size > RLIM_NLIMITS) goto fail; for (i = 0; i < size; i++) { - u64 tmp = 0; + u64 tmp2 = 0; int a = aa_map_resource(i); - if (!unpack_u64(e, &tmp, NULL)) + if (!unpack_u64(e, &tmp2, NULL)) goto fail; - profile->rlimits.limits[a].rlim_max = tmp; + profile->rlimits.limits[a].rlim_max = tmp2; } if (!unpack_nameX(e, AA_ARRAYEND, NULL)) goto fail; diff --git a/security/apparmor/procattr.c b/security/apparmor/procattr.c index 04a2cf8..1b41c54 100644 --- a/security/apparmor/procattr.c +++ b/security/apparmor/procattr.c @@ -16,6 +16,7 @@ #include "include/context.h" #include "include/policy.h" #include "include/domain.h" +#include "include/procattr.h" /** diff --git a/sound/aoa/codecs/onyx.c b/sound/aoa/codecs/onyx.c index 3687a6c..762af68 100644 --- a/sound/aoa/codecs/onyx.c +++ b/sound/aoa/codecs/onyx.c @@ -1067,7 +1067,6 @@ static int onyx_i2c_probe(struct i2c_client *client, printk(KERN_DEBUG PFX "created and attached onyx instance\n"); return 0; fail: - i2c_set_clientdata(client, NULL); kfree(onyx); return -ENODEV; } @@ -1112,8 +1111,7 @@ static int onyx_i2c_remove(struct i2c_client *client) aoa_codec_unregister(&onyx->codec); of_node_put(onyx->codec.node); - if (onyx->codec_info) - kfree(onyx->codec_info); + kfree(onyx->codec_info); kfree(onyx); return 0; } diff --git a/sound/aoa/fabrics/layout.c b/sound/aoa/fabrics/layout.c index 3fd1a7e..552b97a 100644 --- a/sound/aoa/fabrics/layout.c +++ b/sound/aoa/fabrics/layout.c @@ -1073,10 +1073,10 @@ static int aoa_fabric_layout_probe(struct soundbus_dev *sdev) sdev->pcmid = -1; list_del(&ldev->list); layouts_list_items--; + kfree(ldev); outnodev: of_node_put(sound); layout_device = NULL; - kfree(ldev); return -ENODEV; } diff --git a/sound/aoa/soundbus/i2sbus/core.c b/sound/aoa/soundbus/i2sbus/core.c index 3ff8cc5..0106583 100644 --- a/sound/aoa/soundbus/i2sbus/core.c +++ b/sound/aoa/soundbus/i2sbus/core.c @@ -262,8 +262,7 @@ static int i2sbus_add_dev(struct macio_dev *macio, */ dev->allocated_resource[i] = request_mem_region(dev->resources[i].start, - dev->resources[i].end - - dev->resources[i].start + 1, + resource_size(&dev->resources[i]), dev->rnames[i]); if (!dev->allocated_resource[i]) { printk(KERN_ERR "i2sbus: failed to claim resource %d!\n", i); @@ -272,19 +271,19 @@ static int i2sbus_add_dev(struct macio_dev *macio, } r = &dev->resources[aoa_resource_i2smmio]; - rlen = r->end - r->start + 1; + rlen = resource_size(r); if (rlen < sizeof(struct i2s_interface_regs)) goto err; dev->intfregs = ioremap(r->start, rlen); r = &dev->resources[aoa_resource_txdbdma]; - rlen = r->end - r->start + 1; + rlen = resource_size(r); if (rlen < sizeof(struct dbdma_regs)) goto err; dev->out.dbdma = ioremap(r->start, rlen); r = &dev->resources[aoa_resource_rxdbdma]; - rlen = r->end - r->start + 1; + rlen = resource_size(r); if (rlen < sizeof(struct dbdma_regs)) goto err; dev->in.dbdma = ioremap(r->start, rlen); diff --git a/sound/aoa/soundbus/i2sbus/pcm.c b/sound/aoa/soundbus/i2sbus/pcm.c index be83899..19491ed 100644 --- a/sound/aoa/soundbus/i2sbus/pcm.c +++ b/sound/aoa/soundbus/i2sbus/pcm.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "../soundbus.h" #include "i2sbus.h" diff --git a/sound/atmel/abdac.c b/sound/atmel/abdac.c index bfee60c..6fd9391 100644 --- a/sound/atmel/abdac.c +++ b/sound/atmel/abdac.c @@ -448,7 +448,7 @@ static int __devinit atmel_abdac_probe(struct platform_device *pdev) goto out_free_card; } - dac->regs = ioremap(regs->start, regs->end - regs->start + 1); + dac->regs = ioremap(regs->start, resource_size(regs)); if (!dac->regs) { dev_dbg(&pdev->dev, "could not remap register memory\n"); goto out_free_card; diff --git a/sound/atmel/ac97c.c b/sound/atmel/ac97c.c index ac35222..73516f6 100644 --- a/sound/atmel/ac97c.c +++ b/sound/atmel/ac97c.c @@ -899,6 +899,10 @@ static void atmel_ac97c_reset(struct atmel_ac97c *chip) /* AC97 v2.2 specifications says minimum 1 us. */ udelay(2); gpio_set_value(chip->reset_pin, 1); + } else { + ac97c_writel(chip, MR, AC97C_MR_WRST | AC97C_MR_ENA); + udelay(2); + ac97c_writel(chip, MR, AC97C_MR_ENA); } } @@ -971,7 +975,7 @@ static int __devinit atmel_ac97c_probe(struct platform_device *pdev) chip->card = card; chip->pclk = pclk; chip->pdev = pdev; - chip->regs = ioremap(regs->start, regs->end - regs->start + 1); + chip->regs = ioremap(regs->start, resource_size(regs)); if (!chip->regs) { dev_dbg(&pdev->dev, "could not remap register memory\n"); diff --git a/sound/drivers/mpu401/mpu401.c b/sound/drivers/mpu401/mpu401.c index 149d05a..2575690 100644 --- a/sound/drivers/mpu401/mpu401.c +++ b/sound/drivers/mpu401/mpu401.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include @@ -86,8 +86,7 @@ static int snd_mpu401_create(int dev, struct snd_card **rcard) } err = snd_mpu401_uart_new(card, 0, MPU401_HW_MPU401, port[dev], 0, - irq[dev], irq[dev] >= 0 ? IRQF_DISABLED : 0, - NULL); + irq[dev], NULL); if (err < 0) { printk(KERN_ERR "MPU401 not detected at 0x%lx\n", port[dev]); goto _err; diff --git a/sound/drivers/mpu401/mpu401_uart.c b/sound/drivers/mpu401/mpu401_uart.c index 74f5a3d..4608c2c 100644 --- a/sound/drivers/mpu401/mpu401_uart.c +++ b/sound/drivers/mpu401/mpu401_uart.c @@ -3,7 +3,7 @@ * Routines for control of MPU-401 in UART mode * * MPU-401 supports UART mode which is not capable generate transmit - * interrupts thus output is done via polling. Also, if irq < 0, then + * interrupts thus output is done via polling. Without interrupt, * input is done also via polling. Do not expect good performance. * * @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -374,7 +375,7 @@ snd_mpu401_uart_input_trigger(struct snd_rawmidi_substream *substream, int up) /* first time - flush FIFO */ while (max-- > 0) mpu->read(mpu, MPU401D(mpu)); - if (mpu->irq < 0) + if (mpu->info_flags & MPU401_INFO_USE_TIMER) snd_mpu401_uart_add_timer(mpu, 1); } @@ -383,7 +384,7 @@ snd_mpu401_uart_input_trigger(struct snd_rawmidi_substream *substream, int up) snd_mpu401_uart_input_read(mpu); spin_unlock_irqrestore(&mpu->input_lock, flags); } else { - if (mpu->irq < 0) + if (mpu->info_flags & MPU401_INFO_USE_TIMER) snd_mpu401_uart_remove_timer(mpu, 1); clear_bit(MPU401_MODE_BIT_INPUT_TRIGGER, &mpu->mode); } @@ -496,7 +497,7 @@ static struct snd_rawmidi_ops snd_mpu401_uart_input = static void snd_mpu401_uart_free(struct snd_rawmidi *rmidi) { struct snd_mpu401 *mpu = rmidi->private_data; - if (mpu->irq_flags && mpu->irq >= 0) + if (mpu->irq >= 0) free_irq(mpu->irq, (void *) mpu); release_and_free_resource(mpu->res); kfree(mpu); @@ -509,8 +510,7 @@ static void snd_mpu401_uart_free(struct snd_rawmidi *rmidi) * @hardware: the hardware type, MPU401_HW_XXXX * @port: the base address of MPU401 port * @info_flags: bitflags MPU401_INFO_XXX - * @irq: the irq number, -1 if no interrupt for mpu - * @irq_flags: the irq request flags (SA_XXX), 0 if irq was already reserved. + * @irq: the ISA irq number, -1 if not to be allocated * @rrawmidi: the pointer to store the new rawmidi instance * * Creates a new MPU-401 instance. @@ -525,7 +525,7 @@ int snd_mpu401_uart_new(struct snd_card *card, int device, unsigned short hardware, unsigned long port, unsigned int info_flags, - int irq, int irq_flags, + int irq, struct snd_rawmidi ** rrawmidi) { struct snd_mpu401 *mpu; @@ -578,8 +578,8 @@ int snd_mpu401_uart_new(struct snd_card *card, int device, mpu->cport = port + 2; else mpu->cport = port + 1; - if (irq >= 0 && irq_flags) { - if (request_irq(irq, snd_mpu401_uart_interrupt, irq_flags, + if (irq >= 0) { + if (request_irq(irq, snd_mpu401_uart_interrupt, 0, "MPU401 UART", (void *) mpu)) { snd_printk(KERN_ERR "mpu401_uart: " "unable to grab IRQ %d\n", irq); @@ -587,9 +587,10 @@ int snd_mpu401_uart_new(struct snd_card *card, int device, return -EBUSY; } } + if (irq < 0 && !(info_flags & MPU401_INFO_IRQ_HOOK)) + info_flags |= MPU401_INFO_USE_TIMER; mpu->info_flags = info_flags; mpu->irq = irq; - mpu->irq_flags = irq_flags; if (card->shortname[0]) snprintf(rmidi->name, sizeof(rmidi->name), "%s MIDI", card->shortname); diff --git a/sound/drivers/opl3/opl3_lib.c b/sound/drivers/opl3/opl3_lib.c index 6e31e46..33d9a85 100644 --- a/sound/drivers/opl3/opl3_lib.c +++ b/sound/drivers/opl3/opl3_lib.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/sound/drivers/opl3/opl3_oss.c b/sound/drivers/opl3/opl3_oss.c index ade3ca5..c1cb249 100644 --- a/sound/drivers/opl3/opl3_oss.c +++ b/sound/drivers/opl3/opl3_oss.c @@ -18,6 +18,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include #include "opl3_voice.h" static int snd_opl3_open_seq_oss(struct snd_seq_oss_arg *arg, void *closure); diff --git a/sound/drivers/opl3/opl3_seq.c b/sound/drivers/opl3/opl3_seq.c index 2d33f53..723562e 100644 --- a/sound/drivers/opl3/opl3_seq.c +++ b/sound/drivers/opl3/opl3_seq.c @@ -25,6 +25,7 @@ #include "opl3_voice.h" #include #include +#include #include MODULE_AUTHOR("Uros Bizjak "); diff --git a/sound/drivers/opl3/opl3_synth.c b/sound/drivers/opl3/opl3_synth.c index 301acb6..742a4b6 100644 --- a/sound/drivers/opl3/opl3_synth.c +++ b/sound/drivers/opl3/opl3_synth.c @@ -20,6 +20,7 @@ */ #include +#include #include #include diff --git a/sound/drivers/opl4/opl4_lib.c b/sound/drivers/opl4/opl4_lib.c index f07e38d..b953fb4 100644 --- a/sound/drivers/opl4/opl4_lib.c +++ b/sound/drivers/opl4/opl4_lib.c @@ -22,6 +22,7 @@ #include #include #include +#include #include MODULE_AUTHOR("Clemens Ladisch "); diff --git a/sound/drivers/opl4/opl4_proc.c b/sound/drivers/opl4/opl4_proc.c index df850b8..9b824bf 100644 --- a/sound/drivers/opl4/opl4_proc.c +++ b/sound/drivers/opl4/opl4_proc.c @@ -19,6 +19,7 @@ #include "opl4_local.h" #include +#include #include #ifdef CONFIG_PROC_FS diff --git a/sound/drivers/opl4/opl4_seq.c b/sound/drivers/opl4/opl4_seq.c index 43d8a2b..9919769 100644 --- a/sound/drivers/opl4/opl4_seq.c +++ b/sound/drivers/opl4/opl4_seq.c @@ -34,6 +34,7 @@ #include "opl4_local.h" #include #include +#include #include MODULE_AUTHOR("Clemens Ladisch "); diff --git a/sound/drivers/pcsp/pcsp.c b/sound/drivers/pcsp/pcsp.c index f165c77..e6ad8d4 100644 --- a/sound/drivers/pcsp/pcsp.c +++ b/sound/drivers/pcsp/pcsp.c @@ -6,7 +6,7 @@ */ #include -#include +#include #include #include #include @@ -187,8 +187,8 @@ static int __devinit pcsp_probe(struct platform_device *dev) static int __devexit pcsp_remove(struct platform_device *dev) { struct snd_pcsp *chip = platform_get_drvdata(dev); - alsa_card_pcsp_exit(chip); pcspkr_input_remove(chip->input_dev); + alsa_card_pcsp_exit(chip); platform_set_drvdata(dev, NULL); return 0; } diff --git a/sound/drivers/pcsp/pcsp.h b/sound/drivers/pcsp/pcsp.h index 4ff6c8c..fc7a2dc 100644 --- a/sound/drivers/pcsp/pcsp.h +++ b/sound/drivers/pcsp/pcsp.h @@ -10,14 +10,8 @@ #define __PCSP_H__ #include +#include #include -#if defined(CONFIG_MIPS) || defined(CONFIG_X86) -/* Use the global PIT lock ! */ -#include -#else -#include -static DEFINE_RAW_SPINLOCK(i8253_lock); -#endif #define PCSP_SOUND_VERSION 0x400 /* read 4.00 */ #define PCSP_DEBUG 0 diff --git a/sound/drivers/vx/vx_core.c b/sound/drivers/vx/vx_core.c index 19c6e37..b8e5159 100644 --- a/sound/drivers/vx/vx_core.c +++ b/sound/drivers/vx/vx_core.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/sound/drivers/vx/vx_hwdep.c b/sound/drivers/vx/vx_hwdep.c index f7a6fbd..4a1fae9 100644 --- a/sound/drivers/vx/vx_hwdep.c +++ b/sound/drivers/vx/vx_hwdep.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/sound/firewire/cmp.c b/sound/firewire/cmp.c index 14cacbc..76294f2 100644 --- a/sound/firewire/cmp.c +++ b/sound/firewire/cmp.c @@ -32,7 +32,7 @@ enum bus_reset_handling { SUCCEED_ON_BUS_RESET, }; -static __attribute__((format(printf, 2, 3))) +static __printf(2, 3) void cmp_error(struct cmp_connection *c, const char *fmt, ...) { va_list va; diff --git a/sound/firewire/isight.c b/sound/firewire/isight.c index 4400308..cd094ec 100644 --- a/sound/firewire/isight.c +++ b/sound/firewire/isight.c @@ -51,7 +51,6 @@ struct isight { struct fw_unit *unit; struct fw_device *device; u64 audio_base; - struct fw_address_handler iris_handler; struct snd_pcm_substream *pcm; struct mutex mutex; struct iso_packets_buffer buffer; diff --git a/sound/firewire/iso-resources.c b/sound/firewire/iso-resources.c index ffe20b8..5f17b77 100644 --- a/sound/firewire/iso-resources.c +++ b/sound/firewire/iso-resources.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/sound/firewire/packets-buffer.c b/sound/firewire/packets-buffer.c index 3c61ca2..ea15066 100644 --- a/sound/firewire/packets-buffer.c +++ b/sound/firewire/packets-buffer.c @@ -6,6 +6,7 @@ */ #include +#include #include #include "packets-buffer.h" diff --git a/sound/firewire/speakers.c b/sound/firewire/speakers.c index 5466de8..cbe6bb9 100644 --- a/sound/firewire/speakers.c +++ b/sound/firewire/speakers.c @@ -171,7 +171,7 @@ static int fwspk_open(struct snd_pcm_substream *substream) err = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_PERIOD_TIME, - 5000, 8192000); + 5000, UINT_MAX); if (err < 0) return err; @@ -778,9 +778,10 @@ static int __devexit fwspk_remove(struct device *dev) { struct fwspk *fwspk = dev_get_drvdata(dev); - mutex_lock(&fwspk->mutex); amdtp_out_stream_pcm_abort(&fwspk->stream); snd_card_disconnect(fwspk->card); + + mutex_lock(&fwspk->mutex); fwspk_stop_stream(fwspk); mutex_unlock(&fwspk->mutex); @@ -796,8 +797,8 @@ static void fwspk_bus_reset(struct fw_unit *unit) fcp_bus_reset(fwspk->unit); if (cmp_connection_update(&fwspk->connection) < 0) { - mutex_lock(&fwspk->mutex); amdtp_out_stream_pcm_abort(&fwspk->stream); + mutex_lock(&fwspk->mutex); fwspk_stop_stream(fwspk); mutex_unlock(&fwspk->mutex); return; diff --git a/sound/i2c/cs8427.c b/sound/i2c/cs8427.c index 04ae870..6c2dc38 100644 --- a/sound/i2c/cs8427.c +++ b/sound/i2c/cs8427.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/sound/i2c/i2c.c b/sound/i2c/i2c.c index eb7c7d0..4677037 100644 --- a/sound/i2c/i2c.c +++ b/sound/i2c/i2c.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include diff --git a/sound/i2c/other/ak4113.c b/sound/i2c/other/ak4113.c index c424d32..e1ab3ad 100644 --- a/sound/i2c/other/ak4113.c +++ b/sound/i2c/other/ak4113.c @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -55,8 +56,7 @@ static inline unsigned char reg_read(struct ak4113 *ak4113, unsigned char reg) static void snd_ak4113_free(struct ak4113 *chip) { - chip->init = 1; /* don't schedule new work */ - mb(); + atomic_inc(&chip->wq_processing); /* don't schedule new work */ cancel_delayed_work_sync(&chip->work); kfree(chip); } @@ -88,6 +88,7 @@ int snd_ak4113_create(struct snd_card *card, ak4113_read_t *read, chip->write = write; chip->private_data = private_data; INIT_DELAYED_WORK(&chip->work, ak4113_stats); + atomic_set(&chip->wq_processing, 0); for (reg = 0; reg < AK4113_WRITABLE_REGS ; reg++) chip->regmap[reg] = pgm[reg]; @@ -138,13 +139,11 @@ static void ak4113_init_regs(struct ak4113 *chip) void snd_ak4113_reinit(struct ak4113 *chip) { - chip->init = 1; - mb(); - flush_delayed_work_sync(&chip->work); + if (atomic_inc_return(&chip->wq_processing) == 1) + cancel_delayed_work_sync(&chip->work); ak4113_init_regs(chip); /* bring up statistics / event queing */ - chip->init = 0; - if (chip->kctls[0]) + if (atomic_dec_and_test(&chip->wq_processing)) schedule_delayed_work(&chip->work, HZ / 10); } EXPORT_SYMBOL_GPL(snd_ak4113_reinit); @@ -631,8 +630,9 @@ static void ak4113_stats(struct work_struct *work) { struct ak4113 *chip = container_of(work, struct ak4113, work.work); - if (!chip->init) + if (atomic_inc_return(&chip->wq_processing) == 1) snd_ak4113_check_rate_and_errors(chip, chip->check_flags); - schedule_delayed_work(&chip->work, HZ / 10); + if (atomic_dec_and_test(&chip->wq_processing)) + schedule_delayed_work(&chip->work, HZ / 10); } diff --git a/sound/i2c/other/ak4114.c b/sound/i2c/other/ak4114.c index d9fb537..4c6b379 100644 --- a/sound/i2c/other/ak4114.c +++ b/sound/i2c/other/ak4114.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -65,8 +66,7 @@ static void reg_dump(struct ak4114 *ak4114) static void snd_ak4114_free(struct ak4114 *chip) { - chip->init = 1; /* don't schedule new work */ - mb(); + atomic_inc(&chip->wq_processing); /* don't schedule new work */ cancel_delayed_work_sync(&chip->work); kfree(chip); } @@ -99,6 +99,7 @@ int snd_ak4114_create(struct snd_card *card, chip->write = write; chip->private_data = private_data; INIT_DELAYED_WORK(&chip->work, ak4114_stats); + atomic_set(&chip->wq_processing, 0); for (reg = 0; reg < 7; reg++) chip->regmap[reg] = pgm[reg]; @@ -151,13 +152,11 @@ static void ak4114_init_regs(struct ak4114 *chip) void snd_ak4114_reinit(struct ak4114 *chip) { - chip->init = 1; - mb(); - flush_delayed_work_sync(&chip->work); + if (atomic_inc_return(&chip->wq_processing) == 1) + cancel_delayed_work_sync(&chip->work); ak4114_init_regs(chip); /* bring up statistics / event queing */ - chip->init = 0; - if (chip->kctls[0]) + if (atomic_dec_and_test(&chip->wq_processing)) schedule_delayed_work(&chip->work, HZ / 10); } @@ -611,10 +610,10 @@ static void ak4114_stats(struct work_struct *work) { struct ak4114 *chip = container_of(work, struct ak4114, work.work); - if (!chip->init) + if (atomic_inc_return(&chip->wq_processing) == 1) snd_ak4114_check_rate_and_errors(chip, chip->check_flags); - - schedule_delayed_work(&chip->work, HZ / 10); + if (atomic_dec_and_test(&chip->wq_processing)) + schedule_delayed_work(&chip->work, HZ / 10); } EXPORT_SYMBOL(snd_ak4114_create); diff --git a/sound/i2c/other/ak4117.c b/sound/i2c/other/ak4117.c index 2cad2d6..b4b2a51 100644 --- a/sound/i2c/other/ak4117.c +++ b/sound/i2c/other/ak4117.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include diff --git a/sound/i2c/other/ak4xxx-adda.c b/sound/i2c/other/ak4xxx-adda.c index 57ccba8..ed726d1 100644 --- a/sound/i2c/other/ak4xxx-adda.c +++ b/sound/i2c/other/ak4xxx-adda.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -570,7 +571,7 @@ static int ak4xxx_capture_source_info(struct snd_kcontrol *kcontrol, struct snd_akm4xxx *ak = snd_kcontrol_chip(kcontrol); int mixer_ch = AK_GET_SHIFT(kcontrol->private_value); const char **input_names; - int num_names, idx; + unsigned int num_names, idx; num_names = ak4xxx_capture_num_inputs(ak, mixer_ch); if (!num_names) diff --git a/sound/i2c/other/pt2258.c b/sound/i2c/other/pt2258.c index 797d3a6..9fa390b 100644 --- a/sound/i2c/other/pt2258.c +++ b/sound/i2c/other/pt2258.c @@ -24,6 +24,7 @@ #include #include #include +#include MODULE_AUTHOR("Jochen Voss "); MODULE_DESCRIPTION("PT2258 volume controller (Princeton Technology Corp.)"); diff --git a/sound/i2c/other/tea575x-tuner.c b/sound/i2c/other/tea575x-tuner.c index 4831800..6b68c82 100644 --- a/sound/i2c/other/tea575x-tuner.c +++ b/sound/i2c/other/tea575x-tuner.c @@ -22,11 +22,12 @@ #include #include -#include +#include #include #include #include -#include +#include +#include #include MODULE_AUTHOR("Jaroslav Kysela "); @@ -62,17 +63,6 @@ module_param(radio_nr, int, 0); #define TEA575X_BIT_DUMMY (1<<15) /* buffer */ #define TEA575X_BIT_FREQ_MASK 0x7fff -static struct v4l2_queryctrl radio_qctrl[] = { - { - .id = V4L2_CID_AUDIO_MUTE, - .name = "Mute", - .minimum = 0, - .maximum = 1, - .default_value = 1, - .type = V4L2_CTRL_TYPE_BOOLEAN, - } -}; - /* * lowlevel part */ @@ -266,83 +256,23 @@ static int vidioc_s_audio(struct file *file, void *priv, return 0; } -static int vidioc_queryctrl(struct file *file, void *priv, - struct v4l2_queryctrl *qc) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(radio_qctrl); i++) { - if (qc->id && qc->id == radio_qctrl[i].id) { - memcpy(qc, &(radio_qctrl[i]), - sizeof(*qc)); - return 0; - } - } - return -EINVAL; -} - -static int vidioc_g_ctrl(struct file *file, void *priv, - struct v4l2_control *ctrl) +static int tea575x_s_ctrl(struct v4l2_ctrl *ctrl) { - struct snd_tea575x *tea = video_drvdata(file); + struct snd_tea575x *tea = container_of(ctrl->handler, struct snd_tea575x, ctrl_handler); switch (ctrl->id) { case V4L2_CID_AUDIO_MUTE: - ctrl->value = tea->mute; + tea->mute = ctrl->val; + snd_tea575x_set_freq(tea); return 0; } - return -EINVAL; -} -static int vidioc_s_ctrl(struct file *file, void *priv, - struct v4l2_control *ctrl) -{ - struct snd_tea575x *tea = video_drvdata(file); - - switch (ctrl->id) { - case V4L2_CID_AUDIO_MUTE: - if (tea->mute != ctrl->value) { - tea->mute = ctrl->value; - snd_tea575x_set_freq(tea); - } - return 0; - } return -EINVAL; } -static int vidioc_g_input(struct file *filp, void *priv, unsigned int *i) -{ - *i = 0; - return 0; -} - -static int vidioc_s_input(struct file *filp, void *priv, unsigned int i) -{ - if (i != 0) - return -EINVAL; - return 0; -} - -static int snd_tea575x_exclusive_open(struct file *file) -{ - struct snd_tea575x *tea = video_drvdata(file); - - return test_and_set_bit(0, &tea->in_use) ? -EBUSY : 0; -} - -static int snd_tea575x_exclusive_release(struct file *file) -{ - struct snd_tea575x *tea = video_drvdata(file); - - clear_bit(0, &tea->in_use); - return 0; -} - static const struct v4l2_file_operations tea575x_fops = { .owner = THIS_MODULE, - .open = snd_tea575x_exclusive_open, - .release = snd_tea575x_exclusive_release, - .ioctl = video_ioctl2, + .unlocked_ioctl = video_ioctl2, }; static const struct v4l2_ioctl_ops tea575x_ioctl_ops = { @@ -351,20 +281,19 @@ static const struct v4l2_ioctl_ops tea575x_ioctl_ops = { .vidioc_s_tuner = vidioc_s_tuner, .vidioc_g_audio = vidioc_g_audio, .vidioc_s_audio = vidioc_s_audio, - .vidioc_g_input = vidioc_g_input, - .vidioc_s_input = vidioc_s_input, .vidioc_g_frequency = vidioc_g_frequency, .vidioc_s_frequency = vidioc_s_frequency, - .vidioc_queryctrl = vidioc_queryctrl, - .vidioc_g_ctrl = vidioc_g_ctrl, - .vidioc_s_ctrl = vidioc_s_ctrl, }; static struct video_device tea575x_radio = { .name = "tea575x-tuner", .fops = &tea575x_fops, .ioctl_ops = &tea575x_ioctl_ops, - .release = video_device_release, + .release = video_device_release_empty, +}; + +static const struct v4l2_ctrl_ops tea575x_ctrl_ops = { + .s_ctrl = tea575x_s_ctrl, }; /* @@ -373,7 +302,6 @@ static struct video_device tea575x_radio = { int snd_tea575x_init(struct snd_tea575x *tea) { int retval; - struct video_device *tea575x_radio_inst; tea->mute = 1; @@ -381,43 +309,49 @@ int snd_tea575x_init(struct snd_tea575x *tea) if (snd_tea575x_read(tea) != 0x55AA) return -ENODEV; - tea->in_use = 0; tea->val = TEA575X_BIT_BAND_FM | TEA575X_BIT_SEARCH_10_40; tea->freq = 90500 * 16; /* 90.5Mhz default */ + snd_tea575x_set_freq(tea); - tea575x_radio_inst = video_device_alloc(); - if (tea575x_radio_inst == NULL) { - printk(KERN_ERR "tea575x-tuner: not enough memory\n"); - return -ENOMEM; - } + tea->vd = tea575x_radio; + video_set_drvdata(&tea->vd, tea); + mutex_init(&tea->mutex); + tea->vd.lock = &tea->mutex; - memcpy(tea575x_radio_inst, &tea575x_radio, sizeof(tea575x_radio)); + v4l2_ctrl_handler_init(&tea->ctrl_handler, 1); + tea->vd.ctrl_handler = &tea->ctrl_handler; + v4l2_ctrl_new_std(&tea->ctrl_handler, &tea575x_ctrl_ops, V4L2_CID_AUDIO_MUTE, 0, 1, 1, 1); + retval = tea->ctrl_handler.error; + if (retval) { + printk(KERN_ERR "tea575x-tuner: can't initialize controls\n"); + v4l2_ctrl_handler_free(&tea->ctrl_handler); + return retval; + } - strcpy(tea575x_radio.name, tea->tea5759 ? - "TEA5759 radio" : "TEA5757 radio"); + if (tea->ext_init) { + retval = tea->ext_init(tea); + if (retval) { + v4l2_ctrl_handler_free(&tea->ctrl_handler); + return retval; + } + } - video_set_drvdata(tea575x_radio_inst, tea); + v4l2_ctrl_handler_setup(&tea->ctrl_handler); - retval = video_register_device(tea575x_radio_inst, - VFL_TYPE_RADIO, radio_nr); + retval = video_register_device(&tea->vd, VFL_TYPE_RADIO, radio_nr); if (retval) { printk(KERN_ERR "tea575x-tuner: can't register video device!\n"); - kfree(tea575x_radio_inst); + v4l2_ctrl_handler_free(&tea->ctrl_handler); return retval; } - snd_tea575x_set_freq(tea); - tea->vd = tea575x_radio_inst; - return 0; } void snd_tea575x_exit(struct snd_tea575x *tea) { - if (tea->vd) { - video_unregister_device(tea->vd); - tea->vd = NULL; - } + video_unregister_device(&tea->vd); + v4l2_ctrl_handler_free(&tea->ctrl_handler); } static int __init alsa_tea575x_module_init(void) diff --git a/sound/i2c/tea6330t.c b/sound/i2c/tea6330t.c index 0e3a9f2..2d22310 100644 --- a/sound/i2c/tea6330t.c +++ b/sound/i2c/tea6330t.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include diff --git a/sound/isa/ad1816a/ad1816a.c b/sound/isa/ad1816a/ad1816a.c index 3cb75bc..cd44c74 100644 --- a/sound/isa/ad1816a/ad1816a.c +++ b/sound/isa/ad1816a/ad1816a.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include @@ -204,7 +204,7 @@ static int __devinit snd_card_ad1816a_probe(int dev, struct pnp_card_link *pcard if (mpu_port[dev] > 0) { if (snd_mpu401_uart_new(card, 0, MPU401_HW_MPU401, - mpu_port[dev], 0, mpu_irq[dev], IRQF_DISABLED, + mpu_port[dev], 0, mpu_irq[dev], NULL) < 0) printk(KERN_ERR PFX "no MPU-401 device at 0x%lx.\n", mpu_port[dev]); } diff --git a/sound/isa/ad1816a/ad1816a_lib.c b/sound/isa/ad1816a/ad1816a_lib.c index 05aef8b..177eed3 100644 --- a/sound/isa/ad1816a/ad1816a_lib.c +++ b/sound/isa/ad1816a/ad1816a_lib.c @@ -595,7 +595,7 @@ int __devinit snd_ad1816a_create(struct snd_card *card, snd_ad1816a_free(chip); return -EBUSY; } - if (request_irq(irq, snd_ad1816a_interrupt, IRQF_DISABLED, "AD1816A", (void *) chip)) { + if (request_irq(irq, snd_ad1816a_interrupt, 0, "AD1816A", (void *) chip)) { snd_printk(KERN_ERR "ad1816a: can't grab IRQ %d\n", irq); snd_ad1816a_free(chip); return -EBUSY; diff --git a/sound/isa/ad1848/ad1848.c b/sound/isa/ad1848/ad1848.c index 4beeb6f..34ab69b 100644 --- a/sound/isa/ad1848/ad1848.c +++ b/sound/isa/ad1848/ad1848.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/sound/isa/als100.c b/sound/isa/als100.c index 20becc8..fc5b38f 100644 --- a/sound/isa/als100.c +++ b/sound/isa/als100.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include @@ -256,7 +256,6 @@ static int __devinit snd_card_als100_probe(int dev, mpu_type, mpu_port[dev], 0, mpu_irq[dev], - mpu_irq[dev] >= 0 ? IRQF_DISABLED : 0, NULL) < 0) snd_printk(KERN_ERR PFX "no MPU-401 device at 0x%lx\n", mpu_port[dev]); } diff --git a/sound/isa/azt2320.c b/sound/isa/azt2320.c index aac8dc1..e55f3eb 100644 --- a/sound/isa/azt2320.c +++ b/sound/isa/azt2320.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #include #include @@ -234,8 +234,7 @@ static int __devinit snd_card_azt2320_probe(int dev, if (mpu_port[dev] > 0 && mpu_port[dev] != SNDRV_AUTO_PORT) { if (snd_mpu401_uart_new(card, 0, MPU401_HW_AZT2320, mpu_port[dev], 0, - mpu_irq[dev], IRQF_DISABLED, - NULL) < 0) + mpu_irq[dev], NULL) < 0) snd_printk(KERN_ERR PFX "no MPU-401 device at 0x%lx\n", mpu_port[dev]); } diff --git a/sound/isa/cmi8330.c b/sound/isa/cmi8330.c index fe79a16..c94578d 100644 --- a/sound/isa/cmi8330.c +++ b/sound/isa/cmi8330.c @@ -47,7 +47,7 @@ #include #include #include -#include +#include #include #include #include @@ -597,7 +597,7 @@ static int __devinit snd_cmi8330_probe(struct snd_card *card, int dev) if (mpuport[dev] != SNDRV_AUTO_PORT) { if (snd_mpu401_uart_new(card, 0, MPU401_HW_MPU401, mpuport[dev], 0, mpuirq[dev], - IRQF_DISABLED, NULL) < 0) + NULL) < 0) printk(KERN_ERR PFX "no MPU-401 device at 0x%lx.\n", mpuport[dev]); } diff --git a/sound/isa/cs423x/cs4231.c b/sound/isa/cs423x/cs4231.c index cb9153e..6d81fa7 100644 --- a/sound/isa/cs423x/cs4231.c +++ b/sound/isa/cs423x/cs4231.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include @@ -131,7 +131,6 @@ static int __devinit snd_cs4231_probe(struct device *dev, unsigned int n) mpu_irq[n] = -1; if (snd_mpu401_uart_new(card, 0, MPU401_HW_CS4232, mpu_port[n], 0, mpu_irq[n], - mpu_irq[n] >= 0 ? IRQF_DISABLED : 0, NULL) < 0) dev_warn(dev, "MPU401 not detected\n"); } diff --git a/sound/isa/cs423x/cs4236.c b/sound/isa/cs423x/cs4236.c index 999dc1e..f5a94b6 100644 --- a/sound/isa/cs423x/cs4236.c +++ b/sound/isa/cs423x/cs4236.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include @@ -449,8 +449,7 @@ static int __devinit snd_cs423x_probe(struct snd_card *card, int dev) mpu_irq[dev] = -1; if (snd_mpu401_uart_new(card, 0, MPU401_HW_CS4232, mpu_port[dev], 0, - mpu_irq[dev], - mpu_irq[dev] >= 0 ? IRQF_DISABLED : 0, NULL) < 0) + mpu_irq[dev], NULL) < 0) printk(KERN_WARNING IDENT ": MPU401 not detected\n"); } diff --git a/sound/isa/es1688/es1688.c b/sound/isa/es1688/es1688.c index 0cde813..9a1a6f2 100644 --- a/sound/isa/es1688/es1688.c +++ b/sound/isa/es1688/es1688.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include @@ -174,7 +174,7 @@ static int __devinit snd_es1688_probe(struct snd_card *card, unsigned int n) chip->mpu_port > 0) { error = snd_mpu401_uart_new(card, 0, MPU401_HW_ES1688, chip->mpu_port, 0, - mpu_irq[n], IRQF_DISABLED, NULL); + mpu_irq[n], NULL); if (error < 0) return error; } diff --git a/sound/isa/es1688/es1688_lib.c b/sound/isa/es1688/es1688_lib.c index 0767620..1d47be8 100644 --- a/sound/isa/es1688/es1688_lib.c +++ b/sound/isa/es1688/es1688_lib.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -661,7 +662,7 @@ int snd_es1688_create(struct snd_card *card, snd_printk(KERN_ERR "es1688: can't grab port 0x%lx\n", port + 4); return -EBUSY; } - if (request_irq(irq, snd_es1688_interrupt, IRQF_DISABLED, "ES1688", (void *) chip)) { + if (request_irq(irq, snd_es1688_interrupt, 0, "ES1688", (void *) chip)) { snd_printk(KERN_ERR "es1688: can't grab IRQ %d\n", irq); return -EBUSY; } diff --git a/sound/isa/es18xx.c b/sound/isa/es18xx.c index fb4d6b3..98e3ac1 100644 --- a/sound/isa/es18xx.c +++ b/sound/isa/es18xx.c @@ -82,7 +82,7 @@ #include #include #include -#include +#include #include #include @@ -1805,7 +1805,7 @@ static int __devinit snd_es18xx_new_device(struct snd_card *card, return -EBUSY; } - if (request_irq(irq, snd_es18xx_interrupt, IRQF_DISABLED, "ES18xx", + if (request_irq(irq, snd_es18xx_interrupt, 0, "ES18xx", (void *) card)) { snd_es18xx_free(card); snd_printk(KERN_ERR PFX "unable to grap IRQ %d\n", irq); @@ -2160,8 +2160,8 @@ static int __devinit snd_audiodrive_probe(struct snd_card *card, int dev) if (mpu_port[dev] > 0 && mpu_port[dev] != SNDRV_AUTO_PORT) { err = snd_mpu401_uart_new(card, 0, MPU401_HW_ES18XX, - mpu_port[dev], 0, - irq[dev], 0, &chip->rmidi); + mpu_port[dev], MPU401_INFO_IRQ_HOOK, + -1, &chip->rmidi); if (err < 0) return err; } diff --git a/sound/isa/galaxy/galaxy.c b/sound/isa/galaxy/galaxy.c index ee54df0..e51d324 100644 --- a/sound/isa/galaxy/galaxy.c +++ b/sound/isa/galaxy/galaxy.c @@ -585,8 +585,7 @@ static int __devinit snd_galaxy_probe(struct device *dev, unsigned int n) if (mpu_port[n] >= 0) { err = snd_mpu401_uart_new(card, 0, MPU401_HW_MPU401, - mpu_port[n], 0, mpu_irq[n], - IRQF_DISABLED, NULL); + mpu_port[n], 0, mpu_irq[n], NULL); if (err < 0) goto error; } diff --git a/sound/isa/gus/gus_main.c b/sound/isa/gus/gus_main.c index 12eb98f..4490ee4 100644 --- a/sound/isa/gus/gus_main.c +++ b/sound/isa/gus/gus_main.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -180,7 +181,7 @@ int snd_gus_create(struct snd_card *card, snd_gus_free(gus); return -EBUSY; } - if (irq >= 0 && request_irq(irq, snd_gus_interrupt, IRQF_DISABLED, "GUS GF1", (void *) gus)) { + if (irq >= 0 && request_irq(irq, snd_gus_interrupt, 0, "GUS GF1", (void *) gus)) { snd_printk(KERN_ERR "gus: can't grab irq %d\n", irq); snd_gus_free(gus); return -EBUSY; diff --git a/sound/isa/gus/gus_volume.c b/sound/isa/gus/gus_volume.c index c3c028a..3dd841a 100644 --- a/sound/isa/gus/gus_volume.c +++ b/sound/isa/gus/gus_volume.c @@ -19,6 +19,7 @@ */ #include +#include #include #include #define __GUS_TABLES_ALLOC__ diff --git a/sound/isa/gus/gusclassic.c b/sound/isa/gus/gusclassic.c index 086b8f0..d729650 100644 --- a/sound/isa/gus/gusclassic.c +++ b/sound/isa/gus/gusclassic.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/sound/isa/gus/gusextreme.c b/sound/isa/gus/gusextreme.c index 008e8e5..597accd 100644 --- a/sound/isa/gus/gusextreme.c +++ b/sound/isa/gus/gusextreme.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include @@ -317,8 +317,7 @@ static int __devinit snd_gusextreme_probe(struct device *dev, unsigned int n) if (es1688->mpu_port >= 0x300) { error = snd_mpu401_uart_new(card, 0, MPU401_HW_ES1688, - es1688->mpu_port, 0, - mpu_irq[n], IRQF_DISABLED, NULL); + es1688->mpu_port, 0, mpu_irq[n], NULL); if (error < 0) goto out; } diff --git a/sound/isa/gus/gusmax.c b/sound/isa/gus/gusmax.c index 3e4a58b..933cb0f 100644 --- a/sound/isa/gus/gusmax.c +++ b/sound/isa/gus/gusmax.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include @@ -291,7 +291,7 @@ static int __devinit snd_gusmax_probe(struct device *pdev, unsigned int dev) goto _err; } - if (request_irq(xirq, snd_gusmax_interrupt, IRQF_DISABLED, "GUS MAX", (void *)maxcard)) { + if (request_irq(xirq, snd_gusmax_interrupt, 0, "GUS MAX", (void *)maxcard)) { snd_printk(KERN_ERR PFX "unable to grab IRQ %d\n", xirq); err = -EBUSY; goto _err; diff --git a/sound/isa/gus/interwave.c b/sound/isa/gus/interwave.c index c7b80e4..8e7e194 100644 --- a/sound/isa/gus/interwave.c +++ b/sound/isa/gus/interwave.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include @@ -684,7 +684,7 @@ static int __devinit snd_interwave_probe(struct snd_card *card, int dev) if ((err = snd_gus_initialize(gus)) < 0) return err; - if (request_irq(xirq, snd_interwave_interrupt, IRQF_DISABLED, + if (request_irq(xirq, snd_interwave_interrupt, 0, "InterWave", iwcard)) { snd_printk(KERN_ERR PFX "unable to grab IRQ %d\n", xirq); return -EBUSY; diff --git a/sound/isa/msnd/msnd.c b/sound/isa/msnd/msnd.c index 3a1526a..1cee18f 100644 --- a/sound/isa/msnd/msnd.c +++ b/sound/isa/msnd/msnd.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include diff --git a/sound/isa/msnd/msnd.h b/sound/isa/msnd/msnd.h index 3773e24..a168ba3 100644 --- a/sound/isa/msnd/msnd.h +++ b/sound/isa/msnd/msnd.h @@ -249,7 +249,7 @@ struct snd_msnd { /* State variables */ enum { msndClassic, msndPinnacle } type; - mode_t mode; + fmode_t mode; unsigned long flags; #define F_RESETTING 0 #define F_HAVEDIGITAL 1 diff --git a/sound/isa/msnd/msnd_midi.c b/sound/isa/msnd/msnd_midi.c index 7874956..ffc67fd 100644 --- a/sound/isa/msnd/msnd_midi.c +++ b/sound/isa/msnd/msnd_midi.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include diff --git a/sound/isa/msnd/msnd_pinnacle.c b/sound/isa/msnd/msnd_pinnacle.c index 91d6023..a9cc687 100644 --- a/sound/isa/msnd/msnd_pinnacle.c +++ b/sound/isa/msnd/msnd_pinnacle.c @@ -73,9 +73,11 @@ #ifdef MSND_CLASSIC # include "msnd_classic.h" # define LOGNAME "msnd_classic" +# define DEV_NAME "msnd-classic" #else # include "msnd_pinnacle.h" # define LOGNAME "snd_msnd_pinnacle" +# define DEV_NAME "msnd-pinnacle" #endif static void __devinit set_default_audio_parameters(struct snd_msnd *chip) @@ -600,7 +602,7 @@ static int __devinit snd_msnd_attach(struct snd_card *card) mpu_io[0], MPU401_MODE_INPUT | MPU401_MODE_OUTPUT, - mpu_irq[0], IRQF_DISABLED, + mpu_irq[0], &chip->rmidi); if (err < 0) { printk(KERN_ERR LOGNAME @@ -1068,8 +1070,6 @@ static int __devexit snd_msnd_isa_remove(struct device *pdev, unsigned int dev) return 0; } -#define DEV_NAME "msnd-pinnacle" - static struct isa_driver snd_msnd_driver = { .match = snd_msnd_isa_match, .probe = snd_msnd_isa_probe, diff --git a/sound/isa/msnd/msnd_pinnacle_mixer.c b/sound/isa/msnd/msnd_pinnacle_mixer.c index 494058a..1de59d4 100644 --- a/sound/isa/msnd/msnd_pinnacle_mixer.c +++ b/sound/isa/msnd/msnd_pinnacle_mixer.c @@ -16,6 +16,7 @@ ***************************************************************************/ #include +#include #include #include diff --git a/sound/isa/opl3sa2.c b/sound/isa/opl3sa2.c index 9b915e2..64a9a21 100644 --- a/sound/isa/opl3sa2.c +++ b/sound/isa/opl3sa2.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include @@ -667,7 +667,7 @@ static int __devinit snd_opl3sa2_probe(struct snd_card *card, int dev) err = snd_opl3sa2_detect(card); if (err < 0) return err; - err = request_irq(xirq, snd_opl3sa2_interrupt, IRQF_DISABLED, + err = request_irq(xirq, snd_opl3sa2_interrupt, 0, "OPL3-SA2", card); if (err) { snd_printk(KERN_ERR PFX "can't grab IRQ %d\n", xirq); @@ -707,8 +707,9 @@ static int __devinit snd_opl3sa2_probe(struct snd_card *card, int dev) } if (midi_port[dev] >= 0x300 && midi_port[dev] < 0x340) { if ((err = snd_mpu401_uart_new(card, 0, MPU401_HW_OPL3SA2, - midi_port[dev], 0, - xirq, 0, &chip->rmidi)) < 0) + midi_port[dev], + MPU401_INFO_IRQ_HOOK, -1, + &chip->rmidi)) < 0) return err; } sprintf(card->longname, "%s at 0x%lx, irq %d, dma %d", diff --git a/sound/isa/opti9xx/miro.c b/sound/isa/opti9xx/miro.c index 8c24102..3785b7a 100644 --- a/sound/isa/opti9xx/miro.c +++ b/sound/isa/opti9xx/miro.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include @@ -1377,8 +1377,7 @@ static int __devinit snd_miro_probe(struct snd_card *card) rmidi = NULL; else { error = snd_mpu401_uart_new(card, 0, MPU401_HW_MPU401, - mpu_port, 0, miro->mpu_irq, IRQF_DISABLED, - &rmidi); + mpu_port, 0, miro->mpu_irq, &rmidi); if (error < 0) snd_printk(KERN_WARNING "no MPU-401 device at 0x%lx?\n", mpu_port); diff --git a/sound/isa/opti9xx/opti92x-ad1848.c b/sound/isa/opti9xx/opti92x-ad1848.c index 2b83557..cba84ef 100644 --- a/sound/isa/opti9xx/opti92x-ad1848.c +++ b/sound/isa/opti9xx/opti92x-ad1848.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include @@ -888,7 +888,7 @@ static int __devinit snd_opti9xx_probe(struct snd_card *card) #endif #ifdef OPTi93X error = request_irq(irq, snd_opti93x_interrupt, - IRQF_DISABLED, DEV_NAME" - WSS", chip); + 0, DEV_NAME" - WSS", chip); if (error < 0) { snd_printk(KERN_ERR "opti9xx: can't grab IRQ %d\n", irq); return error; @@ -910,7 +910,7 @@ static int __devinit snd_opti9xx_probe(struct snd_card *card) rmidi = NULL; else { error = snd_mpu401_uart_new(card, 0, MPU401_HW_MPU401, - mpu_port, 0, mpu_irq, IRQF_DISABLED, &rmidi); + mpu_port, 0, mpu_irq, &rmidi); if (error) snd_printk(KERN_WARNING "no MPU-401 device at 0x%lx?\n", mpu_port); diff --git a/sound/isa/sb/emu8000.c b/sound/isa/sb/emu8000.c index 5d61f5a..7188787 100644 --- a/sound/isa/sb/emu8000.c +++ b/sound/isa/sb/emu8000.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/sound/isa/sb/emu8000_callback.c b/sound/isa/sb/emu8000_callback.c index 9a3c71c..344b435 100644 --- a/sound/isa/sb/emu8000_callback.c +++ b/sound/isa/sb/emu8000_callback.c @@ -20,6 +20,7 @@ */ #include "emu8000_local.h" +#include #include /* diff --git a/sound/isa/sb/emu8000_patch.c b/sound/isa/sb/emu8000_patch.c index c99c607..e09f144 100644 --- a/sound/isa/sb/emu8000_patch.c +++ b/sound/isa/sb/emu8000_patch.c @@ -22,6 +22,7 @@ #include "emu8000_local.h" #include #include +#include static int emu8000_reset_addr; module_param(emu8000_reset_addr, int, 0444); diff --git a/sound/isa/sb/emu8000_synth.c b/sound/isa/sb/emu8000_synth.c index 0c7905c..4e3fcfb 100644 --- a/sound/isa/sb/emu8000_synth.c +++ b/sound/isa/sb/emu8000_synth.c @@ -22,6 +22,7 @@ #include "emu8000_local.h" #include +#include #include MODULE_AUTHOR("Takashi Iwai, Steve Ratcliffe"); diff --git a/sound/isa/sb/jazz16.c b/sound/isa/sb/jazz16.c index 8ccbcdd..54e3c2c 100644 --- a/sound/isa/sb/jazz16.c +++ b/sound/isa/sb/jazz16.c @@ -322,7 +322,6 @@ static int __devinit snd_jazz16_probe(struct device *devptr, unsigned int dev) MPU401_HW_MPU401, mpu_port[dev], 0, mpu_irq[dev], - mpu_irq[dev] >= 0 ? IRQF_DISABLED : 0, NULL) < 0) snd_printk(KERN_ERR "no MPU-401 device at 0x%lx\n", mpu_port[dev]); diff --git a/sound/isa/sb/sb16.c b/sound/isa/sb/sb16.c index 4d1c5a3..115c774 100644 --- a/sound/isa/sb/sb16.c +++ b/sound/isa/sb/sb16.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include @@ -394,8 +394,9 @@ static int __devinit snd_sb16_probe(struct snd_card *card, int dev) if (chip->mpu_port > 0 && chip->mpu_port != SNDRV_AUTO_PORT) { if ((err = snd_mpu401_uart_new(card, 0, MPU401_HW_SB, - chip->mpu_port, 0, - xirq, 0, &chip->rmidi)) < 0) + chip->mpu_port, + MPU401_INFO_IRQ_HOOK, -1, + &chip->rmidi)) < 0) return err; chip->rmidi_callback = snd_mpu401_uart_interrupt; } diff --git a/sound/isa/sb/sb16_csp.c b/sound/isa/sb/sb16_csp.c index bdc8dde..c1aa21e 100644 --- a/sound/isa/sb/sb16_csp.c +++ b/sound/isa/sb/sb16_csp.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/sound/isa/sb/sb16_main.c b/sound/isa/sb/sb16_main.c index 2a6cc1c..0bbcd47 100644 --- a/sound/isa/sb/sb16_main.c +++ b/sound/isa/sb/sb16_main.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include diff --git a/sound/isa/sb/sb8.c b/sound/isa/sb/sb8.c index 2259e3f..453ef28 100644 --- a/sound/isa/sb/sb8.c +++ b/sound/isa/sb/sb8.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/sound/isa/sb/sb8_main.c b/sound/isa/sb/sb8_main.c index 7d84c9f..24d4121 100644 --- a/sound/isa/sb/sb8_main.c +++ b/sound/isa/sb/sb8_main.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include diff --git a/sound/isa/sb/sb_common.c b/sound/isa/sb/sb_common.c index eae6c1c..3ef9906 100644 --- a/sound/isa/sb/sb_common.c +++ b/sound/isa/sb/sb_common.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -240,7 +241,7 @@ int snd_sbdsp_create(struct snd_card *card, if (request_irq(irq, irq_handler, (hardware == SB_HW_ALS4000 || hardware == SB_HW_CS5530) ? - IRQF_SHARED : IRQF_DISABLED, + IRQF_SHARED : 0, "SoundBlaster", (void *) chip)) { snd_printk(KERN_ERR "sb: can't grab irq %d\n", irq); snd_sbdsp_free(chip); diff --git a/sound/isa/sc6000.c b/sound/isa/sc6000.c index 9a8bbf6..207c161 100644 --- a/sound/isa/sc6000.c +++ b/sound/isa/sc6000.c @@ -658,8 +658,7 @@ static int __devinit snd_sc6000_probe(struct device *devptr, unsigned int dev) if (snd_mpu401_uart_new(card, 0, MPU401_HW_MPU401, mpu_port[dev], 0, - mpu_irq[dev], IRQF_DISABLED, - NULL) < 0) + mpu_irq[dev], NULL) < 0) snd_printk(KERN_ERR "no MPU-401 device at 0x%lx ?\n", mpu_port[dev]); } diff --git a/sound/isa/sscape.c b/sound/isa/sscape.c index e2d5d2d..b4a6aa9 100644 --- a/sound/isa/sscape.c +++ b/sound/isa/sscape.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include @@ -825,8 +825,7 @@ static int __devinit create_mpu401(struct snd_card *card, int devnum, int err; err = snd_mpu401_uart_new(card, devnum, MPU401_HW_MPU401, port, - MPU401_INFO_INTEGRATED, irq, IRQF_DISABLED, - &rawmidi); + MPU401_INFO_INTEGRATED, irq, &rawmidi); if (err == 0) { struct snd_mpu401 *mpu = rawmidi->private_data; mpu->open_input = mpu401_open; diff --git a/sound/isa/wavefront/wavefront.c b/sound/isa/wavefront/wavefront.c index 711670e..150b96b 100644 --- a/sound/isa/wavefront/wavefront.c +++ b/sound/isa/wavefront/wavefront.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include @@ -418,7 +418,7 @@ snd_wavefront_probe (struct snd_card *card, int dev) return -EBUSY; } if (request_irq(ics2115_irq[dev], snd_wavefront_ics2115_interrupt, - IRQF_DISABLED, "ICS2115", acard)) { + 0, "ICS2115", acard)) { snd_printk(KERN_ERR "unable to use ICS2115 IRQ %d\n", ics2115_irq[dev]); return -EBUSY; } @@ -449,8 +449,7 @@ snd_wavefront_probe (struct snd_card *card, int dev) if (cs4232_mpu_port[dev] > 0 && cs4232_mpu_port[dev] != SNDRV_AUTO_PORT) { err = snd_mpu401_uart_new(card, midi_dev, MPU401_HW_CS4232, cs4232_mpu_port[dev], 0, - cs4232_mpu_irq[dev], IRQF_DISABLED, - NULL); + cs4232_mpu_irq[dev], NULL); if (err < 0) { snd_printk (KERN_ERR "can't allocate CS4232 MPU-401 device\n"); return err; diff --git a/sound/isa/wavefront/wavefront_fx.c b/sound/isa/wavefront/wavefront_fx.c index 657e2d6..e51e090 100644 --- a/sound/isa/wavefront/wavefront_fx.c +++ b/sound/isa/wavefront/wavefront_fx.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/sound/isa/wavefront/wavefront_synth.c b/sound/isa/wavefront/wavefront_synth.c index 4fb7b19..405f8b6 100644 --- a/sound/isa/wavefront/wavefront_synth.c +++ b/sound/isa/wavefront/wavefront_synth.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/sound/isa/wss/wss_lib.c b/sound/isa/wss/wss_lib.c index 2a42cc3..49c8a0c 100644 --- a/sound/isa/wss/wss_lib.c +++ b/sound/isa/wss/wss_lib.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -1833,7 +1834,7 @@ int snd_wss_create(struct snd_card *card, } chip->cport = cport; if (!(hwshare & WSS_HWSHARE_IRQ)) - if (request_irq(irq, snd_wss_interrupt, IRQF_DISABLED, + if (request_irq(irq, snd_wss_interrupt, 0, "WSS", (void *) chip)) { snd_printk(KERN_ERR "wss: can't grab IRQ %d\n", irq); snd_wss_free(chip); diff --git a/sound/mips/Kconfig b/sound/mips/Kconfig index a9823fa..d2f615a 100644 --- a/sound/mips/Kconfig +++ b/sound/mips/Kconfig @@ -23,12 +23,15 @@ config SND_SGI_HAL2 config SND_AU1X00 - tristate "Au1x00 AC97 Port Driver" - depends on SOC_AU1000 || SOC_AU1100 || SOC_AU1500 + tristate "Au1x00 AC97 Port Driver (DEPRECATED)" + depends on MIPS_ALCHEMY select SND_PCM select SND_AC97_CODEC help ALSA Sound driver for the Au1x00's AC97 port. + Newer drivers for ASoC are available, please do not use + this driver as it will be removed in the future. + endif # SND_MIPS diff --git a/sound/mips/au1x00.c b/sound/mips/au1x00.c index 446cf97..3f3ec0b 100644 --- a/sound/mips/au1x00.c +++ b/sound/mips/au1x00.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -465,13 +466,13 @@ snd_au1000_pcm_new(struct snd_au1000 *au1000) flags = claim_dma_lock(); if ((au1000->stream[PLAYBACK]->dma = request_au1000_dma(DMA_ID_AC97C_TX, - "AC97 TX", au1000_dma_interrupt, IRQF_DISABLED, + "AC97 TX", au1000_dma_interrupt, 0, au1000->stream[PLAYBACK])) < 0) { release_dma_lock(flags); return -EBUSY; } if ((au1000->stream[CAPTURE]->dma = request_au1000_dma(DMA_ID_AC97C_RX, - "AC97 RX", au1000_dma_interrupt, IRQF_DISABLED, + "AC97 RX", au1000_dma_interrupt, 0, au1000->stream[CAPTURE])) < 0){ release_dma_lock(flags); return -EBUSY; diff --git a/sound/mips/hal2.c b/sound/mips/hal2.c index 453d343..2e6c858 100644 --- a/sound/mips/hal2.c +++ b/sound/mips/hal2.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/sound/mips/sgio2audio.c b/sound/mips/sgio2audio.c index 717604c..69425d4 100644 --- a/sound/mips/sgio2audio.c +++ b/sound/mips/sgio2audio.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include diff --git a/sound/oss/Kconfig b/sound/oss/Kconfig index 6c93e05..1fc28f5 100644 --- a/sound/oss/Kconfig +++ b/sound/oss/Kconfig @@ -250,6 +250,7 @@ config MSND_FIFOSIZE menuconfig SOUND_OSS tristate "OSS sound modules" depends on ISA_DMA_API && VIRT_TO_BUS + depends on !GENERIC_ISA_DMA_SUPPORT_BROKEN help OSS is the Open Sound System suite of sound card drivers. They make sound programming easier since they provide a common API. Say Y or @@ -432,9 +433,7 @@ config SOUND_SB ALS-007 and ALS-1X0 chips (read ) and for cards based on ESS chips (read and - ). If you have an SB AWE 32 or SB AWE - 64, say Y here and also to "AWE32 synth" below and read - . If you have an IBM Mwave + ). If you have an IBM Mwave card, say Y here and read . If you compile the driver into the kernel and don't want to use diff --git a/sound/oss/ad1848.c b/sound/oss/ad1848.c index 4d2a6ae..8a197fd 100644 --- a/sound/oss/ad1848.c +++ b/sound/oss/ad1848.c @@ -458,7 +458,7 @@ static int ad1848_set_recmask(ad1848_info * devc, int mask) return mask; } -static void change_bits(ad1848_info * devc, unsigned char *regval, +static void oss_change_bits(ad1848_info *devc, unsigned char *regval, unsigned char *muteval, int dev, int chn, int newval) { unsigned char mask; @@ -516,10 +516,10 @@ static void ad1848_mixer_set_channel(ad1848_info *devc, int dev, int value, int if (muteregoffs != regoffs) { muteval = ad_read(devc, muteregoffs); - change_bits(devc, &val, &muteval, dev, channel, value); + oss_change_bits(devc, &val, &muteval, dev, channel, value); } else - change_bits(devc, &val, &val, dev, channel, value); + oss_change_bits(devc, &val, &val, dev, channel, value); spin_lock_irqsave(&devc->lock,flags); ad_write(devc, regoffs, val); diff --git a/sound/oss/pas2_pcm.c b/sound/oss/pas2_pcm.c index 8f7d175..6f13ab4 100644 --- a/sound/oss/pas2_pcm.c +++ b/sound/oss/pas2_pcm.c @@ -63,13 +63,13 @@ static int pcm_set_speed(int arg) if (pcm_channels & 2) { - foo = ((CLOCK_TICK_RATE / 2) + (arg / 2)) / arg; - arg = ((CLOCK_TICK_RATE / 2) + (foo / 2)) / foo; + foo = ((PIT_TICK_RATE / 2) + (arg / 2)) / arg; + arg = ((PIT_TICK_RATE / 2) + (foo / 2)) / foo; } else { - foo = (CLOCK_TICK_RATE + (arg / 2)) / arg; - arg = (CLOCK_TICK_RATE + (foo / 2)) / foo; + foo = (PIT_TICK_RATE + (arg / 2)) / arg; + arg = (PIT_TICK_RATE + (foo / 2)) / foo; } pcm_speed = arg; diff --git a/sound/oss/pss.c b/sound/oss/pss.c index 9b800ce..2fc0624 100644 --- a/sound/oss/pss.c +++ b/sound/oss/pss.c @@ -673,7 +673,8 @@ static void configure_nonsound_components(void) if (pss_cdrom_port == -1) { /* If cdrom port enablation wasn't requested */ printk(KERN_INFO "PSS: CDROM port not enabled.\n"); - } else if (check_region(pss_cdrom_port, 2)) { + } else if (!request_region(pss_cdrom_port, 2, "PSS CDROM")) { + pss_cdrom_port = -1; printk(KERN_ERR "PSS: CDROM I/O port conflict.\n"); } else { set_io_base(devc, CONF_CDROM, pss_cdrom_port); @@ -1232,7 +1233,8 @@ static void __exit cleanup_pss(void) if(pssmpu) unload_pss_mpu(&cfg_mpu); unload_pss(&cfg); - } + } else if (pss_cdrom_port != -1) + release_region(pss_cdrom_port, 2); if(!pss_keep_settings) /* Keep hardware settings if asked */ { diff --git a/sound/oss/sb_mixer.c b/sound/oss/sb_mixer.c index 2039d31..f8f3b7a 100644 --- a/sound/oss/sb_mixer.c +++ b/sound/oss/sb_mixer.c @@ -232,7 +232,7 @@ static int detect_mixer(sb_devc * devc) return 1; } -static void change_bits(sb_devc * devc, unsigned char *regval, int dev, int chn, int newval) +static void oss_change_bits(sb_devc *devc, unsigned char *regval, int dev, int chn, int newval) { unsigned char mask; int shift; @@ -284,7 +284,7 @@ int sb_common_mixer_set(sb_devc * devc, int dev, int left, int right) return -EINVAL; val = sb_getmixer(devc, regoffs); - change_bits(devc, &val, dev, LEFT_CHN, left); + oss_change_bits(devc, &val, dev, LEFT_CHN, left); if ((*devc->iomap)[dev][RIGHT_CHN].regno != regoffs) /* * Change register @@ -304,7 +304,7 @@ int sb_common_mixer_set(sb_devc * devc, int dev, int left, int right) * Read the new one */ } - change_bits(devc, &val, dev, RIGHT_CHN, right); + oss_change_bits(devc, &val, dev, RIGHT_CHN, right); sb_setmixer(devc, regoffs, val); diff --git a/sound/oss/sound_timer.c b/sound/oss/sound_timer.c index 48cda6c..8021c85 100644 --- a/sound/oss/sound_timer.c +++ b/sound/oss/sound_timer.c @@ -320,7 +320,7 @@ void sound_timer_init(struct sound_lowlev_timer *t, char *name) n = sound_alloc_timerdev(); if (n == -1) n = 0; /* Overwrite the system timer */ - strcpy(sound_timer.info.name, name); + strlcpy(sound_timer.info.name, name, sizeof(sound_timer.info.name)); sound_timer_devs[n] = &sound_timer; } EXPORT_SYMBOL(sound_timer_init); diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c index 11ccc23..1e7cfba 100644 --- a/sound/pci/ac97/ac97_codec.c +++ b/sound/pci/ac97/ac97_codec.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/sound/pci/ac97/ac97_pcm.c b/sound/pci/ac97/ac97_pcm.c index 48cbda9..f1488fc 100644 --- a/sound/pci/ac97/ac97_pcm.c +++ b/sound/pci/ac97/ac97_pcm.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/sound/pci/ali5451/ali5451.c b/sound/pci/ali5451/ali5451.c index f71a0ff..be662c9 100644 --- a/sound/pci/ali5451/ali5451.c +++ b/sound/pci/ali5451/ali5451.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include @@ -2090,7 +2090,7 @@ static int __devinit snd_ali_resources(struct snd_ali *codec) codec->port = pci_resource_start(codec->pci, 0); if (request_irq(codec->pci->irq, snd_ali_card_interrupt, - IRQF_SHARED, "ALI 5451", codec)) { + IRQF_SHARED, KBUILD_MODNAME, codec)) { snd_printk(KERN_ERR "Unable to request irq.\n"); return -EBUSY; } @@ -2295,7 +2295,7 @@ static void __devexit snd_ali_remove(struct pci_dev *pci) } static struct pci_driver driver = { - .name = "ALI 5451", + .name = KBUILD_MODNAME, .id_table = snd_ali_ids, .probe = snd_ali_probe, .remove = __devexit_p(snd_ali_remove), diff --git a/sound/pci/asihpi/asihpi.c b/sound/pci/asihpi/asihpi.c index e3569bd..fbf0bcd 100644 --- a/sound/pci/asihpi/asihpi.c +++ b/sound/pci/asihpi/asihpi.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -41,29 +42,10 @@ #include #include - MODULE_LICENSE("GPL"); MODULE_AUTHOR("AudioScience inc. "); MODULE_DESCRIPTION("AudioScience ALSA ASI5000 ASI6000 ASI87xx ASI89xx"); -#if defined CONFIG_SND_DEBUG -/* copied from pcm_lib.c, hope later patch will make that version public -and this copy can be removed */ -static void pcm_debug_name(struct snd_pcm_substream *substream, - char *name, size_t len) -{ - snprintf(name, len, "pcmC%dD%d%c:%d", - substream->pcm->card->number, - substream->pcm->device, - substream->stream ? 'c' : 'p', - substream->number); -} -#define DEBUG_NAME(substream, name) char name[16]; pcm_debug_name(substream, name, sizeof(name)) -#else -#define pcm_debug_name(s, n, l) do { } while (0) -#define DEBUG_NAME(name, substream) do { } while (0) -#endif - #if defined CONFIG_SND_DEBUG_VERBOSE /** * snd_printddd - very verbose debug printk @@ -304,7 +286,8 @@ static u16 handle_error(u16 err, int line, char *filename) static void print_hwparams(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *p) { - DEBUG_NAME(substream, name); + char name[16]; + snd_pcm_debug_name(substream, name, sizeof(name)); snd_printd("%s HWPARAMS\n", name); snd_printd(" samplerate %d Hz\n", params_rate(p)); snd_printd(" channels %d\n", params_channels(p)); @@ -576,8 +559,9 @@ static int snd_card_asihpi_trigger(struct snd_pcm_substream *substream, struct snd_card_asihpi *card = snd_pcm_substream_chip(substream); struct snd_pcm_substream *s; u16 e; - DEBUG_NAME(substream, name); + char name[16]; + snd_pcm_debug_name(substream, name, sizeof(name)); snd_printdd("%s trigger\n", name); switch (cmd) { @@ -741,7 +725,9 @@ static void snd_card_asihpi_timer_function(unsigned long data) int loops = 0; u16 state; u32 buffer_size, bytes_avail, samples_played, on_card_bytes; - DEBUG_NAME(substream, name); + char name[16]; + + snd_pcm_debug_name(substream, name, sizeof(name)); snd_printdd("%s snd_card_asihpi_timer_function\n", name); @@ -782,7 +768,10 @@ static void snd_card_asihpi_timer_function(unsigned long data) s->number); ds->drained_count++; if (ds->drained_count > 2) { + unsigned long flags; + snd_pcm_stream_lock_irqsave(s, flags); snd_pcm_stop(s, SNDRV_PCM_STATE_XRUN); + snd_pcm_stream_unlock_irqrestore(s, flags); continue; } } else { @@ -1323,10 +1312,12 @@ static const char * const asihpi_src_names[] = { "RF", "Clock", "Bitstream", - "Microphone", - "Cobranet", + "Mic", + "Net", "Analog", "Adapter", + "RTP", + "GPI", }; compile_time_assert( @@ -1341,8 +1332,10 @@ static const char * const asihpi_dst_names[] = { "Digital", "RF", "Speaker", - "Cobranet Out", - "Analog" + "Net", + "Analog", + "RTP", + "GPO", }; compile_time_assert( @@ -1476,11 +1469,40 @@ static int snd_asihpi_volume_put(struct snd_kcontrol *kcontrol, static const DECLARE_TLV_DB_SCALE(db_scale_100, -10000, VOL_STEP_mB, 0); +#define snd_asihpi_volume_mute_info snd_ctl_boolean_mono_info + +static int snd_asihpi_volume_mute_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + u32 h_control = kcontrol->private_value; + u32 mute; + + hpi_handle_error(hpi_volume_get_mute(h_control, &mute)); + ucontrol->value.integer.value[0] = mute ? 0 : 1; + + return 0; +} + +static int snd_asihpi_volume_mute_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + u32 h_control = kcontrol->private_value; + int change = 1; + /* HPI currently only supports all or none muting of multichannel volume + ALSA Switch element has opposite sense to HPI mute: on==unmuted, off=muted + */ + int mute = ucontrol->value.integer.value[0] ? 0 : HPI_BITMASK_ALL_CHANNELS; + hpi_handle_error(hpi_volume_set_mute(h_control, mute)); + return change; +} + static int __devinit snd_asihpi_volume_add(struct snd_card_asihpi *asihpi, struct hpi_control *hpi_ctl) { struct snd_card *card = asihpi->card; struct snd_kcontrol_new snd_control; + int err; + u32 mute; asihpi_ctl_init(&snd_control, hpi_ctl, "Volume"); snd_control.access = SNDRV_CTL_ELEM_ACCESS_READWRITE | @@ -1490,7 +1512,19 @@ static int __devinit snd_asihpi_volume_add(struct snd_card_asihpi *asihpi, snd_control.put = snd_asihpi_volume_put; snd_control.tlv.p = db_scale_100; - return ctl_add(card, &snd_control, asihpi); + err = ctl_add(card, &snd_control, asihpi); + if (err) + return err; + + if (hpi_volume_get_mute(hpi_ctl->h_control, &mute) == 0) { + asihpi_ctl_init(&snd_control, hpi_ctl, "Switch"); + snd_control.access = SNDRV_CTL_ELEM_ACCESS_READWRITE; + snd_control.info = snd_asihpi_volume_mute_info; + snd_control.get = snd_asihpi_volume_mute_get; + snd_control.put = snd_asihpi_volume_mute_put; + err = ctl_add(card, &snd_control, asihpi); + } + return err; } /*------------------------------------------------------------ @@ -2923,7 +2957,7 @@ static DEFINE_PCI_DEVICE_TABLE(asihpi_pci_tbl) = { MODULE_DEVICE_TABLE(pci, asihpi_pci_tbl); static struct pci_driver driver = { - .name = "asihpi", + .name = KBUILD_MODNAME, .id_table = asihpi_pci_tbl, .probe = snd_asihpi_probe, .remove = __devexit_p(snd_asihpi_remove), diff --git a/sound/pci/asihpi/hpi.h b/sound/pci/asihpi/hpi.h index 255429c..f207272 100644 --- a/sound/pci/asihpi/hpi.h +++ b/sound/pci/asihpi/hpi.h @@ -1,7 +1,7 @@ /****************************************************************************** AudioScience HPI driver - Copyright (C) 1997-2010 AudioScience Inc. + Copyright (C) 1997-2011 AudioScience Inc. This program is free software; you can redistribute it and/or modify it under the terms of version 2 of the GNU General Public License as @@ -42,12 +42,11 @@ i.e 3.05.02 is a development version #define HPI_VER_MINOR(v) ((int)((v >> 8) & 0xFF)) #define HPI_VER_RELEASE(v) ((int)(v & 0xFF)) -/* Use single digits for versions less that 10 to avoid octal. */ -#define HPI_VER HPI_VERSION_CONSTRUCTOR(4L, 6, 0) -#define HPI_VER_STRING "4.06.00" +#define HPI_VER HPI_VERSION_CONSTRUCTOR(4L, 8, 0) +#define HPI_VER_STRING "4.08.00" /* Library version as documented in hpi-api-versions.txt */ -#define HPI_LIB_VER HPI_VERSION_CONSTRUCTOR(9, 0, 0) +#define HPI_LIB_VER HPI_VERSION_CONSTRUCTOR(10, 0, 0) #include #define HPI_BUILD_EXCLUDE_DEPRECATED @@ -211,8 +210,12 @@ enum HPI_SOURCENODES { HPI_SOURCENODE_COBRANET = 109, HPI_SOURCENODE_ANALOG = 110, /**< analog input node. */ HPI_SOURCENODE_ADAPTER = 111, /**< adapter node. */ + /** RTP stream input node - This node is a destination for + packets of RTP audio samples from other devices. */ + HPI_SOURCENODE_RTP_DESTINATION = 112, + HPI_SOURCENODE_GP_IN = 113, /**< general purpose input. */ /* !!!Update this AND hpidebug.h if you add a new sourcenode type!!! */ - HPI_SOURCENODE_LAST_INDEX = 111 /**< largest ID */ + HPI_SOURCENODE_LAST_INDEX = 113 /**< largest ID */ /* AX6 max sourcenode types = 15 */ }; @@ -228,7 +231,7 @@ enum HPI_DESTNODES { HPI_DESTNODE_NONE = 200, /** In Stream (Record) node. */ HPI_DESTNODE_ISTREAM = 201, - HPI_DESTNODE_LINEOUT = 202, /**< line out node. */ + HPI_DESTNODE_LINEOUT = 202, /**< line out node. */ HPI_DESTNODE_AESEBU_OUT = 203, /**< AES/EBU output node. */ HPI_DESTNODE_RF = 204, /**< RF output node. */ HPI_DESTNODE_SPEAKER = 205, /**< speaker output node. */ @@ -236,9 +239,12 @@ enum HPI_DESTNODES { Audio samples from the device are sent out on the Cobranet network.*/ HPI_DESTNODE_COBRANET = 206, HPI_DESTNODE_ANALOG = 207, /**< analog output node. */ - + /** RTP stream output node - This node is a source for + packets of RTP audio samples that are sent to other devices. */ + HPI_DESTNODE_RTP_SOURCE = 208, + HPI_DESTNODE_GP_OUT = 209, /**< general purpose output node. */ /* !!!Update this AND hpidebug.h if you add a new destnode type!!! */ - HPI_DESTNODE_LAST_INDEX = 207 /**< largest ID */ + HPI_DESTNODE_LAST_INDEX = 209 /**< largest ID */ /* AX6 max destnode types = 15 */ }; diff --git a/sound/pci/asihpi/hpi6000.c b/sound/pci/asihpi/hpi6000.c index df4aed5..3cc6f11 100644 --- a/sound/pci/asihpi/hpi6000.c +++ b/sound/pci/asihpi/hpi6000.c @@ -359,7 +359,7 @@ void HPI_6000(struct hpi_message *phm, struct hpi_response *phr) HPI_ERROR_PROCESSING_MESSAGE); switch (phm->type) { - case HPI_TYPE_MESSAGE: + case HPI_TYPE_REQUEST: switch (phm->object) { case HPI_OBJ_SUBSYSTEM: subsys_message(phm, phr); @@ -538,7 +538,7 @@ static short create_adapter_obj(struct hpi_adapter_obj *pao, HPI_DEBUG_LOG(VERBOSE, "send ADAPTER_GET_INFO\n"); memset(&hm, 0, sizeof(hm)); - hm.type = HPI_TYPE_MESSAGE; + hm.type = HPI_TYPE_REQUEST; hm.size = sizeof(struct hpi_message); hm.object = HPI_OBJ_ADAPTER; hm.function = HPI_ADAPTER_GET_INFO; @@ -946,11 +946,8 @@ static short hpi6000_adapter_boot_load_dsp(struct hpi_adapter_obj *pao, } /* write the DSP code down into the DSPs memory */ - /*HpiDspCode_Open(nBootLoadFamily,&DspCode,pdwOsErrorCode); */ - dsp_code.ps_dev = pao->pci.pci_dev; - - error = hpi_dsp_code_open(boot_load_family, &dsp_code, - pos_error_code); + error = hpi_dsp_code_open(boot_load_family, pao->pci.pci_dev, + &dsp_code, pos_error_code); if (error) return error; diff --git a/sound/pci/asihpi/hpi6205.c b/sound/pci/asihpi/hpi6205.c index 9d5df54..e041a6a 100644 --- a/sound/pci/asihpi/hpi6205.c +++ b/sound/pci/asihpi/hpi6205.c @@ -373,6 +373,7 @@ static void instream_message(struct hpi_adapter_obj *pao, /** Entry point to this HPI backend * All calls to the HPI start here */ +static void _HPI_6205(struct hpi_adapter_obj *pao, struct hpi_message *phm, struct hpi_response *phr) { @@ -392,7 +393,7 @@ void _HPI_6205(struct hpi_adapter_obj *pao, struct hpi_message *phm, HPI_DEBUG_LOG(VERBOSE, "start of switch\n"); switch (phm->type) { - case HPI_TYPE_MESSAGE: + case HPI_TYPE_REQUEST: switch (phm->object) { case HPI_OBJ_SUBSYSTEM: subsys_message(pao, phm, phr); @@ -402,7 +403,6 @@ void _HPI_6205(struct hpi_adapter_obj *pao, struct hpi_message *phm, adapter_message(pao, phm, phr); break; - case HPI_OBJ_CONTROLEX: case HPI_OBJ_CONTROL: control_message(pao, phm, phr); break; @@ -634,11 +634,12 @@ static u16 create_adapter_obj(struct hpi_adapter_obj *pao, HPI_DEBUG_LOG(VERBOSE, "init ADAPTER_GET_INFO\n"); memset(&hm, 0, sizeof(hm)); - hm.type = HPI_TYPE_MESSAGE; + /* wAdapterIndex == version == 0 */ + hm.type = HPI_TYPE_REQUEST; hm.size = sizeof(hm); hm.object = HPI_OBJ_ADAPTER; hm.function = HPI_ADAPTER_GET_INFO; - hm.adapter_index = 0; + memset(&hr, 0, sizeof(hr)); hr.size = sizeof(hr); @@ -658,9 +659,6 @@ static u16 create_adapter_obj(struct hpi_adapter_obj *pao, hr.u.ax.info.num_outstreams + hr.u.ax.info.num_instreams; - hpios_locked_mem_prepare((max_streams * 6) / 10, max_streams, - 65536, pao->pci.pci_dev); - HPI_DEBUG_LOG(VERBOSE, "got adapter info type %x index %d serial %d\n", hr.u.ax.info.adapter_type, hr.u.ax.info.adapter_index, @@ -709,9 +707,6 @@ static void delete_adapter_obj(struct hpi_adapter_obj *pao) [i]); phw->outstream_host_buffer_size[i] = 0; } - - hpios_locked_mem_unprepare(pao->pci.pci_dev); - kfree(phw); } @@ -1371,9 +1366,8 @@ static u16 adapter_boot_load_dsp(struct hpi_adapter_obj *pao, return err; /* write the DSP code down into the DSPs memory */ - dsp_code.ps_dev = pao->pci.pci_dev; - err = hpi_dsp_code_open(boot_code_id[dsp], &dsp_code, - pos_error_code); + err = hpi_dsp_code_open(boot_code_id[dsp], pao->pci.pci_dev, + &dsp_code, pos_error_code); if (err) return err; @@ -2084,13 +2078,13 @@ static u16 message_response_sequence(struct hpi_adapter_obj *pao, u16 err = 0; message_count++; - if (phm->size > sizeof(interface->u)) { + if (phm->size > sizeof(interface->u.message_buffer)) { phr->error = HPI_ERROR_MESSAGE_BUFFER_TOO_SMALL; - phr->specific_error = sizeof(interface->u); + phr->specific_error = sizeof(interface->u.message_buffer); phr->size = sizeof(struct hpi_response_header); HPI_DEBUG_LOG(ERROR, "message len %d too big for buffer %zd \n", phm->size, - sizeof(interface->u)); + sizeof(interface->u.message_buffer)); return 0; } @@ -2122,18 +2116,19 @@ static u16 message_response_sequence(struct hpi_adapter_obj *pao, /* read the result */ if (time_out) { - if (interface->u.response_buffer.size <= phr->size) + if (interface->u.response_buffer.response.size <= phr->size) memcpy(phr, &interface->u.response_buffer, - interface->u.response_buffer.size); + interface->u.response_buffer.response.size); else { HPI_DEBUG_LOG(ERROR, "response len %d too big for buffer %d\n", - interface->u.response_buffer.size, phr->size); + interface->u.response_buffer.response.size, + phr->size); memcpy(phr, &interface->u.response_buffer, sizeof(struct hpi_response_header)); phr->error = HPI_ERROR_RESPONSE_BUFFER_TOO_SMALL; phr->specific_error = - interface->u.response_buffer.size; + interface->u.response_buffer.response.size; phr->size = sizeof(struct hpi_response_header); } } @@ -2202,23 +2197,6 @@ static void hw_message(struct hpi_adapter_obj *pao, struct hpi_message *phm, phm->u.d.u.data.data_size, H620_HIF_GET_DATA); break; - case HPI_CONTROL_SET_STATE: - if (phm->object == HPI_OBJ_CONTROLEX - && phm->u.cx.attribute == HPI_COBRANET_SET_DATA) - err = hpi6205_transfer_data(pao, - phm->u.cx.u.cobranet_bigdata.pb_data, - phm->u.cx.u.cobranet_bigdata.byte_count, - H620_HIF_SEND_DATA); - break; - - case HPI_CONTROL_GET_STATE: - if (phm->object == HPI_OBJ_CONTROLEX - && phm->u.cx.attribute == HPI_COBRANET_GET_DATA) - err = hpi6205_transfer_data(pao, - phm->u.cx.u.cobranet_bigdata.pb_data, - phr->u.cx.u.cobranet_data.byte_count, - H620_HIF_GET_DATA); - break; } phr->error = err; diff --git a/sound/pci/asihpi/hpi6205.h b/sound/pci/asihpi/hpi6205.h index df2f02c..ec0827b 100644 --- a/sound/pci/asihpi/hpi6205.h +++ b/sound/pci/asihpi/hpi6205.h @@ -1,7 +1,7 @@ /***************************************************************************** AudioScience HPI driver - Copyright (C) 1997-2010 AudioScience Inc. + Copyright (C) 1997-2011 AudioScience Inc. This program is free software; you can redistribute it and/or modify it under the terms of version 2 of the GNU General Public License as @@ -70,15 +70,28 @@ The Host located memory buffer that the 6205 will bus master in and out of. ************************************************************/ #define HPI6205_SIZEOF_DATA (16*1024) + +struct message_buffer_6205 { + struct hpi_message message; + char data[256]; +}; + +struct response_buffer_6205 { + struct hpi_response response; + char data[256]; +}; + +union buffer_6205 { + struct message_buffer_6205 message_buffer; + struct response_buffer_6205 response_buffer; + u8 b_data[HPI6205_SIZEOF_DATA]; +}; + struct bus_master_interface { u32 host_cmd; u32 dsp_ack; u32 transfer_size_in_bytes; - union { - struct hpi_message_header message_buffer; - struct hpi_response_header response_buffer; - u8 b_data[HPI6205_SIZEOF_DATA]; - } u; + union buffer_6205 u; struct controlcache_6205 control_cache; struct async_event_buffer_6205 async_buffer; struct hpi_hostbuffer_status diff --git a/sound/pci/asihpi/hpi_internal.h b/sound/pci/asihpi/hpi_internal.h index bf5eced..d497030 100644 --- a/sound/pci/asihpi/hpi_internal.h +++ b/sound/pci/asihpi/hpi_internal.h @@ -1,7 +1,7 @@ /****************************************************************************** AudioScience HPI driver - Copyright (C) 1997-2010 AudioScience Inc. + Copyright (C) 1997-2011 AudioScience Inc. This program is free software; you can redistribute it and/or modify it under the terms of version 2 of the GNU General Public License as @@ -32,12 +32,6 @@ HPI internal definitions #include "hpios.h" /* physical memory allocation */ -void hpios_locked_mem_init(void - ); -void hpios_locked_mem_free_all(void - ); -#define hpios_locked_mem_prepare(a, b, c, d); -#define hpios_locked_mem_unprepare(a) /** Allocate and map an area of locked memory for bus master DMA operations. @@ -226,8 +220,8 @@ enum HPI_CONTROL_ATTRIBUTES { HPI_COBRANET_SET = HPI_CTL_ATTR(COBRANET, 1), HPI_COBRANET_GET = HPI_CTL_ATTR(COBRANET, 2), - HPI_COBRANET_SET_DATA = HPI_CTL_ATTR(COBRANET, 3), - HPI_COBRANET_GET_DATA = HPI_CTL_ATTR(COBRANET, 4), + /*HPI_COBRANET_SET_DATA = HPI_CTL_ATTR(COBRANET, 3), */ + /*HPI_COBRANET_GET_DATA = HPI_CTL_ATTR(COBRANET, 4), */ HPI_COBRANET_GET_STATUS = HPI_CTL_ATTR(COBRANET, 5), HPI_COBRANET_SEND_PACKET = HPI_CTL_ATTR(COBRANET, 6), HPI_COBRANET_GET_PACKET = HPI_CTL_ATTR(COBRANET, 7), @@ -364,10 +358,12 @@ Used in DLL to indicate device not present #define HPI_ADAPTER_ASI(f) (f) enum HPI_MESSAGE_TYPES { - HPI_TYPE_MESSAGE = 1, + HPI_TYPE_REQUEST = 1, HPI_TYPE_RESPONSE = 2, HPI_TYPE_DATA = 3, - HPI_TYPE_SSX2BYPASS_MESSAGE = 4 + HPI_TYPE_SSX2BYPASS_MESSAGE = 4, + HPI_TYPE_COMMAND = 5, + HPI_TYPE_NOTIFICATION = 6 }; enum HPI_OBJECT_TYPES { @@ -383,7 +379,7 @@ enum HPI_OBJECT_TYPES { HPI_OBJ_WATCHDOG = 10, HPI_OBJ_CLOCK = 11, HPI_OBJ_PROFILE = 12, - HPI_OBJ_CONTROLEX = 13, + /* HPI_ OBJ_ CONTROLEX = 13, */ HPI_OBJ_ASYNCEVENT = 14 #define HPI_OBJ_MAXINDEX 14 }; @@ -608,7 +604,7 @@ struct hpi_data_compat32 { #endif struct hpi_buffer { - /** placehoder for backward compatibility (see dwBufferSize) */ + /** placeholder for backward compatibility (see dwBufferSize) */ struct hpi_msg_format reserved; u32 command; /**< HPI_BUFFER_CMD_xxx*/ u32 pci_address; /**< PCI physical address of buffer for DSP DMA */ @@ -912,95 +908,13 @@ union hpi_control_union_res { u32 remaining_chars; } chars8; char c_data12[12]; -}; - -/* HPI_CONTROLX_STRUCTURES */ - -/* Message */ - -/** Used for all HMI variables where max length <= 8 bytes -*/ -struct hpi_controlx_msg_cobranet_data { - u32 hmi_address; - u32 byte_count; - u32 data[2]; -}; - -/** Used for string data, and for packet bridge -*/ -struct hpi_controlx_msg_cobranet_bigdata { - u32 hmi_address; - u32 byte_count; - u8 *pb_data; -#ifndef HPI64BIT - u32 padding; -#endif -}; - -/** Used for PADS control reading of string fields. -*/ -struct hpi_controlx_msg_pad_data { - u32 field; - u32 byte_count; - u8 *pb_data; -#ifndef HPI64BIT - u32 padding; -#endif -}; - -/** Used for generic data -*/ - -struct hpi_controlx_msg_generic { - u32 param1; - u32 param2; -}; - -struct hpi_controlx_msg { - u16 attribute; /* control attribute or property */ - u16 saved_index; - union { - struct hpi_controlx_msg_cobranet_data cobranet_data; - struct hpi_controlx_msg_cobranet_bigdata cobranet_bigdata; - struct hpi_controlx_msg_generic generic; - struct hpi_controlx_msg_pad_data pad_data; - /*struct param_value universal_value; */ - /* nothing extra to send for status read */ - } u; -}; - -/* Response */ -/** -*/ -struct hpi_controlx_res_cobranet_data { - u32 byte_count; - u32 data[2]; -}; - -struct hpi_controlx_res_cobranet_bigdata { - u32 byte_count; -}; - -struct hpi_controlx_res_cobranet_status { - u32 status; - u32 readable_size; - u32 writeable_size; -}; - -struct hpi_controlx_res_generic { - u32 param1; - u32 param2; -}; - -struct hpi_controlx_res { union { - struct hpi_controlx_res_cobranet_bigdata cobranet_bigdata; - struct hpi_controlx_res_cobranet_data cobranet_data; - struct hpi_controlx_res_cobranet_status cobranet_status; - struct hpi_controlx_res_generic generic; - /*struct param_info universal_info; */ - /*struct param_value universal_value; */ - } u; + struct { + u32 status; + u32 readable_size; + u32 writeable_size; + } status; + } cobranet; }; struct hpi_nvmemory_msg { @@ -1126,7 +1040,6 @@ struct hpi_message { /* identical to struct hpi_control_msg, but field naming is improved */ struct hpi_control_union_msg cu; - struct hpi_controlx_msg cx; /* extended mixer control; */ struct hpi_nvmemory_msg n; struct hpi_gpio_msg l; /* digital i/o */ struct hpi_watchdog_msg w; @@ -1151,7 +1064,7 @@ struct hpi_message { sizeof(struct hpi_message_header) + sizeof(struct hpi_watchdog_msg),\ sizeof(struct hpi_message_header) + sizeof(struct hpi_clock_msg),\ sizeof(struct hpi_message_header) + sizeof(struct hpi_profile_msg),\ - sizeof(struct hpi_message_header) + sizeof(struct hpi_controlx_msg),\ + sizeof(struct hpi_message_header), /* controlx obj removed */ \ sizeof(struct hpi_message_header) + sizeof(struct hpi_async_msg) \ } @@ -1188,7 +1101,6 @@ struct hpi_response { struct hpi_control_res c; /* mixer control; */ /* identical to hpi_control_res, but field naming is improved */ union hpi_control_union_res cu; - struct hpi_controlx_res cx; /* extended mixer control; */ struct hpi_nvmemory_res n; struct hpi_gpio_res l; /* digital i/o */ struct hpi_watchdog_res w; @@ -1213,7 +1125,7 @@ struct hpi_response { sizeof(struct hpi_response_header) + sizeof(struct hpi_watchdog_res),\ sizeof(struct hpi_response_header) + sizeof(struct hpi_clock_res),\ sizeof(struct hpi_response_header) + sizeof(struct hpi_profile_res),\ - sizeof(struct hpi_response_header) + sizeof(struct hpi_controlx_res),\ + sizeof(struct hpi_response_header), /* controlx obj removed */ \ sizeof(struct hpi_response_header) + sizeof(struct hpi_async_res) \ } @@ -1308,6 +1220,30 @@ struct hpi_res_adapter_debug_read { u8 bytes[256]; }; +struct hpi_msg_cobranet_hmi { + u16 attribute; + u16 padding; + u32 hmi_address; + u32 byte_count; +}; + +struct hpi_msg_cobranet_hmiwrite { + struct hpi_message_header h; + struct hpi_msg_cobranet_hmi p; + u8 bytes[256]; +}; + +struct hpi_msg_cobranet_hmiread { + struct hpi_message_header h; + struct hpi_msg_cobranet_hmi p; +}; + +struct hpi_res_cobranet_hmiread { + struct hpi_response_header h; + u32 byte_count; + u8 bytes[256]; +}; + #if 1 #define hpi_message_header_v1 hpi_message_header #define hpi_response_header_v1 hpi_response_header @@ -1338,7 +1274,6 @@ struct hpi_msg_payload_v0 { union hpi_mixerx_msg mx; struct hpi_control_msg c; struct hpi_control_union_msg cu; - struct hpi_controlx_msg cx; struct hpi_nvmemory_msg n; struct hpi_gpio_msg l; struct hpi_watchdog_msg w; @@ -1358,7 +1293,6 @@ struct hpi_res_payload_v0 { union hpi_mixerx_res mx; struct hpi_control_res c; union hpi_control_union_res cu; - struct hpi_controlx_res cx; struct hpi_nvmemory_res n; struct hpi_gpio_res l; struct hpi_watchdog_res w; @@ -1493,12 +1427,6 @@ struct hpi_control_cache_microphone { char temp_padding[6]; }; -struct hpi_control_cache_generic { - struct hpi_control_cache_info i; - u32 dw1; - u32 dw2; -}; - struct hpi_control_cache_single { union { struct hpi_control_cache_info i; @@ -1514,7 +1442,6 @@ struct hpi_control_cache_single { struct hpi_control_cache_silencedetector silence; struct hpi_control_cache_sampleclock clk; struct hpi_control_cache_microphone microphone; - struct hpi_control_cache_generic generic; } u; }; diff --git a/sound/pci/asihpi/hpicmn.c b/sound/pci/asihpi/hpicmn.c index b15a02e..bd47521 100644 --- a/sound/pci/asihpi/hpicmn.c +++ b/sound/pci/asihpi/hpicmn.c @@ -57,7 +57,7 @@ u16 hpi_validate_response(struct hpi_message *phm, struct hpi_response *phr) } if (phr->function != phm->function) { - HPI_DEBUG_LOG(ERROR, "header type %d invalid\n", + HPI_DEBUG_LOG(ERROR, "header function %d invalid\n", phr->function); return HPI_ERROR_INVALID_RESPONSE; } @@ -315,8 +315,7 @@ short hpi_check_control_cache(struct hpi_control_cache *p_cache, short found = 1; struct hpi_control_cache_info *pI; struct hpi_control_cache_single *pC; - struct hpi_control_cache_pad *p_pad; - + size_t response_size; if (!find_control(phm->obj_index, p_cache, &pI)) { HPI_DEBUG_LOG(VERBOSE, "HPICMN find_control() failed for adap %d\n", @@ -326,11 +325,15 @@ short hpi_check_control_cache(struct hpi_control_cache *p_cache, phr->error = 0; + /* set the default response size */ + response_size = + sizeof(struct hpi_response_header) + + sizeof(struct hpi_control_res); + /* pC is the default cached control strucure. May be cast to something else in the following switch statement. */ pC = (struct hpi_control_cache_single *)pI; - p_pad = (struct hpi_control_cache_pad *)pI; switch (pI->control_type) { @@ -529,9 +532,7 @@ short hpi_check_control_cache(struct hpi_control_cache *p_cache, pI->control_index, pI->control_type, phm->u.c.attribute); if (found) - phr->size = - sizeof(struct hpi_response_header) + - sizeof(struct hpi_control_res); + phr->size = (u16)response_size; return found; } @@ -630,13 +631,12 @@ struct hpi_control_cache *hpi_alloc_control_cache(const u32 control_count, if (!p_cache) return NULL; - p_cache->p_info = - kmalloc(sizeof(*p_cache->p_info) * control_count, GFP_KERNEL); + p_cache->p_info = kzalloc(sizeof(*p_cache->p_info) * control_count, + GFP_KERNEL); if (!p_cache->p_info) { kfree(p_cache); return NULL; } - memset(p_cache->p_info, 0, sizeof(*p_cache->p_info) * control_count); p_cache->cache_size_in_bytes = size_in_bytes; p_cache->control_count = control_count; p_cache->p_cache = p_dsp_control_buffer; @@ -682,7 +682,7 @@ static void subsys_message(struct hpi_message *phm, struct hpi_response *phr) void HPI_COMMON(struct hpi_message *phm, struct hpi_response *phr) { switch (phm->type) { - case HPI_TYPE_MESSAGE: + case HPI_TYPE_REQUEST: switch (phm->object) { case HPI_OBJ_SUBSYSTEM: subsys_message(phm, phr); diff --git a/sound/pci/asihpi/hpidspcd.c b/sound/pci/asihpi/hpidspcd.c index 5c6ea11..71d32c8 100644 --- a/sound/pci/asihpi/hpidspcd.c +++ b/sound/pci/asihpi/hpidspcd.c @@ -1,8 +1,8 @@ /***********************************************************************/ -/*! +/** AudioScience HPI driver - Copyright (C) 1997-2010 AudioScience Inc. + Copyright (C) 1997-2011 AudioScience Inc. This program is free software; you can redistribute it and/or modify it under the terms of version 2 of the GNU General Public License as @@ -18,90 +18,60 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA \file -Functions for reading DSP code to load into DSP - -(Linux only:) If DSPCODE_FIRMWARE_LOADER is defined, code is read using +Functions for reading DSP code using hotplug firmware loader from individual dsp code files - -If neither of the above is defined, code is read from linked arrays. -DSPCODE_ARRAY is defined. - -HPI_INCLUDE_**** must be defined -and the appropriate hzz?????.c or hex?????.c linked in - - */ +*/ /***********************************************************************/ #define SOURCEFILE_NAME "hpidspcd.c" #include "hpidspcd.h" #include "hpidebug.h" -/** - Header structure for binary dsp code file (see asidsp.doc) - This structure must match that used in s2bin.c for generation of asidsp.bin - */ - -#ifndef DISABLE_PRAGMA_PACK1 -#pragma pack(push, 1) -#endif - -struct code_header { - u32 size; - char type[4]; - u32 adapter; - u32 version; - u32 crc; +struct dsp_code_private { + /** Firmware descriptor */ + const struct firmware *firmware; + struct pci_dev *dev; }; -#ifndef DISABLE_PRAGMA_PACK1 -#pragma pack(pop) -#endif - #define HPI_VER_DECIMAL ((int)(HPI_VER_MAJOR(HPI_VER) * 10000 + \ HPI_VER_MINOR(HPI_VER) * 100 + HPI_VER_RELEASE(HPI_VER))) -/***********************************************************************/ -#include /*-------------------------------------------------------------------*/ -short hpi_dsp_code_open(u32 adapter, struct dsp_code *ps_dsp_code, - u32 *pos_error_code) +short hpi_dsp_code_open(u32 adapter, void *os_data, struct dsp_code *dsp_code, + u32 *os_error_code) { - const struct firmware *ps_firmware = ps_dsp_code->ps_firmware; + const struct firmware *firmware; + struct pci_dev *dev = os_data; struct code_header header; char fw_name[20]; + short err_ret = HPI_ERROR_DSP_FILE_NOT_FOUND; int err; sprintf(fw_name, "asihpi/dsp%04x.bin", adapter); - err = request_firmware(&ps_firmware, fw_name, - &ps_dsp_code->ps_dev->dev); + err = request_firmware(&firmware, fw_name, &dev->dev); - if (err != 0) { - dev_printk(KERN_ERR, &ps_dsp_code->ps_dev->dev, + if (err || !firmware) { + dev_printk(KERN_ERR, &dev->dev, "%d, request_firmware failed for %s\n", err, fw_name); goto error1; } - if (ps_firmware->size < sizeof(header)) { - dev_printk(KERN_ERR, &ps_dsp_code->ps_dev->dev, - "Header size too small %s\n", fw_name); - goto error2; - } - memcpy(&header, ps_firmware->data, sizeof(header)); - if (header.adapter != adapter) { - dev_printk(KERN_ERR, &ps_dsp_code->ps_dev->dev, - "Adapter type incorrect %4x != %4x\n", header.adapter, - adapter); + if (firmware->size < sizeof(header)) { + dev_printk(KERN_ERR, &dev->dev, "Header size too small %s\n", + fw_name); goto error2; } - if (header.size != ps_firmware->size) { - dev_printk(KERN_ERR, &ps_dsp_code->ps_dev->dev, - "Code size wrong %d != %ld\n", header.size, - (unsigned long)ps_firmware->size); + memcpy(&header, firmware->data, sizeof(header)); + + if ((header.type != 0x45444F43) || /* "CODE" */ + (header.adapter != adapter) + || (header.size != firmware->size)) { + dev_printk(KERN_ERR, &dev->dev, "Invalid firmware file\n"); goto error2; } - if (header.version / 100 != HPI_VER_DECIMAL / 100) { - dev_printk(KERN_ERR, &ps_dsp_code->ps_dev->dev, + if ((header.version / 100 & ~1) != (HPI_VER_DECIMAL / 100 & ~1)) { + dev_printk(KERN_ERR, &dev->dev, "Incompatible firmware version " "DSP image %d != Driver %d\n", header.version, HPI_VER_DECIMAL); @@ -109,67 +79,72 @@ short hpi_dsp_code_open(u32 adapter, struct dsp_code *ps_dsp_code, } if (header.version != HPI_VER_DECIMAL) { - dev_printk(KERN_WARNING, &ps_dsp_code->ps_dev->dev, + dev_printk(KERN_WARNING, &dev->dev, "Firmware: release version mismatch DSP image %d != Driver %d\n", header.version, HPI_VER_DECIMAL); } HPI_DEBUG_LOG(DEBUG, "dsp code %s opened\n", fw_name); - ps_dsp_code->ps_firmware = ps_firmware; - ps_dsp_code->block_length = header.size / sizeof(u32); - ps_dsp_code->word_count = sizeof(header) / sizeof(u32); - ps_dsp_code->version = header.version; - ps_dsp_code->crc = header.crc; + dsp_code->pvt = kmalloc(sizeof(*dsp_code->pvt), GFP_KERNEL); + if (!dsp_code->pvt) { + err_ret = HPI_ERROR_MEMORY_ALLOC; + goto error2; + } + + dsp_code->pvt->dev = dev; + dsp_code->pvt->firmware = firmware; + dsp_code->header = header; + dsp_code->block_length = header.size / sizeof(u32); + dsp_code->word_count = sizeof(header) / sizeof(u32); return 0; error2: - release_firmware(ps_firmware); + release_firmware(firmware); error1: - ps_dsp_code->ps_firmware = NULL; - ps_dsp_code->block_length = 0; - return HPI_ERROR_DSP_FILE_NOT_FOUND; + dsp_code->block_length = 0; + return err_ret; } /*-------------------------------------------------------------------*/ -void hpi_dsp_code_close(struct dsp_code *ps_dsp_code) +void hpi_dsp_code_close(struct dsp_code *dsp_code) { - if (ps_dsp_code->ps_firmware != NULL) { + if (dsp_code->pvt->firmware) { HPI_DEBUG_LOG(DEBUG, "dsp code closed\n"); - release_firmware(ps_dsp_code->ps_firmware); - ps_dsp_code->ps_firmware = NULL; + release_firmware(dsp_code->pvt->firmware); + dsp_code->pvt->firmware = NULL; } + kfree(dsp_code->pvt); } /*-------------------------------------------------------------------*/ -void hpi_dsp_code_rewind(struct dsp_code *ps_dsp_code) +void hpi_dsp_code_rewind(struct dsp_code *dsp_code) { /* Go back to start of data, after header */ - ps_dsp_code->word_count = sizeof(struct code_header) / sizeof(u32); + dsp_code->word_count = sizeof(struct code_header) / sizeof(u32); } /*-------------------------------------------------------------------*/ -short hpi_dsp_code_read_word(struct dsp_code *ps_dsp_code, u32 *pword) +short hpi_dsp_code_read_word(struct dsp_code *dsp_code, u32 *pword) { - if (ps_dsp_code->word_count + 1 > ps_dsp_code->block_length) + if (dsp_code->word_count + 1 > dsp_code->block_length) return HPI_ERROR_DSP_FILE_FORMAT; - *pword = ((u32 *)(ps_dsp_code->ps_firmware->data))[ps_dsp_code-> + *pword = ((u32 *)(dsp_code->pvt->firmware->data))[dsp_code-> word_count]; - ps_dsp_code->word_count++; + dsp_code->word_count++; return 0; } /*-------------------------------------------------------------------*/ short hpi_dsp_code_read_block(size_t words_requested, - struct dsp_code *ps_dsp_code, u32 **ppblock) + struct dsp_code *dsp_code, u32 **ppblock) { - if (ps_dsp_code->word_count + words_requested > - ps_dsp_code->block_length) + if (dsp_code->word_count + words_requested > dsp_code->block_length) return HPI_ERROR_DSP_FILE_FORMAT; *ppblock = - ((u32 *)(ps_dsp_code->ps_firmware->data)) + - ps_dsp_code->word_count; - ps_dsp_code->word_count += words_requested; + ((u32 *)(dsp_code->pvt->firmware->data)) + + dsp_code->word_count; + dsp_code->word_count += words_requested; return 0; } diff --git a/sound/pci/asihpi/hpidspcd.h b/sound/pci/asihpi/hpidspcd.h index 65f0ca7..b228811 100644 --- a/sound/pci/asihpi/hpidspcd.h +++ b/sound/pci/asihpi/hpidspcd.h @@ -2,7 +2,7 @@ /** AudioScience HPI driver - Copyright (C) 1997-2010 AudioScience Inc. + Copyright (C) 1997-2011 AudioScience Inc. This program is free software; you can redistribute it and/or modify it under the terms of version 2 of the GNU General Public License as @@ -20,19 +20,6 @@ \file Functions for reading DSP code to load into DSP - hpi_dspcode_defines HPI DSP code loading method -Define exactly one of these to select how the DSP code is supplied to -the adapter. - -End users writing applications that use the HPI interface do not have to -use any of the below defines; they are only necessary for building drivers - -HPI_DSPCODE_FILE: -DSP code is supplied as a file that is opened and read from by the driver. - -HPI_DSPCODE_FIRMWARE: -DSP code is read using the hotplug firmware loader module. - Only valid when compiling the HPI kernel driver under Linux. */ /***********************************************************************/ #ifndef _HPIDSPCD_H_ @@ -40,37 +27,56 @@ DSP code is read using the hotplug firmware loader module. #include "hpi_internal.h" -#ifndef DISABLE_PRAGMA_PACK1 -#pragma pack(push, 1) -#endif +/** Code header version is decimal encoded e.g. 4.06.10 is 40601 */ +#define HPI_VER_DECIMAL ((int)(HPI_VER_MAJOR(HPI_VER) * 10000 + \ +HPI_VER_MINOR(HPI_VER) * 100 + HPI_VER_RELEASE(HPI_VER))) + +/** Header structure for dsp firmware file + This structure must match that used in s2bin.c for generation of asidsp.bin + */ +/*#ifndef DISABLE_PRAGMA_PACK1 */ +/*#pragma pack(push, 1) */ +/*#endif */ +struct code_header { + /** Size in bytes including header */ + u32 size; + /** File type tag "CODE" == 0x45444F43 */ + u32 type; + /** Adapter model number */ + u32 adapter; + /** Firmware version*/ + u32 version; + /** Data checksum */ + u32 checksum; +}; +/*#ifndef DISABLE_PRAGMA_PACK1 */ +/*#pragma pack(pop) */ +/*#endif */ + +/*? Don't need the pragmas? */ +compile_time_assert((sizeof(struct code_header) == 20), code_header_size); /** Descriptor for dspcode from firmware loader */ struct dsp_code { - /** Firmware descriptor */ - const struct firmware *ps_firmware; - struct pci_dev *ps_dev; + /** copy of file header */ + struct code_header header; /** Expected number of words in the whole dsp code,INCL header */ - long int block_length; + u32 block_length; /** Number of words read so far */ - long int word_count; - /** Version read from dsp code file */ - u32 version; - /** CRC read from dsp code file */ - u32 crc; -}; + u32 word_count; -#ifndef DISABLE_PRAGMA_PACK1 -#pragma pack(pop) -#endif + /** internal state of DSP code reader */ + struct dsp_code_private *pvt; +}; -/** Prepare *psDspCode to refer to the requuested adapter. - Searches the file, or selects the appropriate linked array +/** Prepare *psDspCode to refer to the requested adapter's firmware. +Code file name is obtained from HpiOs_GetDspCodePath \return 0 for success, or error code if requested code is not available */ short hpi_dsp_code_open( /** Code identifier, usually adapter family */ - u32 adapter, + u32 adapter, void *pci_dev, /** Pointer to DSP code control structure */ struct dsp_code *ps_dsp_code, /** Pointer to dword to receive OS specific error code */ diff --git a/sound/pci/asihpi/hpifunc.c b/sound/pci/asihpi/hpifunc.c index 7397b16..ebb568d 100644 --- a/sound/pci/asihpi/hpifunc.c +++ b/sound/pci/asihpi/hpifunc.c @@ -1663,68 +1663,64 @@ u16 hpi_channel_mode_get(u32 h_control, u16 *mode) u16 hpi_cobranet_hmi_write(u32 h_control, u32 hmi_address, u32 byte_count, u8 *pb_data) { - struct hpi_message hm; - struct hpi_response hr; + struct hpi_msg_cobranet_hmiwrite hm; + struct hpi_response_header hr; - hpi_init_message_response(&hm, &hr, HPI_OBJ_CONTROLEX, - HPI_CONTROL_SET_STATE); - if (hpi_handle_indexes(h_control, &hm.adapter_index, &hm.obj_index)) - return HPI_ERROR_INVALID_HANDLE; + hpi_init_message_responseV1(&hm.h, sizeof(hm), &hr, sizeof(hr), + HPI_OBJ_CONTROL, HPI_CONTROL_SET_STATE); - hm.u.cx.u.cobranet_data.byte_count = byte_count; - hm.u.cx.u.cobranet_data.hmi_address = hmi_address; + if (hpi_handle_indexes(h_control, &hm.h.adapter_index, + &hm.h.obj_index)) + return HPI_ERROR_INVALID_HANDLE; - if (byte_count <= 8) { - memcpy(hm.u.cx.u.cobranet_data.data, pb_data, byte_count); - hm.u.cx.attribute = HPI_COBRANET_SET; - } else { - hm.u.cx.u.cobranet_bigdata.pb_data = pb_data; - hm.u.cx.attribute = HPI_COBRANET_SET_DATA; - } + if (byte_count > sizeof(hm.bytes)) + return HPI_ERROR_MESSAGE_BUFFER_TOO_SMALL; - hpi_send_recv(&hm, &hr); + hm.p.attribute = HPI_COBRANET_SET; + hm.p.byte_count = byte_count; + hm.p.hmi_address = hmi_address; + memcpy(hm.bytes, pb_data, byte_count); + hm.h.size = (u16)(sizeof(hm.h) + sizeof(hm.p) + byte_count); + hpi_send_recvV1(&hm.h, &hr); return hr.error; } u16 hpi_cobranet_hmi_read(u32 h_control, u32 hmi_address, u32 max_byte_count, u32 *pbyte_count, u8 *pb_data) { - struct hpi_message hm; - struct hpi_response hr; + struct hpi_msg_cobranet_hmiread hm; + struct hpi_res_cobranet_hmiread hr; - hpi_init_message_response(&hm, &hr, HPI_OBJ_CONTROLEX, - HPI_CONTROL_GET_STATE); - if (hpi_handle_indexes(h_control, &hm.adapter_index, &hm.obj_index)) + hpi_init_message_responseV1(&hm.h, sizeof(hm), &hr.h, sizeof(hr), + HPI_OBJ_CONTROL, HPI_CONTROL_GET_STATE); + + if (hpi_handle_indexes(h_control, &hm.h.adapter_index, + &hm.h.obj_index)) return HPI_ERROR_INVALID_HANDLE; - hm.u.cx.u.cobranet_data.byte_count = max_byte_count; - hm.u.cx.u.cobranet_data.hmi_address = hmi_address; + if (max_byte_count > sizeof(hr.bytes)) + return HPI_ERROR_RESPONSE_BUFFER_TOO_SMALL; - if (max_byte_count <= 8) { - hm.u.cx.attribute = HPI_COBRANET_GET; - } else { - hm.u.cx.u.cobranet_bigdata.pb_data = pb_data; - hm.u.cx.attribute = HPI_COBRANET_GET_DATA; - } + hm.p.attribute = HPI_COBRANET_GET; + hm.p.byte_count = max_byte_count; + hm.p.hmi_address = hmi_address; - hpi_send_recv(&hm, &hr); - if (!hr.error && pb_data) { + hpi_send_recvV1(&hm.h, &hr.h); - *pbyte_count = hr.u.cx.u.cobranet_data.byte_count; + if (!hr.h.error && pb_data) { + if (hr.byte_count > sizeof(hr.bytes)) - if (*pbyte_count < max_byte_count) - max_byte_count = *pbyte_count; + return HPI_ERROR_RESPONSE_BUFFER_TOO_SMALL; - if (hm.u.cx.attribute == HPI_COBRANET_GET) { - memcpy(pb_data, hr.u.cx.u.cobranet_data.data, - max_byte_count); - } else { + *pbyte_count = hr.byte_count; - } + if (hr.byte_count < max_byte_count) + max_byte_count = *pbyte_count; + memcpy(pb_data, hr.bytes, max_byte_count); } - return hr.error; + return hr.h.error; } u16 hpi_cobranet_hmi_get_status(u32 h_control, u32 *pstatus, @@ -1733,23 +1729,23 @@ u16 hpi_cobranet_hmi_get_status(u32 h_control, u32 *pstatus, struct hpi_message hm; struct hpi_response hr; - hpi_init_message_response(&hm, &hr, HPI_OBJ_CONTROLEX, + hpi_init_message_response(&hm, &hr, HPI_OBJ_CONTROL, HPI_CONTROL_GET_STATE); if (hpi_handle_indexes(h_control, &hm.adapter_index, &hm.obj_index)) return HPI_ERROR_INVALID_HANDLE; - hm.u.cx.attribute = HPI_COBRANET_GET_STATUS; + hm.u.c.attribute = HPI_COBRANET_GET_STATUS; hpi_send_recv(&hm, &hr); if (!hr.error) { if (pstatus) - *pstatus = hr.u.cx.u.cobranet_status.status; + *pstatus = hr.u.cu.cobranet.status.status; if (preadable_size) *preadable_size = - hr.u.cx.u.cobranet_status.readable_size; + hr.u.cu.cobranet.status.readable_size; if (pwriteable_size) *pwriteable_size = - hr.u.cx.u.cobranet_status.writeable_size; + hr.u.cu.cobranet.status.writeable_size; } return hr.error; } diff --git a/sound/pci/asihpi/hpimsginit.c b/sound/pci/asihpi/hpimsginit.c index 628376c..52400a6 100644 --- a/sound/pci/asihpi/hpimsginit.c +++ b/sound/pci/asihpi/hpimsginit.c @@ -46,7 +46,7 @@ static void hpi_init_message(struct hpi_message *phm, u16 object, if (gwSSX2_bypass) phm->type = HPI_TYPE_SSX2BYPASS_MESSAGE; else - phm->type = HPI_TYPE_MESSAGE; + phm->type = HPI_TYPE_REQUEST; phm->object = object; phm->function = function; phm->version = 0; @@ -89,7 +89,7 @@ static void hpi_init_messageV1(struct hpi_message_header *phm, u16 size, memset(phm, 0, sizeof(*phm)); if ((object > 0) && (object <= HPI_OBJ_MAXINDEX)) { phm->size = size; - phm->type = HPI_TYPE_MESSAGE; + phm->type = HPI_TYPE_REQUEST; phm->object = object; phm->function = function; phm->version = 1; diff --git a/sound/pci/asihpi/hpimsgx.c b/sound/pci/asihpi/hpimsgx.c index 7352a5f..2e77942 100644 --- a/sound/pci/asihpi/hpimsgx.c +++ b/sound/pci/asihpi/hpimsgx.c @@ -16,7 +16,7 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -Extended Message Function With Response Cacheing +Extended Message Function With Response Caching (C) Copyright AudioScience Inc. 2002 *****************************************************************************/ @@ -186,7 +186,6 @@ static void subsys_message(struct hpi_message *phm, struct hpi_response *phr, /* Initialize this module's internal state */ hpios_msgxlock_init(&msgx_lock); memset(&hpi_entry_points, 0, sizeof(hpi_entry_points)); - hpios_locked_mem_init(); /* Init subsys_findadapters response to no-adapters */ HPIMSGX__reset(HPIMSGX_ALLADAPTERS); hpi_init_response(phr, HPI_OBJ_SUBSYSTEM, @@ -197,7 +196,6 @@ static void subsys_message(struct hpi_message *phm, struct hpi_response *phr, case HPI_SUBSYS_DRIVER_UNLOAD: HPI_COMMON(phm, phr); HPIMSGX__cleanup(HPIMSGX_ALLADAPTERS, h_owner); - hpios_locked_mem_free_all(); hpi_init_response(phr, HPI_OBJ_SUBSYSTEM, HPI_SUBSYS_DRIVER_UNLOAD, 0); return; @@ -315,7 +313,7 @@ void hpi_send_recv_ex(struct hpi_message *phm, struct hpi_response *phr, { HPI_DEBUG_MESSAGE(DEBUG, phm); - if (phm->type != HPI_TYPE_MESSAGE) { + if (phm->type != HPI_TYPE_REQUEST) { hpi_init_response(phr, phm->object, phm->function, HPI_ERROR_INVALID_TYPE); return; diff --git a/sound/pci/asihpi/hpioctl.c b/sound/pci/asihpi/hpioctl.c index d8e7047..f6b9517 100644 --- a/sound/pci/asihpi/hpioctl.c +++ b/sound/pci/asihpi/hpioctl.c @@ -1,7 +1,7 @@ /******************************************************************************* AudioScience HPI driver - Copyright (C) 1997-2010 AudioScience Inc. + Copyright (C) 1997-2011 AudioScience Inc. This program is free software; you can redistribute it and/or modify it under the terms of version 2 of the GNU General Public License as @@ -33,6 +33,7 @@ Common Linux HPI ioctl and module probe/remove functions #include #include #include +#include #ifdef MODULE_FIRMWARE MODULE_FIRMWARE("asihpi/dsp5000.bin"); @@ -107,7 +108,6 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg) union hpi_response_buffer_v1 *hr; u16 res_max_size; u32 uncopied_bytes; - struct hpi_adapter *pa = NULL; int err = 0; if (cmd != HPI_IOCTL_LINUX) @@ -157,11 +157,6 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg) goto out; } - if (hm->h.adapter_index >= HPI_MAX_ADAPTERS) { - err = -EINVAL; - goto out; - } - switch (hm->h.function) { case HPI_SUBSYS_CREATE_ADAPTER: case HPI_ADAPTER_DELETE: @@ -183,16 +178,21 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } else { u16 __user *ptr = NULL; u32 size = 0; - + u32 adapter_present; /* -1=no data 0=read from user mem, 1=write to user mem */ int wrflag = -1; - u32 adapter = hm->h.adapter_index; - pa = &adapters[adapter]; + struct hpi_adapter *pa; + + if (hm->h.adapter_index < HPI_MAX_ADAPTERS) { + pa = &adapters[hm->h.adapter_index]; + adapter_present = pa->type; + } else { + adapter_present = 0; + } - if ((adapter > HPI_MAX_ADAPTERS) || (!pa->type)) { - hpi_init_response(&hr->r0, HPI_OBJ_ADAPTER, - HPI_ADAPTER_OPEN, - HPI_ERROR_BAD_ADAPTER_NUMBER); + if (!adapter_present) { + hpi_init_response(&hr->r0, hm->h.object, + hm->h.function, HPI_ERROR_BAD_ADAPTER_NUMBER); uncopied_bytes = copy_to_user(puhr, hr, sizeof(hr->h)); @@ -203,7 +203,7 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg) goto out; } - if (mutex_lock_interruptible(&adapters[adapter].mutex)) { + if (mutex_lock_interruptible(&pa->mutex)) { err = -EINTR; goto out; } @@ -239,8 +239,7 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg) "stream buffer size %d\n", size); - mutex_unlock(&adapters - [adapter].mutex); + mutex_unlock(&pa->mutex); err = -EINVAL; goto out; } @@ -281,7 +280,7 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg) uncopied_bytes, size); } - mutex_unlock(&adapters[adapter].mutex); + mutex_unlock(&pa->mutex); } /* on return response size must be set */ diff --git a/sound/pci/asihpi/hpios.c b/sound/pci/asihpi/hpios.c index 742ee12..ff2a19b 100644 --- a/sound/pci/asihpi/hpios.c +++ b/sound/pci/asihpi/hpios.c @@ -39,10 +39,6 @@ void hpios_delay_micro_seconds(u32 num_micro_sec) } -void hpios_locked_mem_init(void) -{ -} - /** Allocated an area of locked memory for bus master DMA operations. On error, return -ENOMEM, and *pMemArea.size = 0 @@ -85,7 +81,3 @@ u16 hpios_locked_mem_free(struct consistent_dma_area *p_mem_area) return 1; } } - -void hpios_locked_mem_free_all(void) -{ -} diff --git a/sound/pci/asihpi/hpios.h b/sound/pci/asihpi/hpios.h index 03273e7..2f605e3 100644 --- a/sound/pci/asihpi/hpios.h +++ b/sound/pci/asihpi/hpios.h @@ -38,6 +38,7 @@ HPI Operating System Specific macros for Linux Kernel driver #include #include #include +#include #define HPI_NO_OS_FILE_OPS diff --git a/sound/pci/au88x0/au88x0.c b/sound/pci/au88x0/au88x0.c index 7b72c88..dc326be 100644 --- a/sound/pci/au88x0/au88x0.c +++ b/sound/pci/au88x0/au88x0.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include @@ -196,7 +196,7 @@ snd_vortex_create(struct snd_card *card, struct pci_dev *pci, vortex_t ** rchip) } if ((err = request_irq(pci->irq, vortex_interrupt, - IRQF_SHARED, CARD_NAME_SHORT, + IRQF_SHARED, KBUILD_MODNAME, chip)) != 0) { printk(KERN_ERR "cannot grab irq\n"); goto irq_out; @@ -375,7 +375,7 @@ static void __devexit snd_vortex_remove(struct pci_dev *pci) // pci_driver definition static struct pci_driver driver = { - .name = CARD_NAME_SHORT, + .name = KBUILD_MODNAME, .id_table = snd_vortex_ids, .probe = snd_vortex_probe, .remove = __devexit_p(snd_vortex_remove), diff --git a/sound/pci/au88x0/au88x0_game.c b/sound/pci/au88x0/au88x0_game.c index e291aa5..c07c792 100644 --- a/sound/pci/au88x0/au88x0_game.c +++ b/sound/pci/au88x0/au88x0_game.c @@ -34,6 +34,7 @@ #include #include "au88x0.h" #include +#include #if defined(CONFIG_GAMEPORT) || (defined(MODULE) && defined(CONFIG_GAMEPORT_MODULE)) diff --git a/sound/pci/au88x0/au88x0_mpu401.c b/sound/pci/au88x0/au88x0_mpu401.c index 0dc8d25..e6c6a0f 100644 --- a/sound/pci/au88x0/au88x0_mpu401.c +++ b/sound/pci/au88x0/au88x0_mpu401.c @@ -84,7 +84,7 @@ static int __devinit snd_vortex_midi(vortex_t * vortex) #ifdef VORTEX_MPU401_LEGACY if ((temp = snd_mpu401_uart_new(vortex->card, 0, MPU401_HW_MPU401, 0x330, - 0, 0, 0, &rmidi)) != 0) { + MPU401_INFO_IRQ_HOOK, -1, &rmidi)) != 0) { hwwrite(vortex->mmio, VORTEX_CTRL, (hwread(vortex->mmio, VORTEX_CTRL) & ~CTRL_MIDI_PORT) & ~CTRL_MIDI_EN); @@ -94,8 +94,8 @@ static int __devinit snd_vortex_midi(vortex_t * vortex) port = (unsigned long)(vortex->mmio + VORTEX_MIDI_DATA); if ((temp = snd_mpu401_uart_new(vortex->card, 0, MPU401_HW_AUREAL, port, - MPU401_INFO_INTEGRATED | MPU401_INFO_MMIO, - 0, 0, &rmidi)) != 0) { + MPU401_INFO_INTEGRATED | MPU401_INFO_MMIO | + MPU401_INFO_IRQ_HOOK, -1, &rmidi)) != 0) { hwwrite(vortex->mmio, VORTEX_CTRL, (hwread(vortex->mmio, VORTEX_CTRL) & ~CTRL_MIDI_PORT) & ~CTRL_MIDI_EN); diff --git a/sound/pci/aw2/aw2-alsa.c b/sound/pci/aw2/aw2-alsa.c index c150022..7a58115 100644 --- a/sound/pci/aw2/aw2-alsa.c +++ b/sound/pci/aw2/aw2-alsa.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -171,7 +172,7 @@ MODULE_DEVICE_TABLE(pci, snd_aw2_ids); /* pci_driver definition */ static struct pci_driver driver = { - .name = "Emagic Audiowerk 2", + .name = KBUILD_MODNAME, .id_table = snd_aw2_ids, .probe = snd_aw2_probe, .remove = __devexit_p(snd_aw2_remove), @@ -317,7 +318,7 @@ static int __devinit snd_aw2_create(struct snd_card *card, snd_aw2_saa7146_setup(&chip->saa7146, chip->iobase_virt); if (request_irq(pci->irq, snd_aw2_saa7146_interrupt, - IRQF_SHARED, "Audiowerk2", chip)) { + IRQF_SHARED, KBUILD_MODNAME, chip)) { printk(KERN_ERR "aw2: Cannot grab irq %d\n", pci->irq); iounmap(chip->iobase_virt); diff --git a/sound/pci/ca0106/ca0106_main.c b/sound/pci/ca0106/ca0106_main.c index 4377592..fe99fde 100644 --- a/sound/pci/ca0106/ca0106_main.c +++ b/sound/pci/ca0106/ca0106_main.c @@ -140,7 +140,7 @@ #include #include #include -#include +#include #include #include #include @@ -1666,7 +1666,7 @@ static int __devinit snd_ca0106_create(int dev, struct snd_card *card, } if (request_irq(pci->irq, snd_ca0106_interrupt, - IRQF_SHARED, "snd_ca0106", chip)) { + IRQF_SHARED, KBUILD_MODNAME, chip)) { snd_ca0106_free(chip); printk(KERN_ERR "cannot grab irq\n"); return -EBUSY; @@ -1933,7 +1933,7 @@ MODULE_DEVICE_TABLE(pci, snd_ca0106_ids); // pci_driver definition static struct pci_driver driver = { - .name = "CA0106", + .name = KBUILD_MODNAME, .id_table = snd_ca0106_ids, .probe = snd_ca0106_probe, .remove = __devexit_p(snd_ca0106_remove), diff --git a/sound/pci/cs46xx/cs46xx.c b/sound/pci/cs46xx/cs46xx.c index 767fa7f..a4ecb40 100644 --- a/sound/pci/cs46xx/cs46xx.c +++ b/sound/pci/cs46xx/cs46xx.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include @@ -162,7 +162,7 @@ static void __devexit snd_card_cs46xx_remove(struct pci_dev *pci) } static struct pci_driver driver = { - .name = "Sound Fusion CS46xx", + .name = KBUILD_MODNAME, .id_table = snd_cs46xx_ids, .probe = snd_card_cs46xx_probe, .remove = __devexit_p(snd_card_cs46xx_remove), diff --git a/sound/pci/cs46xx/cs46xx_lib.c b/sound/pci/cs46xx/cs46xx_lib.c index aad3708..4fa5316 100644 --- a/sound/pci/cs46xx/cs46xx_lib.c +++ b/sound/pci/cs46xx/cs46xx_lib.c @@ -53,6 +53,7 @@ #include #include #include +#include #include @@ -3835,7 +3836,7 @@ int __devinit snd_cs46xx_create(struct snd_card *card, } if (request_irq(pci->irq, snd_cs46xx_interrupt, IRQF_SHARED, - "CS46XX", chip)) { + KBUILD_MODNAME, chip)) { snd_printk(KERN_ERR "unable to grab IRQ %d\n", pci->irq); snd_cs46xx_free(chip); return -EBUSY; diff --git a/sound/pci/cs5535audio/cs5535audio.c b/sound/pci/cs5535audio/cs5535audio.c index afb8037..b8959d2 100644 --- a/sound/pci/cs5535audio/cs5535audio.c +++ b/sound/pci/cs5535audio/cs5535audio.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include @@ -311,7 +311,7 @@ static int __devinit snd_cs5535audio_create(struct snd_card *card, cs5535au->port = pci_resource_start(pci, 0); if (request_irq(pci->irq, snd_cs5535audio_interrupt, - IRQF_SHARED, "CS5535 Audio", cs5535au)) { + IRQF_SHARED, KBUILD_MODNAME, cs5535au)) { snd_printk(KERN_ERR "unable to grab IRQ %d\n", pci->irq); err = -EBUSY; goto sndfail; @@ -395,7 +395,7 @@ static void __devexit snd_cs5535audio_remove(struct pci_dev *pci) } static struct pci_driver driver = { - .name = DRIVER_NAME, + .name = KBUILD_MODNAME, .id_table = snd_cs5535audio_ids, .probe = snd_cs5535audio_probe, .remove = __devexit_p(snd_cs5535audio_remove), diff --git a/sound/pci/cs5535audio/cs5535audio_pcm.c b/sound/pci/cs5535audio/cs5535audio_pcm.c index e083122..dbf94b1 100644 --- a/sound/pci/cs5535audio/cs5535audio_pcm.c +++ b/sound/pci/cs5535audio/cs5535audio_pcm.c @@ -148,7 +148,7 @@ static int cs5535audio_build_dma_packets(struct cs5535audio *cs5535au, struct cs5535audio_dma_desc *desc = &((struct cs5535audio_dma_desc *) dma->desc_buf.area)[i]; desc->addr = cpu_to_le32(addr); - desc->size = cpu_to_le32(period_bytes); + desc->size = cpu_to_le16(period_bytes); desc->ctlreserved = cpu_to_le16(PRD_EOP); desc_addr += sizeof(struct cs5535audio_dma_desc); addr += period_bytes; diff --git a/sound/pci/ctxfi/ct20k2reg.h b/sound/pci/ctxfi/ct20k2reg.h index e0394e3..ca501ba 100644 --- a/sound/pci/ctxfi/ct20k2reg.h +++ b/sound/pci/ctxfi/ct20k2reg.h @@ -55,6 +55,7 @@ /* GPIO Registers */ #define GPIO_DATA 0x1B7020 #define GPIO_CTRL 0x1B7024 +#define GPIO_EXT_DATA 0x1B70A0 /* Virtual memory registers */ #define VMEM_PTPAL 0x1C6300 /* 0x1C6300 + (16 * Chn) */ diff --git a/sound/pci/ctxfi/ctatc.c b/sound/pci/ctxfi/ctatc.c index 13f33c0..d8a4423 100644 --- a/sound/pci/ctxfi/ctatc.c +++ b/sound/pci/ctxfi/ctatc.c @@ -18,7 +18,6 @@ #include "ctatc.h" #include "ctpcm.h" #include "ctmixer.h" -#include "cthardware.h" #include "ctsrc.h" #include "ctamixer.h" #include "ctdaio.h" @@ -30,7 +29,6 @@ #include #define MONO_SUM_SCALE 0x19a8 /* 2^(-0.5) in 14-bit floating format */ -#define DAIONUM 7 #define MAX_MULTI_CHN 8 #define IEC958_DEFAULT_CON ((IEC958_AES0_NONAUDIO \ @@ -53,6 +51,8 @@ static struct snd_pci_quirk __devinitdata subsys_20k1_list[] = { static struct snd_pci_quirk __devinitdata subsys_20k2_list[] = { SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, PCI_SUBDEVICE_ID_CREATIVE_SB0760, "SB0760", CTSB0760), + SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, PCI_SUBDEVICE_ID_CREATIVE_SB1270, + "SB1270", CTSB1270), SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, PCI_SUBDEVICE_ID_CREATIVE_SB08801, "SB0880", CTSB0880), SND_PCI_QUIRK(PCI_VENDOR_ID_CREATIVE, PCI_SUBDEVICE_ID_CREATIVE_SB08802, @@ -75,6 +75,7 @@ static const char *ct_subsys_name[NUM_CTCARDS] = { [CTSB0760] = "SB076x", [CTHENDRIX] = "Hendrix", [CTSB0880] = "SB0880", + [CTSB1270] = "SB1270", [CT20K2_UNKNOWN] = "Unknown", }; @@ -459,12 +460,12 @@ static void setup_src_node_conf(struct ct_atc *atc, struct ct_atc_pcm *apcm, apcm->substream->runtime->rate); *n_srcc = 0; - if (1 == atc->msr) { + if (1 == atc->msr) { /* FIXME: do we really need SRC here if pitch==1 */ *n_srcc = apcm->substream->runtime->channels; conf[0].pitch = pitch; conf[0].mix_msr = conf[0].imp_msr = conf[0].msr = 1; conf[0].vo = 1; - } else if (2 == atc->msr) { + } else if (2 <= atc->msr) { if (0x8000000 < pitch) { /* Need two-stage SRCs, SRCIMPs and * AMIXERs for converting format */ @@ -970,11 +971,39 @@ static int atc_select_mic_in(struct ct_atc *atc) return 0; } -static int atc_have_digit_io_switch(struct ct_atc *atc) +static struct capabilities atc_capabilities(struct ct_atc *atc) { struct hw *hw = atc->hw; - return hw->have_digit_io_switch(hw); + return hw->capabilities(hw); +} + +static int atc_output_switch_get(struct ct_atc *atc) +{ + struct hw *hw = atc->hw; + + return hw->output_switch_get(hw); +} + +static int atc_output_switch_put(struct ct_atc *atc, int position) +{ + struct hw *hw = atc->hw; + + return hw->output_switch_put(hw, position); +} + +static int atc_mic_source_switch_get(struct ct_atc *atc) +{ + struct hw *hw = atc->hw; + + return hw->mic_source_switch_get(hw); +} + +static int atc_mic_source_switch_put(struct ct_atc *atc, int position) +{ + struct hw *hw = atc->hw; + + return hw->mic_source_switch_put(hw, position); } static int atc_select_digit_io(struct ct_atc *atc) @@ -1045,6 +1074,11 @@ static int atc_line_in_unmute(struct ct_atc *atc, unsigned char state) return atc_daio_unmute(atc, state, LINEIM); } +static int atc_mic_unmute(struct ct_atc *atc, unsigned char state) +{ + return atc_daio_unmute(atc, state, MIC); +} + static int atc_spdif_out_unmute(struct ct_atc *atc, unsigned char state) { return atc_daio_unmute(atc, state, SPDIFOO); @@ -1331,17 +1365,20 @@ static int atc_get_resources(struct ct_atc *atc) struct srcimp_mgr *srcimp_mgr; struct sum_desc sum_dsc = {0}; struct sum_mgr *sum_mgr; - int err, i; + int err, i, num_srcs, num_daios; - atc->daios = kzalloc(sizeof(void *)*(DAIONUM), GFP_KERNEL); + num_daios = ((atc->model == CTSB1270) ? 8 : 7); + num_srcs = ((atc->model == CTSB1270) ? 6 : 4); + + atc->daios = kzalloc(sizeof(void *)*num_daios, GFP_KERNEL); if (!atc->daios) return -ENOMEM; - atc->srcs = kzalloc(sizeof(void *)*(2*2), GFP_KERNEL); + atc->srcs = kzalloc(sizeof(void *)*num_srcs, GFP_KERNEL); if (!atc->srcs) return -ENOMEM; - atc->srcimps = kzalloc(sizeof(void *)*(2*2), GFP_KERNEL); + atc->srcimps = kzalloc(sizeof(void *)*num_srcs, GFP_KERNEL); if (!atc->srcimps) return -ENOMEM; @@ -1351,8 +1388,9 @@ static int atc_get_resources(struct ct_atc *atc) daio_mgr = (struct daio_mgr *)atc->rsc_mgrs[DAIO]; da_desc.msr = atc->msr; - for (i = 0, atc->n_daio = 0; i < DAIONUM-1; i++) { - da_desc.type = i; + for (i = 0, atc->n_daio = 0; i < num_daios; i++) { + da_desc.type = (atc->model != CTSB073X) ? i : + ((i == SPDIFIO) ? SPDIFI1 : i); err = daio_mgr->get_daio(daio_mgr, &da_desc, (struct daio **)&atc->daios[i]); if (err) { @@ -1362,23 +1400,12 @@ static int atc_get_resources(struct ct_atc *atc) } atc->n_daio++; } - if (atc->model == CTSB073X) - da_desc.type = SPDIFI1; - else - da_desc.type = SPDIFIO; - err = daio_mgr->get_daio(daio_mgr, &da_desc, - (struct daio **)&atc->daios[i]); - if (err) { - printk(KERN_ERR "ctxfi: Failed to get S/PDIF-in resource!!!\n"); - return err; - } - atc->n_daio++; src_mgr = atc->rsc_mgrs[SRC]; src_dsc.multi = 1; src_dsc.msr = atc->msr; src_dsc.mode = ARCRW; - for (i = 0, atc->n_src = 0; i < (2*2); i++) { + for (i = 0, atc->n_src = 0; i < num_srcs; i++) { err = src_mgr->get_src(src_mgr, &src_dsc, (struct src **)&atc->srcs[i]); if (err) @@ -1388,8 +1415,8 @@ static int atc_get_resources(struct ct_atc *atc) } srcimp_mgr = atc->rsc_mgrs[SRCIMP]; - srcimp_dsc.msr = 8; /* SRCIMPs for S/PDIFIn SRT */ - for (i = 0, atc->n_srcimp = 0; i < (2*1); i++) { + srcimp_dsc.msr = 8; + for (i = 0, atc->n_srcimp = 0; i < num_srcs; i++) { err = srcimp_mgr->get_srcimp(srcimp_mgr, &srcimp_dsc, (struct srcimp **)&atc->srcimps[i]); if (err) @@ -1397,15 +1424,6 @@ static int atc_get_resources(struct ct_atc *atc) atc->n_srcimp++; } - srcimp_dsc.msr = 8; /* SRCIMPs for LINE/MICIn SRT */ - for (i = 0; i < (2*1); i++) { - err = srcimp_mgr->get_srcimp(srcimp_mgr, &srcimp_dsc, - (struct srcimp **)&atc->srcimps[2*1+i]); - if (err) - return err; - - atc->n_srcimp++; - } sum_mgr = atc->rsc_mgrs[SUM]; sum_dsc.msr = atc->msr; @@ -1488,6 +1506,18 @@ static void atc_connect_resources(struct ct_atc *atc) src = atc->srcs[3]; mixer->set_input_right(mixer, MIX_LINE_IN, &src->rsc); + if (atc->model == CTSB1270) { + /* Titanium HD has a dedicated ADC for the Mic. */ + dai = container_of(atc->daios[MIC], struct dai, daio); + atc_connect_dai(atc->rsc_mgrs[SRC], dai, + (struct src **)&atc->srcs[4], + (struct srcimp **)&atc->srcimps[4]); + src = atc->srcs[4]; + mixer->set_input_left(mixer, MIX_MIC_IN, &src->rsc); + src = atc->srcs[5]; + mixer->set_input_right(mixer, MIX_MIC_IN, &src->rsc); + } + dai = container_of(atc->daios[SPDIFIO], struct dai, daio); atc_connect_dai(atc->rsc_mgrs[SRC], dai, (struct src **)&atc->srcs[0], @@ -1606,12 +1636,17 @@ static struct ct_atc atc_preset __devinitdata = { .line_clfe_unmute = atc_line_clfe_unmute, .line_rear_unmute = atc_line_rear_unmute, .line_in_unmute = atc_line_in_unmute, + .mic_unmute = atc_mic_unmute, .spdif_out_unmute = atc_spdif_out_unmute, .spdif_in_unmute = atc_spdif_in_unmute, .spdif_out_get_status = atc_spdif_out_get_status, .spdif_out_set_status = atc_spdif_out_set_status, .spdif_out_passthru = atc_spdif_out_passthru, - .have_digit_io_switch = atc_have_digit_io_switch, + .capabilities = atc_capabilities, + .output_switch_get = atc_output_switch_get, + .output_switch_put = atc_output_switch_put, + .mic_source_switch_get = atc_mic_source_switch_get, + .mic_source_switch_put = atc_mic_source_switch_put, #ifdef CONFIG_PM .suspend = atc_suspend, .resume = atc_resume, diff --git a/sound/pci/ctxfi/ctatc.h b/sound/pci/ctxfi/ctatc.h index 7167c01..3a0def6 100644 --- a/sound/pci/ctxfi/ctatc.h +++ b/sound/pci/ctxfi/ctatc.h @@ -25,6 +25,7 @@ #include #include "ctvmem.h" +#include "cthardware.h" #include "ctresource.h" enum CTALSADEVS { /* Types of alsa devices */ @@ -115,12 +116,17 @@ struct ct_atc { int (*line_clfe_unmute)(struct ct_atc *atc, unsigned char state); int (*line_rear_unmute)(struct ct_atc *atc, unsigned char state); int (*line_in_unmute)(struct ct_atc *atc, unsigned char state); + int (*mic_unmute)(struct ct_atc *atc, unsigned char state); int (*spdif_out_unmute)(struct ct_atc *atc, unsigned char state); int (*spdif_in_unmute)(struct ct_atc *atc, unsigned char state); int (*spdif_out_get_status)(struct ct_atc *atc, unsigned int *status); int (*spdif_out_set_status)(struct ct_atc *atc, unsigned int status); int (*spdif_out_passthru)(struct ct_atc *atc, unsigned char state); - int (*have_digit_io_switch)(struct ct_atc *atc); + struct capabilities (*capabilities)(struct ct_atc *atc); + int (*output_switch_get)(struct ct_atc *atc); + int (*output_switch_put)(struct ct_atc *atc, int position); + int (*mic_source_switch_get)(struct ct_atc *atc); + int (*mic_source_switch_put)(struct ct_atc *atc, int position); /* Don't touch! Used for internal object. */ void *rsc_mgrs[NUM_RSCTYP]; /* chip resource managers */ diff --git a/sound/pci/ctxfi/ctdaio.c b/sound/pci/ctxfi/ctdaio.c index 47d9ea9..0c00eb4 100644 --- a/sound/pci/ctxfi/ctdaio.c +++ b/sound/pci/ctxfi/ctdaio.c @@ -22,20 +22,9 @@ #include #include -#define DAIO_RESOURCE_NUM NUM_DAIOTYP #define DAIO_OUT_MAX SPDIFOO -union daio_usage { - struct { - unsigned short lineo1:1; - unsigned short lineo2:1; - unsigned short lineo3:1; - unsigned short lineo4:1; - unsigned short spdifoo:1; - unsigned short lineim:1; - unsigned short spdifio:1; - unsigned short spdifi1:1; - } bf; +struct daio_usage { unsigned short data; }; @@ -61,6 +50,7 @@ struct daio_rsc_idx idx_20k2[NUM_DAIOTYP] = { [LINEO3] = {.left = 0x50, .right = 0x51}, [LINEO4] = {.left = 0x70, .right = 0x71}, [LINEIM] = {.left = 0x45, .right = 0xc5}, + [MIC] = {.left = 0x55, .right = 0xd5}, [SPDIFOO] = {.left = 0x00, .right = 0x01}, [SPDIFIO] = {.left = 0x05, .right = 0x85}, }; @@ -138,6 +128,7 @@ static unsigned int daio_device_index(enum DAIOTYP type, struct hw *hw) case LINEO3: return 5; case LINEO4: return 6; case LINEIM: return 4; + case MIC: return 5; default: return -EINVAL; } default: @@ -519,17 +510,17 @@ static int dai_rsc_uninit(struct dai *dai) static int daio_mgr_get_rsc(struct rsc_mgr *mgr, enum DAIOTYP type) { - if (((union daio_usage *)mgr->rscs)->data & (0x1 << type)) + if (((struct daio_usage *)mgr->rscs)->data & (0x1 << type)) return -ENOENT; - ((union daio_usage *)mgr->rscs)->data |= (0x1 << type); + ((struct daio_usage *)mgr->rscs)->data |= (0x1 << type); return 0; } static int daio_mgr_put_rsc(struct rsc_mgr *mgr, enum DAIOTYP type) { - ((union daio_usage *)mgr->rscs)->data &= ~(0x1 << type); + ((struct daio_usage *)mgr->rscs)->data &= ~(0x1 << type); return 0; } @@ -712,7 +703,7 @@ int daio_mgr_create(void *hw, struct daio_mgr **rdaio_mgr) if (!daio_mgr) return -ENOMEM; - err = rsc_mgr_init(&daio_mgr->mgr, DAIO, DAIO_RESOURCE_NUM, hw); + err = rsc_mgr_init(&daio_mgr->mgr, DAIO, NUM_DAIOTYP, hw); if (err) goto error1; diff --git a/sound/pci/ctxfi/ctdaio.h b/sound/pci/ctxfi/ctdaio.h index 0f52ce5..85ccb6e 100644 --- a/sound/pci/ctxfi/ctdaio.h +++ b/sound/pci/ctxfi/ctdaio.h @@ -33,6 +33,7 @@ enum DAIOTYP { SPDIFOO, /* S/PDIF Out (Flexijack/Optical) */ LINEIM, SPDIFIO, /* S/PDIF In (Flexijack/Optical) on the card */ + MIC, /* Dedicated mic on Titanium HD */ SPDIFI1, /* S/PDIF In on internal Drive Bay */ NUM_DAIOTYP }; diff --git a/sound/pci/ctxfi/cthardware.h b/sound/pci/ctxfi/cthardware.h index af55405..908315b 100644 --- a/sound/pci/ctxfi/cthardware.h +++ b/sound/pci/ctxfi/cthardware.h @@ -39,6 +39,7 @@ enum CTCARDS { CT20K2_MODEL_FIRST = CTSB0760, CTHENDRIX, CTSB0880, + CTSB1270, CT20K2_UNKNOWN, NUM_CTCARDS /* This should always be the last */ }; @@ -60,6 +61,13 @@ struct card_conf { unsigned int msr; /* master sample rate in rsrs */ }; +struct capabilities { + unsigned int digit_io_switch:1; + unsigned int dedicated_mic:1; + unsigned int output_switch:1; + unsigned int mic_source_switch:1; +}; + struct hw { int (*card_init)(struct hw *hw, struct card_conf *info); int (*card_stop)(struct hw *hw); @@ -70,7 +78,11 @@ struct hw { #endif int (*is_adc_source_selected)(struct hw *hw, enum ADCSRC source); int (*select_adc_source)(struct hw *hw, enum ADCSRC source); - int (*have_digit_io_switch)(struct hw *hw); + struct capabilities (*capabilities)(struct hw *hw); + int (*output_switch_get)(struct hw *hw); + int (*output_switch_put)(struct hw *hw, int position); + int (*mic_source_switch_get)(struct hw *hw); + int (*mic_source_switch_put)(struct hw *hw, int position); /* SRC operations */ int (*src_rsc_get_ctrl_blk)(void **rblk); diff --git a/sound/pci/ctxfi/cthw20k1.c b/sound/pci/ctxfi/cthw20k1.c index a5c957d..a7df197 100644 --- a/sound/pci/ctxfi/cthw20k1.c +++ b/sound/pci/ctxfi/cthw20k1.c @@ -1777,10 +1777,17 @@ static int hw_adc_init(struct hw *hw, const struct adc_conf *info) return adc_init_SBx(hw, info->input, info->mic20db); } -static int hw_have_digit_io_switch(struct hw *hw) +static struct capabilities hw_capabilities(struct hw *hw) { + struct capabilities cap; + /* SB073x and Vista compatible cards have no digit IO switch */ - return !(hw->model == CTSB073X || hw->model == CTUAA); + cap.digit_io_switch = !(hw->model == CTSB073X || hw->model == CTUAA); + cap.dedicated_mic = 0; + cap.output_switch = 0; + cap.mic_source_switch = 0; + + return cap; } #define CTLBITS(a, b, c, d) (((a) << 24) | ((b) << 16) | ((c) << 8) | (d)) @@ -1933,7 +1940,7 @@ static int hw_card_start(struct hw *hw) if (hw->irq < 0) { err = request_irq(pci->irq, ct_20k1_interrupt, IRQF_SHARED, - "ctxfi", hw); + KBUILD_MODNAME, hw); if (err < 0) { printk(KERN_ERR "XFi: Cannot get irq %d\n", pci->irq); goto error2; @@ -2172,7 +2179,7 @@ static struct hw ct20k1_preset __devinitdata = { .pll_init = hw_pll_init, .is_adc_source_selected = hw_is_adc_input_selected, .select_adc_source = hw_adc_input_select, - .have_digit_io_switch = hw_have_digit_io_switch, + .capabilities = hw_capabilities, #ifdef CONFIG_PM .suspend = hw_suspend, .resume = hw_resume, diff --git a/sound/pci/ctxfi/cthw20k2.c b/sound/pci/ctxfi/cthw20k2.c index 5364164..d6c54b5 100644 --- a/sound/pci/ctxfi/cthw20k2.c +++ b/sound/pci/ctxfi/cthw20k2.c @@ -8,7 +8,7 @@ * @File cthw20k2.c * * @Brief - * This file contains the implementation of hardware access methord for 20k2. + * This file contains the implementation of hardware access method for 20k2. * * @Author Liu Chun * @Date May 14 2008 @@ -38,6 +38,8 @@ struct hw20k2 { unsigned char dev_id; unsigned char addr_size; unsigned char data_size; + + int mic_source; }; static u32 hw_read_20kx(struct hw *hw, u32 reg); @@ -1163,7 +1165,12 @@ static int hw_daio_init(struct hw *hw, const struct daio_conf *info) hw_write_20kx(hw, AUDIO_IO_TX_BLRCLK, 0x01010101); hw_write_20kx(hw, AUDIO_IO_RX_BLRCLK, 0); } else if (2 == info->msr) { - hw_write_20kx(hw, AUDIO_IO_MCLK, 0x11111111); + if (hw->model != CTSB1270) { + hw_write_20kx(hw, AUDIO_IO_MCLK, 0x11111111); + } else { + /* PCM4220 on Titanium HD is different. */ + hw_write_20kx(hw, AUDIO_IO_MCLK, 0x11011111); + } /* Specify all playing 96khz * EA [0] - Enabled * RTA [4:5] - 96kHz @@ -1175,6 +1182,10 @@ static int hw_daio_init(struct hw *hw, const struct daio_conf *info) * RTD [28:29] - 96kHz */ hw_write_20kx(hw, AUDIO_IO_TX_BLRCLK, 0x11111111); hw_write_20kx(hw, AUDIO_IO_RX_BLRCLK, 0); + } else if ((4 == info->msr) && (hw->model == CTSB1270)) { + hw_write_20kx(hw, AUDIO_IO_MCLK, 0x21011111); + hw_write_20kx(hw, AUDIO_IO_TX_BLRCLK, 0x21212121); + hw_write_20kx(hw, AUDIO_IO_RX_BLRCLK, 0); } else { printk(KERN_ALERT "ctxfi: ERROR!!! Invalid sampling rate!!!\n"); return -EINVAL; @@ -1182,6 +1193,8 @@ static int hw_daio_init(struct hw *hw, const struct daio_conf *info) for (i = 0; i < 8; i++) { if (i <= 3) { + /* This comment looks wrong since loop is over 4 */ + /* channels and emu20k2 supports 4 spdif IOs. */ /* 1st 3 channels are SPDIFs (SB0960) */ if (i == 3) data = 0x1001001; @@ -1206,12 +1219,16 @@ static int hw_daio_init(struct hw *hw, const struct daio_conf *info) hw_write_20kx(hw, AUDIO_IO_TX_CSTAT_H+(0x40*i), 0x0B); } else { + /* Again, loop is over 4 channels not 5. */ /* Next 5 channels are I2S (SB0960) */ data = 0x11; hw_write_20kx(hw, AUDIO_IO_RX_CTL+(0x40*i), data); if (2 == info->msr) { /* Four channels per sample period */ data |= 0x1000; + } else if (4 == info->msr) { + /* FIXME: check this against the chip spec */ + data |= 0x2000; } hw_write_20kx(hw, AUDIO_IO_TX_CTL+(0x40*i), data); } @@ -1299,21 +1316,18 @@ static int hw_pll_init(struct hw *hw, unsigned int rsr) pllenb = 0xB; hw_write_20kx(hw, PLL_ENB, pllenb); - pllctl = 0x20D00000; - set_field(&pllctl, PLLCTL_FD, 16 - 4); + pllctl = 0x20C00000; + set_field(&pllctl, PLLCTL_B, 0); + set_field(&pllctl, PLLCTL_FD, 48000 == rsr ? 16 - 4 : 147 - 4); + set_field(&pllctl, PLLCTL_RD, 48000 == rsr ? 1 - 1 : 10 - 1); hw_write_20kx(hw, PLL_CTL, pllctl); mdelay(40); + pllctl = hw_read_20kx(hw, PLL_CTL); - set_field(&pllctl, PLLCTL_B, 0); - if (48000 == rsr) { - set_field(&pllctl, PLLCTL_FD, 16 - 2); - set_field(&pllctl, PLLCTL_RD, 1 - 1); /* 3000*16/1 = 48000 */ - } else { /* 44100 */ - set_field(&pllctl, PLLCTL_FD, 147 - 2); - set_field(&pllctl, PLLCTL_RD, 10 - 1); /* 3000*147/10 = 44100 */ - } + set_field(&pllctl, PLLCTL_FD, 48000 == rsr ? 16 - 2 : 147 - 2); hw_write_20kx(hw, PLL_CTL, pllctl); mdelay(40); + for (i = 0; i < 1000; i++) { pllstat = hw_read_20kx(hw, PLL_STAT); if (get_field(pllstat, PLLSTAT_PD)) @@ -1557,7 +1571,7 @@ static int hw20k2_i2c_write(struct hw *hw, u16 addr, u32 data) hw_write_20kx(hw, I2C_IF_STATUS, i2c_status); hw20k2_i2c_wait_data_ready(hw); - /* Dummy write to trigger the write oprtation */ + /* Dummy write to trigger the write operation */ hw_write_20kx(hw, I2C_IF_WDATA, 0); hw20k2_i2c_wait_data_ready(hw); @@ -1568,6 +1582,30 @@ static int hw20k2_i2c_write(struct hw *hw, u16 addr, u32 data) return 0; } +static void hw_dac_stop(struct hw *hw) +{ + u32 data; + data = hw_read_20kx(hw, GPIO_DATA); + data &= 0xFFFFFFFD; + hw_write_20kx(hw, GPIO_DATA, data); + mdelay(10); +} + +static void hw_dac_start(struct hw *hw) +{ + u32 data; + data = hw_read_20kx(hw, GPIO_DATA); + data |= 0x2; + hw_write_20kx(hw, GPIO_DATA, data); + mdelay(50); +} + +static void hw_dac_reset(struct hw *hw) +{ + hw_dac_stop(hw); + hw_dac_start(hw); +} + static int hw_dac_init(struct hw *hw, const struct dac_conf *info) { int err; @@ -1594,6 +1632,21 @@ static int hw_dac_init(struct hw *hw, const struct dac_conf *info) 0x00000000 /* Vol Control B4 */ }; + if (hw->model == CTSB1270) { + hw_dac_stop(hw); + data = hw_read_20kx(hw, GPIO_DATA); + data &= ~0x0600; + if (1 == info->msr) + data |= 0x0000; /* Single Speed Mode 0-50kHz */ + else if (2 == info->msr) + data |= 0x0200; /* Double Speed Mode 50-100kHz */ + else + data |= 0x0600; /* Quad Speed Mode 100-200kHz */ + hw_write_20kx(hw, GPIO_DATA, data); + hw_dac_start(hw); + return 0; + } + /* Set DAC reset bit as output */ data = hw_read_20kx(hw, GPIO_CTRL); data |= 0x02; @@ -1606,22 +1659,8 @@ static int hw_dac_init(struct hw *hw, const struct dac_conf *info) for (i = 0; i < 2; i++) { /* Reset DAC twice just in-case the chip * didn't initialized properly */ - data = hw_read_20kx(hw, GPIO_DATA); - /* GPIO data bit 1 */ - data &= 0xFFFFFFFD; - hw_write_20kx(hw, GPIO_DATA, data); - mdelay(10); - data |= 0x2; - hw_write_20kx(hw, GPIO_DATA, data); - mdelay(50); - - /* Reset the 2nd time */ - data &= 0xFFFFFFFD; - hw_write_20kx(hw, GPIO_DATA, data); - mdelay(10); - data |= 0x2; - hw_write_20kx(hw, GPIO_DATA, data); - mdelay(50); + hw_dac_reset(hw); + hw_dac_reset(hw); if (hw20k2_i2c_read(hw, CS4382_MC1, &cs_read.mode_control_1)) continue; @@ -1725,7 +1764,11 @@ End: static int hw_is_adc_input_selected(struct hw *hw, enum ADCSRC type) { u32 data; - + if (hw->model == CTSB1270) { + /* Titanium HD has two ADC chips, one for line in and one */ + /* for MIC. We don't need to switch the ADC input. */ + return 1; + } data = hw_read_20kx(hw, GPIO_DATA); switch (type) { case ADC_MICIN: @@ -1742,35 +1785,47 @@ static int hw_is_adc_input_selected(struct hw *hw, enum ADCSRC type) #define MIC_BOOST_0DB 0xCF #define MIC_BOOST_STEPS_PER_DB 2 -#define MIC_BOOST_20DB (MIC_BOOST_0DB + 20 * MIC_BOOST_STEPS_PER_DB) + +static void hw_wm8775_input_select(struct hw *hw, u8 input, s8 gain_in_db) +{ + u32 adcmc, gain; + + if (input > 3) + input = 3; + + adcmc = ((u32)1 << input) | 0x100; /* Link L+R gain... */ + + hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_ADCMC, adcmc), + MAKE_WM8775_DATA(adcmc)); + + if (gain_in_db < -103) + gain_in_db = -103; + if (gain_in_db > 24) + gain_in_db = 24; + + gain = gain_in_db * MIC_BOOST_STEPS_PER_DB + MIC_BOOST_0DB; + + hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_AADCL, gain), + MAKE_WM8775_DATA(gain)); + /* ...so there should be no need for the following. */ + hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_AADCR, gain), + MAKE_WM8775_DATA(gain)); +} static int hw_adc_input_select(struct hw *hw, enum ADCSRC type) { u32 data; - data = hw_read_20kx(hw, GPIO_DATA); switch (type) { case ADC_MICIN: data |= (0x1 << 14); hw_write_20kx(hw, GPIO_DATA, data); - hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_ADCMC, 0x101), - MAKE_WM8775_DATA(0x101)); /* Mic-in */ - hw20k2_i2c_write(hw, - MAKE_WM8775_ADDR(WM8775_AADCL, MIC_BOOST_20DB), - MAKE_WM8775_DATA(MIC_BOOST_20DB)); /* +20dB */ - hw20k2_i2c_write(hw, - MAKE_WM8775_ADDR(WM8775_AADCR, MIC_BOOST_20DB), - MAKE_WM8775_DATA(MIC_BOOST_20DB)); /* +20dB */ + hw_wm8775_input_select(hw, 0, 20); /* Mic, 20dB */ break; case ADC_LINEIN: data &= ~(0x1 << 14); hw_write_20kx(hw, GPIO_DATA, data); - hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_ADCMC, 0x102), - MAKE_WM8775_DATA(0x102)); /* Line-in */ - hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_AADCL, 0xCF), - MAKE_WM8775_DATA(0xCF)); /* No boost */ - hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_AADCR, 0xCF), - MAKE_WM8775_DATA(0xCF)); /* No boost */ + hw_wm8775_input_select(hw, 1, 0); /* Line-in, 0dB */ break; default: break; @@ -1782,7 +1837,7 @@ static int hw_adc_input_select(struct hw *hw, enum ADCSRC type) static int hw_adc_init(struct hw *hw, const struct adc_conf *info) { int err; - u32 mux = 2, data, ctl; + u32 data, ctl; /* Set ADC reset bit as output */ data = hw_read_20kx(hw, GPIO_CTRL); @@ -1796,19 +1851,42 @@ static int hw_adc_init(struct hw *hw, const struct adc_conf *info) goto error; } - /* Make ADC in normal operation */ + /* Reset the ADC (reset is active low). */ data = hw_read_20kx(hw, GPIO_DATA); data &= ~(0x1 << 15); + hw_write_20kx(hw, GPIO_DATA, data); + + if (hw->model == CTSB1270) { + /* Set up the PCM4220 ADC on Titanium HD */ + data &= ~0x0C; + if (1 == info->msr) + data |= 0x00; /* Single Speed Mode 32-50kHz */ + else if (2 == info->msr) + data |= 0x08; /* Double Speed Mode 50-108kHz */ + else + data |= 0x04; /* Quad Speed Mode 108kHz-216kHz */ + hw_write_20kx(hw, GPIO_DATA, data); + } + mdelay(10); + /* Return the ADC to normal operation. */ data |= (0x1 << 15); hw_write_20kx(hw, GPIO_DATA, data); mdelay(50); + /* I2C write to register offset 0x0B to set ADC LRCLK polarity */ + /* invert bit, interface format to I2S, word length to 24-bit, */ + /* enable ADC high pass filter. Fixes bug 5323? */ + hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_IC, 0x26), + MAKE_WM8775_DATA(0x26)); + /* Set the master mode (256fs) */ if (1 == info->msr) { + /* slave mode, 128x oversampling 256fs */ hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_MMC, 0x02), MAKE_WM8775_DATA(0x02)); - } else if (2 == info->msr) { + } else if ((2 == info->msr) || (4 == info->msr)) { + /* slave mode, 64x oversampling, 256fs */ hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_MMC, 0x0A), MAKE_WM8775_DATA(0x0A)); } else { @@ -1818,55 +1896,113 @@ static int hw_adc_init(struct hw *hw, const struct adc_conf *info) goto error; } - /* Configure GPIO bit 14 change to line-in/mic-in */ - ctl = hw_read_20kx(hw, GPIO_CTRL); - ctl |= 0x1 << 14; - hw_write_20kx(hw, GPIO_CTRL, ctl); - - /* Check using Mic-in or Line-in */ - data = hw_read_20kx(hw, GPIO_DATA); - - if (mux == 1) { - /* Configures GPIO data to select Mic-in */ - data |= 0x1 << 14; - hw_write_20kx(hw, GPIO_DATA, data); - - hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_ADCMC, 0x101), - MAKE_WM8775_DATA(0x101)); /* Mic-in */ - hw20k2_i2c_write(hw, - MAKE_WM8775_ADDR(WM8775_AADCL, MIC_BOOST_20DB), - MAKE_WM8775_DATA(MIC_BOOST_20DB)); /* +20dB */ - hw20k2_i2c_write(hw, - MAKE_WM8775_ADDR(WM8775_AADCR, MIC_BOOST_20DB), - MAKE_WM8775_DATA(MIC_BOOST_20DB)); /* +20dB */ - } else if (mux == 2) { - /* Configures GPIO data to select Line-in */ - data &= ~(0x1 << 14); - hw_write_20kx(hw, GPIO_DATA, data); - - /* Setup ADC */ - hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_ADCMC, 0x102), - MAKE_WM8775_DATA(0x102)); /* Line-in */ - hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_AADCL, 0xCF), - MAKE_WM8775_DATA(0xCF)); /* No boost */ - hw20k2_i2c_write(hw, MAKE_WM8775_ADDR(WM8775_AADCR, 0xCF), - MAKE_WM8775_DATA(0xCF)); /* No boost */ + if (hw->model != CTSB1270) { + /* Configure GPIO bit 14 change to line-in/mic-in */ + ctl = hw_read_20kx(hw, GPIO_CTRL); + ctl |= 0x1 << 14; + hw_write_20kx(hw, GPIO_CTRL, ctl); + hw_adc_input_select(hw, ADC_LINEIN); } else { - printk(KERN_ALERT "ctxfi: ERROR!!! Invalid input mux!!!\n"); - err = -EINVAL; - goto error; + hw_wm8775_input_select(hw, 0, 0); } return 0; - error: hw20k2_i2c_uninit(hw); return err; } -static int hw_have_digit_io_switch(struct hw *hw) +static struct capabilities hw_capabilities(struct hw *hw) { - return 0; + struct capabilities cap; + + cap.digit_io_switch = 0; + cap.dedicated_mic = hw->model == CTSB1270; + cap.output_switch = hw->model == CTSB1270; + cap.mic_source_switch = hw->model == CTSB1270; + + return cap; +} + +static int hw_output_switch_get(struct hw *hw) +{ + u32 data = hw_read_20kx(hw, GPIO_EXT_DATA); + + switch (data & 0x30) { + case 0x00: + return 0; + case 0x10: + return 1; + case 0x20: + return 2; + default: + return 3; + } +} + +static int hw_output_switch_put(struct hw *hw, int position) +{ + u32 data; + + if (position == hw_output_switch_get(hw)) + return 0; + + /* Mute line and headphones (intended for anti-pop). */ + data = hw_read_20kx(hw, GPIO_DATA); + data |= (0x03 << 11); + hw_write_20kx(hw, GPIO_DATA, data); + + data = hw_read_20kx(hw, GPIO_EXT_DATA) & ~0x30; + switch (position) { + case 0: + break; + case 1: + data |= 0x10; + break; + default: + data |= 0x20; + } + hw_write_20kx(hw, GPIO_EXT_DATA, data); + + /* Unmute line and headphones. */ + data = hw_read_20kx(hw, GPIO_DATA); + data &= ~(0x03 << 11); + hw_write_20kx(hw, GPIO_DATA, data); + + return 1; +} + +static int hw_mic_source_switch_get(struct hw *hw) +{ + struct hw20k2 *hw20k2 = (struct hw20k2 *)hw; + + return hw20k2->mic_source; +} + +static int hw_mic_source_switch_put(struct hw *hw, int position) +{ + struct hw20k2 *hw20k2 = (struct hw20k2 *)hw; + + if (position == hw20k2->mic_source) + return 0; + + switch (position) { + case 0: + hw_wm8775_input_select(hw, 0, 0); /* Mic, 0dB */ + break; + case 1: + hw_wm8775_input_select(hw, 1, 0); /* FP Mic, 0dB */ + break; + case 2: + hw_wm8775_input_select(hw, 3, 0); /* Aux Ext, 0dB */ + break; + default: + return 0; + } + + hw20k2->mic_source = position; + + return 1; } static irqreturn_t ct_20k2_interrupt(int irq, void *dev_id) @@ -1925,7 +2061,7 @@ static int hw_card_start(struct hw *hw) if (hw->irq < 0) { err = request_irq(pci->irq, ct_20k2_interrupt, IRQF_SHARED, - "ctxfi", hw); + KBUILD_MODNAME, hw); if (err < 0) { printk(KERN_ERR "XFi: Cannot get irq %d\n", pci->irq); goto error2; @@ -2023,13 +2159,16 @@ static int hw_card_init(struct hw *hw, struct card_conf *info) /* Reset all SRC pending interrupts */ hw_write_20kx(hw, SRC_IP, 0); - /* TODO: detect the card ID and configure GPIO accordingly. */ - /* Configures GPIO (0xD802 0x98028) */ - /*hw_write_20kx(hw, GPIO_CTRL, 0x7F07);*/ - /* Configures GPIO (SB0880) */ - /*hw_write_20kx(hw, GPIO_CTRL, 0xFF07);*/ - hw_write_20kx(hw, GPIO_CTRL, 0xD802); - + if (hw->model != CTSB1270) { + /* TODO: detect the card ID and configure GPIO accordingly. */ + /* Configures GPIO (0xD802 0x98028) */ + /*hw_write_20kx(hw, GPIO_CTRL, 0x7F07);*/ + /* Configures GPIO (SB0880) */ + /*hw_write_20kx(hw, GPIO_CTRL, 0xFF07);*/ + hw_write_20kx(hw, GPIO_CTRL, 0xD802); + } else { + hw_write_20kx(hw, GPIO_CTRL, 0x9E5F); + } /* Enable audio ring */ hw_write_20kx(hw, MIXER_AR_ENABLE, 0x01); @@ -2106,7 +2245,11 @@ static struct hw ct20k2_preset __devinitdata = { .pll_init = hw_pll_init, .is_adc_source_selected = hw_is_adc_input_selected, .select_adc_source = hw_adc_input_select, - .have_digit_io_switch = hw_have_digit_io_switch, + .capabilities = hw_capabilities, + .output_switch_get = hw_output_switch_get, + .output_switch_put = hw_output_switch_put, + .mic_source_switch_get = hw_mic_source_switch_get, + .mic_source_switch_put = hw_mic_source_switch_put, #ifdef CONFIG_PM .suspend = hw_suspend, .resume = hw_resume, diff --git a/sound/pci/ctxfi/ctmixer.c b/sound/pci/ctxfi/ctmixer.c index c3519ff..0cc13ee 100644 --- a/sound/pci/ctxfi/ctmixer.c +++ b/sound/pci/ctxfi/ctmixer.c @@ -86,9 +86,7 @@ enum CTALSA_MIXER_CTL { MIXER_LINEIN_C_S, MIXER_MIC_C_S, MIXER_SPDIFI_C_S, - MIXER_LINEIN_P_S, MIXER_SPDIFO_P_S, - MIXER_SPDIFI_P_S, MIXER_WAVEF_P_S, MIXER_WAVER_P_S, MIXER_WAVEC_P_S, @@ -137,11 +135,11 @@ ct_kcontrol_init_table[NUM_CTALSA_MIXERS] = { }, [MIXER_LINEIN_P] = { .ctl = 1, - .name = "Line-in Playback Volume", + .name = "Line Playback Volume", }, [MIXER_LINEIN_C] = { .ctl = 1, - .name = "Line-in Capture Volume", + .name = "Line Capture Volume", }, [MIXER_MIC_P] = { .ctl = 1, @@ -153,15 +151,15 @@ ct_kcontrol_init_table[NUM_CTALSA_MIXERS] = { }, [MIXER_SPDIFI_P] = { .ctl = 1, - .name = "S/PDIF-in Playback Volume", + .name = "IEC958 Playback Volume", }, [MIXER_SPDIFI_C] = { .ctl = 1, - .name = "S/PDIF-in Capture Volume", + .name = "IEC958 Capture Volume", }, [MIXER_SPDIFO_P] = { .ctl = 1, - .name = "S/PDIF-out Playback Volume", + .name = "Digital Playback Volume", }, [MIXER_WAVEF_P] = { .ctl = 1, @@ -179,14 +177,13 @@ ct_kcontrol_init_table[NUM_CTALSA_MIXERS] = { .ctl = 1, .name = "Surround Playback Volume", }, - [MIXER_PCM_C_S] = { .ctl = 1, .name = "PCM Capture Switch", }, [MIXER_LINEIN_C_S] = { .ctl = 1, - .name = "Line-in Capture Switch", + .name = "Line Capture Switch", }, [MIXER_MIC_C_S] = { .ctl = 1, @@ -194,19 +191,11 @@ ct_kcontrol_init_table[NUM_CTALSA_MIXERS] = { }, [MIXER_SPDIFI_C_S] = { .ctl = 1, - .name = "S/PDIF-in Capture Switch", - }, - [MIXER_LINEIN_P_S] = { - .ctl = 1, - .name = "Line-in Playback Switch", + .name = "IEC958 Capture Switch", }, [MIXER_SPDIFO_P_S] = { .ctl = 1, - .name = "S/PDIF-out Playback Switch", - }, - [MIXER_SPDIFI_P_S] = { - .ctl = 1, - .name = "S/PDIF-in Playback Switch", + .name = "Digital Playback Switch", }, [MIXER_WAVEF_P_S] = { .ctl = 1, @@ -236,6 +225,8 @@ ct_mixer_recording_select(struct ct_mixer *mixer, enum CT_AMIXER_CTL type); static void ct_mixer_recording_unselect(struct ct_mixer *mixer, enum CT_AMIXER_CTL type); +/* FIXME: this static looks like it would fail if more than one card was */ +/* installed. */ static struct snd_kcontrol *kctls[2] = {NULL}; static enum CT_AMIXER_CTL get_amixer_index(enum CTALSA_MIXER_CTL alsa_index) @@ -420,6 +411,77 @@ static struct snd_kcontrol_new vol_ctl = { .tlv = { .p = ct_vol_db_scale }, }; +static int output_switch_info(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *info) +{ + static const char *const names[3] = { + "FP Headphones", "Headphones", "Speakers" + }; + + return snd_ctl_enum_info(info, 1, 3, names); +} + +static int output_switch_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct ct_atc *atc = snd_kcontrol_chip(kcontrol); + ucontrol->value.enumerated.item[0] = atc->output_switch_get(atc); + return 0; +} + +static int output_switch_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct ct_atc *atc = snd_kcontrol_chip(kcontrol); + if (ucontrol->value.enumerated.item[0] > 2) + return -EINVAL; + return atc->output_switch_put(atc, ucontrol->value.enumerated.item[0]); +} + +static struct snd_kcontrol_new output_ctl = { + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .name = "Analog Output Playback Enum", + .info = output_switch_info, + .get = output_switch_get, + .put = output_switch_put, +}; + +static int mic_source_switch_info(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *info) +{ + static const char *const names[3] = { + "Mic", "FP Mic", "Aux" + }; + + return snd_ctl_enum_info(info, 1, 3, names); +} + +static int mic_source_switch_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct ct_atc *atc = snd_kcontrol_chip(kcontrol); + ucontrol->value.enumerated.item[0] = atc->mic_source_switch_get(atc); + return 0; +} + +static int mic_source_switch_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct ct_atc *atc = snd_kcontrol_chip(kcontrol); + if (ucontrol->value.enumerated.item[0] > 2) + return -EINVAL; + return atc->mic_source_switch_put(atc, + ucontrol->value.enumerated.item[0]); +} + +static struct snd_kcontrol_new mic_source_ctl = { + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .name = "Mic Source Capture Enum", + .info = mic_source_switch_info, + .get = mic_source_switch_get, + .put = mic_source_switch_put, +}; + static void do_line_mic_switch(struct ct_atc *atc, enum CTALSA_MIXER_CTL type) { @@ -465,6 +527,7 @@ do_digit_io_switch(struct ct_atc *atc, int state) static void do_switch(struct ct_atc *atc, enum CTALSA_MIXER_CTL type, int state) { struct ct_mixer *mixer = atc->mixer; + struct capabilities cap = atc->capabilities(atc); /* Do changes in mixer. */ if ((SWH_CAPTURE_START <= type) && (SWH_CAPTURE_END >= type)) { @@ -477,8 +540,17 @@ static void do_switch(struct ct_atc *atc, enum CTALSA_MIXER_CTL type, int state) } } /* Do changes out of mixer. */ - if (state && (MIXER_LINEIN_C_S == type || MIXER_MIC_C_S == type)) - do_line_mic_switch(atc, type); + if (!cap.dedicated_mic && + (MIXER_LINEIN_C_S == type || MIXER_MIC_C_S == type)) { + if (state) + do_line_mic_switch(atc, type); + atc->line_in_unmute(atc, state); + } else if (cap.dedicated_mic && (MIXER_LINEIN_C_S == type)) + atc->line_in_unmute(atc, state); + else if (cap.dedicated_mic && (MIXER_MIC_C_S == type)) + atc->mic_unmute(atc, state); + else if (MIXER_SPDIFI_C_S == type) + atc->spdif_in_unmute(atc, state); else if (MIXER_WAVEF_P_S == type) atc->line_front_unmute(atc, state); else if (MIXER_WAVES_P_S == type) @@ -487,12 +559,8 @@ static void do_switch(struct ct_atc *atc, enum CTALSA_MIXER_CTL type, int state) atc->line_clfe_unmute(atc, state); else if (MIXER_WAVER_P_S == type) atc->line_rear_unmute(atc, state); - else if (MIXER_LINEIN_P_S == type) - atc->line_in_unmute(atc, state); else if (MIXER_SPDIFO_P_S == type) atc->spdif_out_unmute(atc, state); - else if (MIXER_SPDIFI_P_S == type) - atc->spdif_in_unmute(atc, state); else if (MIXER_DIGITAL_IO_S == type) do_digit_io_switch(atc, state); @@ -671,6 +739,7 @@ static int ct_mixer_kcontrols_create(struct ct_mixer *mixer) { enum CTALSA_MIXER_CTL type; struct ct_atc *atc = mixer->atc; + struct capabilities cap = atc->capabilities(atc); int err; /* Create snd kcontrol instances on demand */ @@ -684,8 +753,8 @@ static int ct_mixer_kcontrols_create(struct ct_mixer *mixer) } } - ct_kcontrol_init_table[MIXER_DIGITAL_IO_S].ctl = - atc->have_digit_io_switch(atc); + ct_kcontrol_init_table[MIXER_DIGITAL_IO_S].ctl = cap.digit_io_switch; + for (type = SWH_MIXER_START; type <= SWH_MIXER_END; type++) { if (ct_kcontrol_init_table[type].ctl) { swh_ctl.name = ct_kcontrol_init_table[type].name; @@ -708,6 +777,17 @@ static int ct_mixer_kcontrols_create(struct ct_mixer *mixer) if (err) return err; + if (cap.output_switch) { + err = ct_mixer_kcontrol_new(mixer, &output_ctl); + if (err) + return err; + } + + if (cap.mic_source_switch) { + err = ct_mixer_kcontrol_new(mixer, &mic_source_ctl); + if (err) + return err; + } atc->line_front_unmute(atc, 1); set_switch_state(mixer, MIXER_WAVEF_P_S, 1); atc->line_surround_unmute(atc, 0); @@ -719,13 +799,12 @@ static int ct_mixer_kcontrols_create(struct ct_mixer *mixer) atc->spdif_out_unmute(atc, 0); set_switch_state(mixer, MIXER_SPDIFO_P_S, 0); atc->line_in_unmute(atc, 0); - set_switch_state(mixer, MIXER_LINEIN_P_S, 0); + if (cap.dedicated_mic) + atc->mic_unmute(atc, 0); atc->spdif_in_unmute(atc, 0); - set_switch_state(mixer, MIXER_SPDIFI_P_S, 0); - - set_switch_state(mixer, MIXER_PCM_C_S, 1); - set_switch_state(mixer, MIXER_LINEIN_C_S, 1); - set_switch_state(mixer, MIXER_SPDIFI_C_S, 1); + set_switch_state(mixer, MIXER_PCM_C_S, 0); + set_switch_state(mixer, MIXER_LINEIN_C_S, 0); + set_switch_state(mixer, MIXER_SPDIFI_C_S, 0); return 0; } diff --git a/sound/pci/ctxfi/ctpcm.c b/sound/pci/ctxfi/ctpcm.c index 457d211..2c86226 100644 --- a/sound/pci/ctxfi/ctpcm.c +++ b/sound/pci/ctxfi/ctpcm.c @@ -404,7 +404,7 @@ int ct_alsa_pcm_create(struct ct_atc *atc, int err; int playback_count, capture_count; - playback_count = (IEC958 == device) ? 1 : 8; + playback_count = (IEC958 == device) ? 1 : 256; capture_count = (FRONT == device) ? 1 : 0; err = snd_pcm_new(atc->card, "ctxfi", device, playback_count, capture_count, &pcm); diff --git a/sound/pci/ctxfi/ctsrc.c b/sound/pci/ctxfi/ctsrc.c index c749fa7..e134b3a 100644 --- a/sound/pci/ctxfi/ctsrc.c +++ b/sound/pci/ctxfi/ctsrc.c @@ -20,7 +20,7 @@ #include "cthardware.h" #include -#define SRC_RESOURCE_NUM 64 +#define SRC_RESOURCE_NUM 256 #define SRCIMP_RESOURCE_NUM 256 static unsigned int conj_mask; diff --git a/sound/pci/ctxfi/ctvmem.h b/sound/pci/ctxfi/ctvmem.h index b23adfc..e6da60e 100644 --- a/sound/pci/ctxfi/ctvmem.h +++ b/sound/pci/ctxfi/ctvmem.h @@ -18,7 +18,7 @@ #ifndef CTVMEM_H #define CTVMEM_H -#define CT_PTP_NUM 1 /* num of device page table pages */ +#define CT_PTP_NUM 4 /* num of device page table pages */ #include #include diff --git a/sound/pci/ctxfi/xfi.c b/sound/pci/ctxfi/xfi.c index f42e7e1..33931ef 100644 --- a/sound/pci/ctxfi/xfi.c +++ b/sound/pci/ctxfi/xfi.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include "ctatc.h" @@ -80,11 +81,11 @@ ct_card_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) "are 48000 and 44100, Value 48000 is assumed.\n"); reference_rate = 48000; } - if ((multiple != 1) && (multiple != 2)) { + if ((multiple != 1) && (multiple != 2) && (multiple != 4)) { printk(KERN_ERR "ctxfi: Invalid multiple value %u!!!\n", multiple); printk(KERN_ERR "ctxfi: The valid values for multiple are " - "1 and 2, Value 2 is assumed.\n"); + "1, 2 and 4, Value 2 is assumed.\n"); multiple = 2; } err = ct_atc_create(card, pci, reference_rate, multiple, @@ -143,7 +144,7 @@ static int ct_card_resume(struct pci_dev *pci) #endif static struct pci_driver ct_driver = { - .name = "SB-XFi", + .name = KBUILD_MODNAME, .id_table = ct_pci_dev_ids, .probe = ct_card_probe, .remove = __devexit_p(ct_card_remove), diff --git a/sound/pci/echoaudio/darla20.c b/sound/pci/echoaudio/darla20.c index fe7ad64..d47e72a 100644 --- a/sound/pci/echoaudio/darla20.c +++ b/sound/pci/echoaudio/darla20.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include #include #include #include @@ -52,7 +52,7 @@ #include #include #include -#include +#include #include "echoaudio.h" MODULE_FIRMWARE("ea/darla20_dsp.fw"); diff --git a/sound/pci/echoaudio/darla24.c b/sound/pci/echoaudio/darla24.c index d1fd34b..413acf7 100644 --- a/sound/pci/echoaudio/darla24.c +++ b/sound/pci/echoaudio/darla24.c @@ -44,7 +44,7 @@ #include #include #include -#include +#include #include #include #include @@ -56,7 +56,7 @@ #include #include #include -#include +#include #include "echoaudio.h" MODULE_FIRMWARE("ea/darla24_dsp.fw"); diff --git a/sound/pci/echoaudio/echo3g.c b/sound/pci/echoaudio/echo3g.c index 1dffdc5..1ec4edc 100644 --- a/sound/pci/echoaudio/echo3g.c +++ b/sound/pci/echoaudio/echo3g.c @@ -51,7 +51,7 @@ #include #include #include -#include +#include #include #include #include @@ -64,7 +64,7 @@ #include #include #include -#include +#include #include "echoaudio.h" MODULE_FIRMWARE("ea/loader_dsp.fw"); diff --git a/sound/pci/echoaudio/echoaudio.c b/sound/pci/echoaudio/echoaudio.c index 20763dd..9fd694c 100644 --- a/sound/pci/echoaudio/echoaudio.c +++ b/sound/pci/echoaudio/echoaudio.c @@ -16,6 +16,8 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +#include + MODULE_AUTHOR("Giuliano Pochini "); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Echoaudio " ECHOCARD_NAME " soundcards driver"); @@ -1995,7 +1997,7 @@ static __devinit int snd_echo_create(struct snd_card *card, ioremap_nocache(chip->dsp_registers_phys, sz); if (request_irq(pci->irq, snd_echo_interrupt, IRQF_SHARED, - ECHOCARD_NAME, chip)) { + KBUILD_MODNAME, chip)) { snd_echo_free(chip); snd_printk(KERN_ERR "cannot grab irq\n"); return -EBUSY; @@ -2286,7 +2288,7 @@ static int snd_echo_resume(struct pci_dev *pci) kfree(commpage_bak); if (request_irq(pci->irq, snd_echo_interrupt, IRQF_SHARED, - ECHOCARD_NAME, chip)) { + KBUILD_MODNAME, chip)) { snd_echo_free(chip); snd_printk(KERN_ERR "cannot grab irq\n"); return -EBUSY; @@ -2327,7 +2329,7 @@ static void __devexit snd_echo_remove(struct pci_dev *pci) /* pci_driver definition */ static struct pci_driver driver = { - .name = "Echoaudio " ECHOCARD_NAME, + .name = KBUILD_MODNAME, .id_table = snd_echo_ids, .probe = snd_echo_probe, .remove = __devexit_p(snd_echo_remove), diff --git a/sound/pci/echoaudio/gina20.c b/sound/pci/echoaudio/gina20.c index 050e54a..039125b 100644 --- a/sound/pci/echoaudio/gina20.c +++ b/sound/pci/echoaudio/gina20.c @@ -44,7 +44,7 @@ #include #include #include -#include +#include #include #include #include @@ -56,7 +56,7 @@ #include #include #include -#include +#include #include "echoaudio.h" MODULE_FIRMWARE("ea/gina20_dsp.fw"); diff --git a/sound/pci/echoaudio/gina24.c b/sound/pci/echoaudio/gina24.c index 5748fc6..5e966f6 100644 --- a/sound/pci/echoaudio/gina24.c +++ b/sound/pci/echoaudio/gina24.c @@ -50,7 +50,7 @@ #include #include #include -#include +#include #include #include #include @@ -62,7 +62,7 @@ #include #include #include -#include +#include #include "echoaudio.h" MODULE_FIRMWARE("ea/loader_dsp.fw"); diff --git a/sound/pci/echoaudio/indigo.c b/sound/pci/echoaudio/indigo.c index 4ae5e35..c166b7e 100644 --- a/sound/pci/echoaudio/indigo.c +++ b/sound/pci/echoaudio/indigo.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include #include #include @@ -54,7 +54,7 @@ #include #include #include -#include +#include #include "echoaudio.h" MODULE_FIRMWARE("ea/loader_dsp.fw"); diff --git a/sound/pci/echoaudio/indigodj.c b/sound/pci/echoaudio/indigodj.c index 3550715..a3ef3b9 100644 --- a/sound/pci/echoaudio/indigodj.c +++ b/sound/pci/echoaudio/indigodj.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include #include #include @@ -54,7 +54,7 @@ #include #include #include -#include +#include #include "echoaudio.h" MODULE_FIRMWARE("ea/loader_dsp.fw"); diff --git a/sound/pci/echoaudio/indigodjx.c b/sound/pci/echoaudio/indigodjx.c index 19b191f..f516444 100644 --- a/sound/pci/echoaudio/indigodjx.c +++ b/sound/pci/echoaudio/indigodjx.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include #include #include @@ -54,7 +54,7 @@ #include #include #include -#include +#include #include "echoaudio.h" MODULE_FIRMWARE("ea/loader_dsp.fw"); diff --git a/sound/pci/echoaudio/indigoio.c b/sound/pci/echoaudio/indigoio.c index a9fcedf..c22c82f 100644 --- a/sound/pci/echoaudio/indigoio.c +++ b/sound/pci/echoaudio/indigoio.c @@ -43,7 +43,7 @@ #include #include #include -#include +#include #include #include #include @@ -55,7 +55,7 @@ #include #include #include -#include +#include #include "echoaudio.h" MODULE_FIRMWARE("ea/loader_dsp.fw"); diff --git a/sound/pci/echoaudio/indigoiox.c b/sound/pci/echoaudio/indigoiox.c index bcdfac6..86cf2d0 100644 --- a/sound/pci/echoaudio/indigoiox.c +++ b/sound/pci/echoaudio/indigoiox.c @@ -43,7 +43,7 @@ #include #include #include -#include +#include #include #include #include @@ -55,7 +55,7 @@ #include #include #include -#include +#include #include "echoaudio.h" MODULE_FIRMWARE("ea/loader_dsp.fw"); diff --git a/sound/pci/echoaudio/layla20.c b/sound/pci/echoaudio/layla20.c index d3a98c5..6a027f3 100644 --- a/sound/pci/echoaudio/layla20.c +++ b/sound/pci/echoaudio/layla20.c @@ -49,7 +49,7 @@ #include #include #include -#include +#include #include #include #include @@ -62,7 +62,7 @@ #include #include #include -#include +#include #include "echoaudio.h" MODULE_FIRMWARE("ea/layla20_dsp.fw"); diff --git a/sound/pci/echoaudio/layla24.c b/sound/pci/echoaudio/layla24.c index 2a1dca6..96a5991 100644 --- a/sound/pci/echoaudio/layla24.c +++ b/sound/pci/echoaudio/layla24.c @@ -51,7 +51,7 @@ #include #include #include -#include +#include #include #include #include @@ -64,7 +64,7 @@ #include #include #include -#include +#include #include "echoaudio.h" MODULE_FIRMWARE("ea/loader_dsp.fw"); diff --git a/sound/pci/echoaudio/mia.c b/sound/pci/echoaudio/mia.c index 9cdf14c..b8ce27e 100644 --- a/sound/pci/echoaudio/mia.c +++ b/sound/pci/echoaudio/mia.c @@ -50,7 +50,7 @@ #include #include #include -#include +#include #include #include #include @@ -63,7 +63,7 @@ #include #include #include -#include +#include #include "echoaudio.h" MODULE_FIRMWARE("ea/loader_dsp.fw"); diff --git a/sound/pci/echoaudio/mona.c b/sound/pci/echoaudio/mona.c index 1047be4..1283bfb 100644 --- a/sound/pci/echoaudio/mona.c +++ b/sound/pci/echoaudio/mona.c @@ -48,7 +48,7 @@ #include #include #include -#include +#include #include #include #include @@ -60,7 +60,7 @@ #include #include #include -#include +#include #include "echoaudio.h" MODULE_FIRMWARE("ea/loader_dsp.fw"); diff --git a/sound/pci/emu10k1/emu10k1.c b/sound/pci/emu10k1/emu10k1.c index aff8387..f33e3cc 100644 --- a/sound/pci/emu10k1/emu10k1.c +++ b/sound/pci/emu10k1/emu10k1.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include @@ -181,8 +181,10 @@ static int __devinit snd_card_emu10k1_probe(struct pci_dev *pci, } #endif - strcpy(card->driver, emu->card_capabilities->driver); - strcpy(card->shortname, emu->card_capabilities->name); + strlcpy(card->driver, emu->card_capabilities->driver, + sizeof(card->driver)); + strlcpy(card->shortname, emu->card_capabilities->name, + sizeof(card->shortname)); snprintf(card->longname, sizeof(card->longname), "%s (rev.%d, serial:0x%x) at 0x%lx, irq %i", card->shortname, emu->revision, emu->serial, emu->port, emu->irq); @@ -264,7 +266,7 @@ static int snd_emu10k1_resume(struct pci_dev *pci) #endif static struct pci_driver driver = { - .name = "EMU10K1_Audigy", + .name = KBUILD_MODNAME, .id_table = snd_emu10k1_ids, .probe = snd_card_emu10k1_probe, .remove = __devexit_p(snd_card_emu10k1_remove), diff --git a/sound/pci/emu10k1/emu10k1_callback.c b/sound/pci/emu10k1/emu10k1_callback.c index 7ef949d..8295950 100644 --- a/sound/pci/emu10k1/emu10k1_callback.c +++ b/sound/pci/emu10k1/emu10k1_callback.c @@ -18,6 +18,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include #include "emu10k1_synth_local.h" #include @@ -84,6 +85,8 @@ snd_emu10k1_ops_setup(struct snd_emux *emux) * get more voice for pcm * * terminate most inactive voice and give it as a pcm voice. + * + * voice_lock is already held. */ int snd_emu10k1_synth_get_voice(struct snd_emu10k1 *hw) @@ -91,12 +94,10 @@ snd_emu10k1_synth_get_voice(struct snd_emu10k1 *hw) struct snd_emux *emu; struct snd_emux_voice *vp; struct best_voice best[V_END]; - unsigned long flags; int i; emu = hw->synth; - spin_lock_irqsave(&emu->voice_lock, flags); lookup_voices(emu, hw, best, 1); /* no OFF voices */ for (i = 0; i < V_END; i++) { if (best[i].voice >= 0) { @@ -112,11 +113,9 @@ snd_emu10k1_synth_get_voice(struct snd_emu10k1 *hw) vp->emu->num_voices--; vp->ch = -1; vp->state = SNDRV_EMUX_ST_OFF; - spin_unlock_irqrestore(&emu->voice_lock, flags); return ch; } } - spin_unlock_irqrestore(&emu->voice_lock, flags); /* not found */ return -ENOMEM; @@ -416,7 +415,7 @@ start_voice(struct snd_emux_voice *vp) snd_emu10k1_ptr_write(hw, Z2, ch, 0); /* invalidate maps */ - temp = (hw->silent_page.addr << 1) | MAP_PTI_MASK; + temp = (hw->silent_page.addr << hw->address_mode) | (hw->address_mode ? MAP_PTI_MASK1 : MAP_PTI_MASK0); snd_emu10k1_ptr_write(hw, MAPA, ch, temp); snd_emu10k1_ptr_write(hw, MAPB, ch, temp); #if 0 @@ -437,7 +436,7 @@ start_voice(struct snd_emux_voice *vp) snd_emu10k1_ptr_write(hw, CDF, ch, sample); /* invalidate maps */ - temp = ((unsigned int)hw->silent_page.addr << 1) | MAP_PTI_MASK; + temp = ((unsigned int)hw->silent_page.addr << hw_address_mode) | (hw->address_mode ? MAP_PTI_MASK1 : MAP_PTI_MASK0); snd_emu10k1_ptr_write(hw, MAPA, ch, temp); snd_emu10k1_ptr_write(hw, MAPB, ch, temp); diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c index 0800bcc..705e73e 100644 --- a/sound/pci/emu10k1/emu10k1_main.c +++ b/sound/pci/emu10k1/emu10k1_main.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -281,7 +282,7 @@ static int snd_emu10k1_init(struct snd_emu10k1 *emu, int enable_ir, int resume) snd_emu10k1_ptr_write(emu, TCB, 0, 0); /* taken from original driver */ snd_emu10k1_ptr_write(emu, TCBS, 0, 4); /* taken from original driver */ - silent_page = (emu->silent_page.addr << 1) | MAP_PTI_MASK; + silent_page = (emu->silent_page.addr << emu->address_mode) | (emu->address_mode ? MAP_PTI_MASK1 : MAP_PTI_MASK0); for (ch = 0; ch < NUM_G; ch++) { snd_emu10k1_ptr_write(emu, MAPA, ch, silent_page); snd_emu10k1_ptr_write(emu, MAPB, ch, silent_page); @@ -347,6 +348,11 @@ static int snd_emu10k1_init(struct snd_emu10k1 *emu, int enable_ir, int resume) outl(reg | A_IOCFG_GPOUT0, emu->port + A_IOCFG); } + if (emu->address_mode == 0) { + /* use 16M in 4G */ + outl(inl(emu->port + HCFG) | HCFG_EXPANDED_MEM, emu->port + HCFG); + } + return 0; } @@ -1389,7 +1395,7 @@ static struct snd_emu_chip_details emu_chip_details[] = { * */ {.vendor = 0x1102, .device = 0x0008, .subsystem = 0x20011102, - .driver = "Audigy2", .name = "SB Audigy 2 ZS Notebook [SB0530]", + .driver = "Audigy2", .name = "Audigy 2 ZS Notebook [SB0530]", .id = "Audigy2", .emu10k2_chip = 1, .ca0108_chip = 1, @@ -1527,7 +1533,7 @@ static struct snd_emu_chip_details emu_chip_details[] = { .adc_1361t = 1, /* 24 bit capture instead of 16bit */ .ac97_chip = 1} , {.vendor = 0x1102, .device = 0x0004, .subsystem = 0x10051102, - .driver = "Audigy2", .name = "SB Audigy 2 Platinum EX [SB0280]", + .driver = "Audigy2", .name = "Audigy 2 Platinum EX [SB0280]", .id = "Audigy2", .emu10k2_chip = 1, .ca0102_chip = 1, @@ -1831,8 +1837,10 @@ int __devinit snd_emu10k1_create(struct snd_card *card, is_audigy = emu->audigy = c->emu10k2_chip; + /* set addressing mode */ + emu->address_mode = is_audigy ? 0 : 1; /* set the DMA transfer mask */ - emu->dma_mask = is_audigy ? AUDIGY_DMA_MASK : EMU10K1_DMA_MASK; + emu->dma_mask = emu->address_mode ? EMU10K1_DMA_MASK : AUDIGY_DMA_MASK; if (pci_set_dma_mask(pci, emu->dma_mask) < 0 || pci_set_consistent_dma_mask(pci, emu->dma_mask) < 0) { snd_printk(KERN_ERR "architecture does not support PCI busmaster DMA with mask 0x%lx\n", emu->dma_mask); @@ -1855,7 +1863,7 @@ int __devinit snd_emu10k1_create(struct snd_card *card, emu->max_cache_pages = max_cache_bytes >> PAGE_SHIFT; if (snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, snd_dma_pci_data(pci), - 32 * 1024, &emu->ptb_pages) < 0) { + (emu->address_mode ? 32 : 16) * 1024, &emu->ptb_pages) < 0) { err = -ENOMEM; goto error; } @@ -1921,7 +1929,7 @@ int __devinit snd_emu10k1_create(struct snd_card *card, /* irq handler must be registered after I/O ports are activated */ if (request_irq(pci->irq, snd_emu10k1_interrupt, IRQF_SHARED, - "EMU10K1", emu)) { + KBUILD_MODNAME, emu)) { err = -EBUSY; goto error; } @@ -1954,8 +1962,8 @@ int __devinit snd_emu10k1_create(struct snd_card *card, /* Clear silent pages and set up pointers */ memset(emu->silent_page.area, 0, PAGE_SIZE); - silent_page = emu->silent_page.addr << 1; - for (idx = 0; idx < MAXPAGES; idx++) + silent_page = emu->silent_page.addr << emu->address_mode; + for (idx = 0; idx < (emu->address_mode ? MAXPAGES1 : MAXPAGES0); idx++) ((u32 *)emu->ptb_pages.area)[idx] = cpu_to_le32(silent_page | idx); /* set up voice indices */ diff --git a/sound/pci/emu10k1/emu10k1_synth.c b/sound/pci/emu10k1/emu10k1_synth.c index ad7b714..4c41c90 100644 --- a/sound/pci/emu10k1/emu10k1_synth.c +++ b/sound/pci/emu10k1/emu10k1_synth.c @@ -20,6 +20,7 @@ #include "emu10k1_synth_local.h" #include +#include MODULE_AUTHOR("Takashi Iwai"); MODULE_DESCRIPTION("Routines for control of EMU10K1 WaveTable synth"); diff --git a/sound/pci/emu10k1/emu10k1x.c b/sound/pci/emu10k1/emu10k1x.c index 0c701e4..2228be9 100644 --- a/sound/pci/emu10k1/emu10k1x.c +++ b/sound/pci/emu10k1/emu10k1x.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include #include @@ -925,7 +925,7 @@ static int __devinit snd_emu10k1x_create(struct snd_card *card, } if (request_irq(pci->irq, snd_emu10k1x_interrupt, - IRQF_SHARED, "EMU10K1X", chip)) { + IRQF_SHARED, KBUILD_MODNAME, chip)) { snd_printk(KERN_ERR "emu10k1x: cannot grab irq %d\n", pci->irq); snd_emu10k1x_free(chip); return -EBUSY; @@ -1613,7 +1613,7 @@ MODULE_DEVICE_TABLE(pci, snd_emu10k1x_ids); // pci_driver definition static struct pci_driver driver = { - .name = "EMU10K1X", + .name = KBUILD_MODNAME, .id_table = snd_emu10k1x_ids, .probe = snd_emu10k1x_probe, .remove = __devexit_p(snd_emu10k1x_remove), diff --git a/sound/pci/emu10k1/emupcm.c b/sound/pci/emu10k1/emupcm.c index 622bace..c673d2b 100644 --- a/sound/pci/emu10k1/emupcm.c +++ b/sound/pci/emu10k1/emupcm.c @@ -379,7 +379,7 @@ static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu, snd_emu10k1_ptr_write(emu, Z1, voice, 0); snd_emu10k1_ptr_write(emu, Z2, voice, 0); /* invalidate maps */ - silent_page = ((unsigned int)emu->silent_page.addr << 1) | MAP_PTI_MASK; + silent_page = ((unsigned int)emu->silent_page.addr << emu->address_mode) | (emu->address_mode ? MAP_PTI_MASK1 : MAP_PTI_MASK0); snd_emu10k1_ptr_write(emu, MAPA, voice, silent_page); snd_emu10k1_ptr_write(emu, MAPB, voice, silent_page); /* modulation envelope */ @@ -1146,6 +1146,11 @@ static int snd_emu10k1_playback_open(struct snd_pcm_substream *substream) kfree(epcm); return err; } + err = snd_pcm_hw_rule_noresample(runtime, 48000); + if (err < 0) { + kfree(epcm); + return err; + } mix = &emu->pcm_mixer[substream->number]; for (i = 0; i < 4; i++) mix->send_routing[0][i] = mix->send_routing[1][i] = mix->send_routing[2][i] = i; diff --git a/sound/pci/emu10k1/emuproc.c b/sound/pci/emu10k1/emuproc.c index bc38dd4..9c499e6 100644 --- a/sound/pci/emu10k1/emuproc.c +++ b/sound/pci/emu10k1/emuproc.c @@ -241,31 +241,22 @@ static void snd_emu10k1_proc_spdif_read(struct snd_info_entry *entry, struct snd_emu10k1 *emu = entry->private_data; u32 value; u32 value2; - unsigned long flags; u32 rate; if (emu->card_capabilities->emu_model) { - spin_lock_irqsave(&emu->emu_lock, flags); snd_emu1010_fpga_read(emu, 0x38, &value); - spin_unlock_irqrestore(&emu->emu_lock, flags); if ((value & 0x1) == 0) { - spin_lock_irqsave(&emu->emu_lock, flags); snd_emu1010_fpga_read(emu, 0x2a, &value); snd_emu1010_fpga_read(emu, 0x2b, &value2); - spin_unlock_irqrestore(&emu->emu_lock, flags); rate = 0x1770000 / (((value << 5) | value2)+1); snd_iprintf(buffer, "ADAT Locked : %u\n", rate); } else { snd_iprintf(buffer, "ADAT Unlocked\n"); } - spin_lock_irqsave(&emu->emu_lock, flags); snd_emu1010_fpga_read(emu, 0x20, &value); - spin_unlock_irqrestore(&emu->emu_lock, flags); if ((value & 0x4) == 0) { - spin_lock_irqsave(&emu->emu_lock, flags); snd_emu1010_fpga_read(emu, 0x28, &value); snd_emu1010_fpga_read(emu, 0x29, &value2); - spin_unlock_irqrestore(&emu->emu_lock, flags); rate = 0x1770000 / (((value << 5) | value2)+1); snd_iprintf(buffer, "SPDIF Locked : %d\n", rate); } else { @@ -410,14 +401,11 @@ static void snd_emu_proc_emu1010_reg_read(struct snd_info_entry *entry, { struct snd_emu10k1 *emu = entry->private_data; u32 value; - unsigned long flags; int i; snd_iprintf(buffer, "EMU1010 Registers:\n\n"); for(i = 0; i < 0x40; i+=1) { - spin_lock_irqsave(&emu->emu_lock, flags); snd_emu1010_fpga_read(emu, i, &value); - spin_unlock_irqrestore(&emu->emu_lock, flags); snd_iprintf(buffer, "%02X: %08X, %02X\n", i, value, (value >> 8) & 0x7f); } } diff --git a/sound/pci/emu10k1/io.c b/sound/pci/emu10k1/io.c index 5ef7080..e4fba49 100644 --- a/sound/pci/emu10k1/io.c +++ b/sound/pci/emu10k1/io.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "p17v.h" unsigned int snd_emu10k1_ptr_read(struct snd_emu10k1 * emu, unsigned int reg, unsigned int chn) diff --git a/sound/pci/emu10k1/memory.c b/sound/pci/emu10k1/memory.c index c250614..87b7c65 100644 --- a/sound/pci/emu10k1/memory.c +++ b/sound/pci/emu10k1/memory.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -33,10 +34,11 @@ * aligned pages in others */ #define __set_ptb_entry(emu,page,addr) \ - (((u32 *)(emu)->ptb_pages.area)[page] = cpu_to_le32(((addr) << 1) | (page))) + (((u32 *)(emu)->ptb_pages.area)[page] = cpu_to_le32(((addr) << (emu->address_mode)) | (page))) #define UNIT_PAGES (PAGE_SIZE / EMUPAGESIZE) -#define MAX_ALIGN_PAGES (MAXPAGES / UNIT_PAGES) +#define MAX_ALIGN_PAGES0 (MAXPAGES0 / UNIT_PAGES) +#define MAX_ALIGN_PAGES1 (MAXPAGES1 / UNIT_PAGES) /* get aligned page from offset address */ #define get_aligned_page(offset) ((offset) >> PAGE_SHIFT) /* get offset address from aligned page */ @@ -123,7 +125,7 @@ static int search_empty_map_area(struct snd_emu10k1 *emu, int npages, struct lis } page = blk->mapped_page + blk->pages; } - size = MAX_ALIGN_PAGES - page; + size = (emu->address_mode ? MAX_ALIGN_PAGES1 : MAX_ALIGN_PAGES0) - page; if (size >= max_size) { *nextp = pos; return page; @@ -180,7 +182,7 @@ static int unmap_memblk(struct snd_emu10k1 *emu, struct snd_emu10k1_memblk *blk) q = get_emu10k1_memblk(p, mapped_link); end_page = q->mapped_page; } else - end_page = MAX_ALIGN_PAGES; + end_page = (emu->address_mode ? MAX_ALIGN_PAGES1 : MAX_ALIGN_PAGES0); /* remove links */ list_del(&blk->mapped_link); @@ -304,7 +306,7 @@ snd_emu10k1_alloc_pages(struct snd_emu10k1 *emu, struct snd_pcm_substream *subst if (snd_BUG_ON(!emu)) return NULL; if (snd_BUG_ON(runtime->dma_bytes <= 0 || - runtime->dma_bytes >= MAXPAGES * EMUPAGESIZE)) + runtime->dma_bytes >= (emu->address_mode ? MAXPAGES1 : MAXPAGES0) * EMUPAGESIZE)) return NULL; hdr = emu->memhdr; if (snd_BUG_ON(!hdr)) diff --git a/sound/pci/emu10k1/voice.c b/sound/pci/emu10k1/voice.c index 20b8da2..101e7cb 100644 --- a/sound/pci/emu10k1/voice.c +++ b/sound/pci/emu10k1/voice.c @@ -29,6 +29,7 @@ */ #include +#include #include #include diff --git a/sound/pci/ice1712/ak4xxx.c b/sound/pci/ice1712/ak4xxx.c index 90d560c..3981823 100644 --- a/sound/pci/ice1712/ak4xxx.c +++ b/sound/pci/ice1712/ak4xxx.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include "ice1712.h" diff --git a/sound/pci/ice1712/ice1712.c b/sound/pci/ice1712/ice1712.c index 3ec8fed..7a4a196 100644 --- a/sound/pci/ice1712/ice1712.c +++ b/sound/pci/ice1712/ice1712.c @@ -54,7 +54,7 @@ #include #include #include -#include +#include #include #include @@ -87,7 +87,7 @@ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;/* Enable this card */ static char *model[SNDRV_CARDS]; static int omni[SNDRV_CARDS]; /* Delta44 & 66 Omni I/O support */ -static int cs8427_timeout[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS-1)] = 500}; /* CS8427 S/PDIF transciever reset timeout value in msec */ +static int cs8427_timeout[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS-1)] = 500}; /* CS8427 S/PDIF transceiver reset timeout value in msec */ static int dxr_enable[SNDRV_CARDS]; /* DXR enable for DMX6FIRE */ module_param_array(index, int, NULL, 0444); @@ -686,9 +686,10 @@ static snd_pcm_uframes_t snd_ice1712_playback_pointer(struct snd_pcm_substream * if (!(snd_ice1712_read(ice, ICE1712_IREG_PBK_CTRL) & 1)) return 0; ptr = runtime->buffer_size - inw(ice->ddma_port + 4); + ptr = bytes_to_frames(substream->runtime, ptr); if (ptr == runtime->buffer_size) ptr = 0; - return bytes_to_frames(substream->runtime, ptr); + return ptr; } static snd_pcm_uframes_t snd_ice1712_playback_ds_pointer(struct snd_pcm_substream *substream) @@ -705,9 +706,10 @@ static snd_pcm_uframes_t snd_ice1712_playback_ds_pointer(struct snd_pcm_substrea addr = ICE1712_DSC_ADDR0; ptr = snd_ice1712_ds_read(ice, substream->number * 2, addr) - ice->playback_con_virt_addr[substream->number]; + ptr = bytes_to_frames(substream->runtime, ptr); if (ptr == substream->runtime->buffer_size) ptr = 0; - return bytes_to_frames(substream->runtime, ptr); + return ptr; } static snd_pcm_uframes_t snd_ice1712_capture_pointer(struct snd_pcm_substream *substream) @@ -718,9 +720,10 @@ static snd_pcm_uframes_t snd_ice1712_capture_pointer(struct snd_pcm_substream *s if (!(snd_ice1712_read(ice, ICE1712_IREG_CAP_CTRL) & 1)) return 0; ptr = inl(ICEREG(ice, CONCAP_ADDR)) - ice->capture_con_virt_addr; + ptr = bytes_to_frames(substream->runtime, ptr); if (ptr == substream->runtime->buffer_size) ptr = 0; - return bytes_to_frames(substream->runtime, ptr); + return ptr; } static const struct snd_pcm_hardware snd_ice1712_playback = { @@ -1114,9 +1117,10 @@ static snd_pcm_uframes_t snd_ice1712_playback_pro_pointer(struct snd_pcm_substre if (!(inl(ICEMT(ice, PLAYBACK_CONTROL)) & ICE1712_PLAYBACK_START)) return 0; ptr = ice->playback_pro_size - (inw(ICEMT(ice, PLAYBACK_SIZE)) << 2); + ptr = bytes_to_frames(substream->runtime, ptr); if (ptr == substream->runtime->buffer_size) ptr = 0; - return bytes_to_frames(substream->runtime, ptr); + return ptr; } static snd_pcm_uframes_t snd_ice1712_capture_pro_pointer(struct snd_pcm_substream *substream) @@ -1127,9 +1131,10 @@ static snd_pcm_uframes_t snd_ice1712_capture_pro_pointer(struct snd_pcm_substrea if (!(inl(ICEMT(ice, PLAYBACK_CONTROL)) & ICE1712_CAPTURE_START_SHADOW)) return 0; ptr = ice->capture_pro_size - (inw(ICEMT(ice, CAPTURE_SIZE)) << 2); + ptr = bytes_to_frames(substream->runtime, ptr); if (ptr == substream->runtime->buffer_size) ptr = 0; - return bytes_to_frames(substream->runtime, ptr); + return ptr; } static const struct snd_pcm_hardware snd_ice1712_playback_pro = { @@ -2595,8 +2600,6 @@ static int __devinit snd_ice1712_create(struct snd_card *card, snd_ice1712_proc_init(ice); synchronize_irq(pci->irq); - card->private_data = ice; - err = pci_request_regions(pci, "ICE1712"); if (err < 0) { kfree(ice); @@ -2609,7 +2612,7 @@ static int __devinit snd_ice1712_create(struct snd_card *card, ice->profi_port = pci_resource_start(pci, 3); if (request_irq(pci->irq, snd_ice1712_interrupt, IRQF_SHARED, - "ICE1712", ice)) { + KBUILD_MODNAME, ice)) { snd_printk(KERN_ERR "unable to grab IRQ %d\n", pci->irq); snd_ice1712_free(ice); return -EIO; @@ -2750,8 +2753,9 @@ static int __devinit snd_ice1712_probe(struct pci_dev *pci, if (!c->no_mpu401) { err = snd_mpu401_uart_new(card, 0, MPU401_HW_ICE1712, ICEREG(ice, MPU1_CTRL), - (c->mpu401_1_info_flags | MPU401_INFO_INTEGRATED), - ice->irq, 0, &ice->rmidi[0]); + c->mpu401_1_info_flags | + MPU401_INFO_INTEGRATED | MPU401_INFO_IRQ_HOOK, + -1, &ice->rmidi[0]); if (err < 0) { snd_card_free(card); return err; @@ -2766,8 +2770,9 @@ static int __devinit snd_ice1712_probe(struct pci_dev *pci, /* 2nd port used */ err = snd_mpu401_uart_new(card, 1, MPU401_HW_ICE1712, ICEREG(ice, MPU2_CTRL), - (c->mpu401_2_info_flags | MPU401_INFO_INTEGRATED), - ice->irq, 0, &ice->rmidi[1]); + c->mpu401_2_info_flags | + MPU401_INFO_INTEGRATED | MPU401_INFO_IRQ_HOOK, + -1, &ice->rmidi[1]); if (err < 0) { snd_card_free(card); @@ -2804,7 +2809,7 @@ static void __devexit snd_ice1712_remove(struct pci_dev *pci) } static struct pci_driver driver = { - .name = "ICE1712", + .name = KBUILD_MODNAME, .id_table = snd_ice1712_ids, .probe = snd_ice1712_probe, .remove = __devexit_p(snd_ice1712_remove), diff --git a/sound/pci/ice1712/ice1724.c b/sound/pci/ice1712/ice1724.c index c1498fa..4353e76 100644 --- a/sound/pci/ice1712/ice1724.c +++ b/sound/pci/ice1712/ice1724.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include @@ -2509,7 +2509,7 @@ static int __devinit snd_vt1724_create(struct snd_card *card, ice->profi_port = pci_resource_start(pci, 1); if (request_irq(pci->irq, snd_vt1724_interrupt, - IRQF_SHARED, "ICE1724", ice)) { + IRQF_SHARED, KBUILD_MODNAME, ice)) { snd_printk(KERN_ERR "unable to grab IRQ %d\n", pci->irq); snd_vt1724_free(ice); return -EIO; @@ -2802,7 +2802,7 @@ static int snd_vt1724_resume(struct pci_dev *pci) #endif static struct pci_driver driver = { - .name = "ICE1724", + .name = KBUILD_MODNAME, .id_table = snd_vt1724_ids, .probe = snd_vt1724_probe, .remove = __devexit_p(snd_vt1724_remove), diff --git a/sound/pci/oxygen/oxygen.c b/sound/pci/oxygen/oxygen.c index d7e8ddd..5f3a13d 100644 --- a/sound/pci/oxygen/oxygen.c +++ b/sound/pci/oxygen/oxygen.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -859,7 +860,7 @@ static int __devinit generic_oxygen_probe(struct pci_dev *pci, } static struct pci_driver oxygen_driver = { - .name = "CMI8788", + .name = KBUILD_MODNAME, .id_table = oxygen_ids, .probe = generic_oxygen_probe, .remove = __devexit_p(oxygen_pci_remove), diff --git a/sound/pci/oxygen/oxygen_io.c b/sound/pci/oxygen/oxygen_io.c index f5164b1..521eae4 100644 --- a/sound/pci/oxygen/oxygen_io.c +++ b/sound/pci/oxygen/oxygen_io.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include diff --git a/sound/pci/oxygen/oxygen_lib.c b/sound/pci/oxygen/oxygen_lib.c index 70b7398..92e2d67 100644 --- a/sound/pci/oxygen/oxygen_lib.c +++ b/sound/pci/oxygen/oxygen_lib.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -655,7 +656,7 @@ int oxygen_pci_probe(struct pci_dev *pci, int index, char *id, chip->model.init(chip); err = request_irq(pci->irq, oxygen_interrupt, IRQF_SHARED, - DRIVER, chip); + KBUILD_MODNAME, chip); if (err < 0) { snd_printk(KERN_ERR "cannot grab interrupt %d\n", pci->irq); goto err_card; @@ -678,15 +679,15 @@ int oxygen_pci_probe(struct pci_dev *pci, int index, char *id, goto err_card; if (chip->model.device_config & (MIDI_OUTPUT | MIDI_INPUT)) { - unsigned int info_flags = MPU401_INFO_INTEGRATED; + unsigned int info_flags = + MPU401_INFO_INTEGRATED | MPU401_INFO_IRQ_HOOK; if (chip->model.device_config & MIDI_OUTPUT) info_flags |= MPU401_INFO_OUTPUT; if (chip->model.device_config & MIDI_INPUT) info_flags |= MPU401_INFO_INPUT; err = snd_mpu401_uart_new(card, 0, MPU401_HW_CMIPCI, chip->addr + OXYGEN_MPU401, - info_flags, 0, 0, - &chip->midi); + info_flags, -1, &chip->midi); if (err < 0) goto err_card; } diff --git a/sound/pci/oxygen/oxygen_mixer.c b/sound/pci/oxygen/oxygen_mixer.c index 26c7e8b..c0dbb52 100644 --- a/sound/pci/oxygen/oxygen_mixer.c +++ b/sound/pci/oxygen/oxygen_mixer.c @@ -618,9 +618,12 @@ static int ac97_volume_get(struct snd_kcontrol *ctl, mutex_lock(&chip->mutex); reg = oxygen_read_ac97(chip, codec, index); mutex_unlock(&chip->mutex); - value->value.integer.value[0] = 31 - (reg & 0x1f); - if (stereo) - value->value.integer.value[1] = 31 - ((reg >> 8) & 0x1f); + if (!stereo) { + value->value.integer.value[0] = 31 - (reg & 0x1f); + } else { + value->value.integer.value[0] = 31 - ((reg >> 8) & 0x1f); + value->value.integer.value[1] = 31 - (reg & 0x1f); + } return 0; } @@ -636,14 +639,14 @@ static int ac97_volume_put(struct snd_kcontrol *ctl, mutex_lock(&chip->mutex); oldreg = oxygen_read_ac97(chip, codec, index); - newreg = oldreg; - newreg = (newreg & ~0x1f) | - (31 - (value->value.integer.value[0] & 0x1f)); - if (stereo) - newreg = (newreg & ~0x1f00) | - ((31 - (value->value.integer.value[1] & 0x1f)) << 8); - else - newreg = (newreg & ~0x1f00) | ((newreg & 0x1f) << 8); + if (!stereo) { + newreg = oldreg & ~0x1f; + newreg |= 31 - (value->value.integer.value[0] & 0x1f); + } else { + newreg = oldreg & ~0x1f1f; + newreg |= (31 - (value->value.integer.value[0] & 0x1f)) << 8; + newreg |= 31 - (value->value.integer.value[1] & 0x1f); + } change = newreg != oldreg; if (change) oxygen_write_ac97(chip, codec, index, newreg); diff --git a/sound/pci/oxygen/oxygen_pcm.c b/sound/pci/oxygen/oxygen_pcm.c index d5533e3..cc0bcd9 100644 --- a/sound/pci/oxygen/oxygen_pcm.c +++ b/sound/pci/oxygen/oxygen_pcm.c @@ -168,12 +168,6 @@ static int oxygen_open(struct snd_pcm_substream *substream, if (err < 0) return err; } - if (channel == PCM_MULTICH) { - err = snd_pcm_hw_constraint_minmax - (runtime, SNDRV_PCM_HW_PARAM_PERIOD_TIME, 0, 8192000); - if (err < 0) - return err; - } snd_pcm_set_sync(substream); chip->streams[channel] = substream; diff --git a/sound/pci/oxygen/virtuoso.c b/sound/pci/oxygen/virtuoso.c index 469010a..7edd5e4 100644 --- a/sound/pci/oxygen/virtuoso.c +++ b/sound/pci/oxygen/virtuoso.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -51,6 +52,8 @@ static DEFINE_PCI_DEVICE_TABLE(xonar_ids) = { { OXYGEN_PCI_SUBID(0x1043, 0x835d) }, { OXYGEN_PCI_SUBID(0x1043, 0x835e) }, { OXYGEN_PCI_SUBID(0x1043, 0x838e) }, + { OXYGEN_PCI_SUBID(0x1043, 0x8522) }, + { OXYGEN_PCI_SUBID(0x1043, 0x85f4) }, { OXYGEN_PCI_SUBID_BROKEN_EEPROM }, { } }; @@ -88,7 +91,7 @@ static int __devinit xonar_probe(struct pci_dev *pci, } static struct pci_driver xonar_driver = { - .name = "AV200", + .name = KBUILD_MODNAME, .id_table = xonar_ids, .probe = xonar_probe, .remove = __devexit_p(oxygen_pci_remove), diff --git a/sound/pci/oxygen/xonar_dg.c b/sound/pci/oxygen/xonar_dg.c index bc6eb58..59eda0a 100644 --- a/sound/pci/oxygen/xonar_dg.c +++ b/sound/pci/oxygen/xonar_dg.c @@ -294,6 +294,16 @@ static int output_switch_put(struct snd_kcontrol *ctl, oxygen_write16_masked(chip, OXYGEN_GPIO_DATA, data->output_sel == 1 ? GPIO_HP_REAR : 0, GPIO_HP_REAR); + oxygen_write8_masked(chip, OXYGEN_PLAY_ROUTING, + data->output_sel == 0 ? + OXYGEN_PLAY_MUTE01 : + OXYGEN_PLAY_MUTE23 | + OXYGEN_PLAY_MUTE45 | + OXYGEN_PLAY_MUTE67, + OXYGEN_PLAY_MUTE01 | + OXYGEN_PLAY_MUTE23 | + OXYGEN_PLAY_MUTE45 | + OXYGEN_PLAY_MUTE67); } mutex_unlock(&chip->mutex); return changed; @@ -597,7 +607,7 @@ struct oxygen_model model_xonar_dg = { .model_data_size = sizeof(struct dg), .device_config = PLAYBACK_0_TO_I2S | PLAYBACK_1_TO_SPDIF | - CAPTURE_0_FROM_I2S_2, + CAPTURE_0_FROM_I2S_1, .dac_channels_pcm = 6, .dac_channels_mixer = 0, .function_flags = OXYGEN_FUNCTION_SPI, diff --git a/sound/pci/oxygen/xonar_pcm179x.c b/sound/pci/oxygen/xonar_pcm179x.c index 32d096c..4cac06a 100644 --- a/sound/pci/oxygen/xonar_pcm179x.c +++ b/sound/pci/oxygen/xonar_pcm179x.c @@ -100,8 +100,8 @@ */ /* - * Xonar Essence ST (Deluxe)/STX - * ----------------------------- + * Xonar Essence ST (Deluxe)/STX (II) + * ---------------------------------- * * CMI8788: * @@ -1074,6 +1074,7 @@ static const struct oxygen_model model_xonar_st = { .device_config = PLAYBACK_0_TO_I2S | PLAYBACK_1_TO_SPDIF | CAPTURE_0_FROM_I2S_2 | + CAPTURE_1_FROM_SPDIF | AC97_FMIC_SWITCH, .dac_channels_pcm = 2, .dac_channels_mixer = 2, @@ -1137,6 +1138,14 @@ int __devinit get_xonar_pcm179x_model(struct oxygen *chip, chip->model.resume = xonar_stx_resume; chip->model.set_dac_params = set_pcm1796_params; break; + case 0x85f4: + chip->model = model_xonar_st; + /* TODO: daughterboard support */ + chip->model.shortname = "Xonar STX II"; + chip->model.init = xonar_stx_init; + chip->model.resume = xonar_stx_resume; + chip->model.set_dac_params = set_pcm1796_params; + break; default: return -EINVAL; } diff --git a/sound/pci/oxygen/xonar_wm87x6.c b/sound/pci/oxygen/xonar_wm87x6.c index 915546a..30495b8 100644 --- a/sound/pci/oxygen/xonar_wm87x6.c +++ b/sound/pci/oxygen/xonar_wm87x6.c @@ -1255,7 +1255,6 @@ static void dump_wm87x6_registers(struct oxygen *chip, } static const struct oxygen_model model_xonar_ds = { - .shortname = "Xonar DS", .longname = "Asus Virtuoso 66", .chip = "AV200", .init = xonar_ds_init, @@ -1325,6 +1324,11 @@ int __devinit get_xonar_wm87x6_model(struct oxygen *chip, switch (id->subdevice) { case 0x838e: chip->model = model_xonar_ds; + chip->model.shortname = "Xonar DS"; + break; + case 0x8522: + chip->model = model_xonar_ds; + chip->model.shortname = "Xonar DSX"; break; case 0x835e: chip->model = model_xonar_hdav_slim; diff --git a/sound/pci/pcxhr/pcxhr.c b/sound/pci/pcxhr/pcxhr.c index 95cfde2..56a5265 100644 --- a/sound/pci/pcxhr/pcxhr.c +++ b/sound/pci/pcxhr/pcxhr.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include @@ -1501,7 +1501,7 @@ static int __devinit pcxhr_probe(struct pci_dev *pci, mgr->irq = -1; if (request_irq(pci->irq, pcxhr_interrupt, IRQF_SHARED, - card_name, mgr)) { + KBUILD_MODNAME, mgr)) { snd_printk(KERN_ERR "unable to grab IRQ %d\n", pci->irq); pcxhr_free(mgr); return -EBUSY; @@ -1608,7 +1608,7 @@ static void __devexit pcxhr_remove(struct pci_dev *pci) } static struct pci_driver driver = { - .name = "Digigram pcxhr", + .name = KBUILD_MODNAME, .id_table = pcxhr_ids, .probe = pcxhr_probe, .remove = __devexit_p(pcxhr_remove), diff --git a/sound/pci/pcxhr/pcxhr_hwdep.c b/sound/pci/pcxhr/pcxhr_hwdep.c index 17cb123..ec1587c 100644 --- a/sound/pci/pcxhr/pcxhr_hwdep.c +++ b/sound/pci/pcxhr/pcxhr_hwdep.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/sound/pci/riptide/riptide.c b/sound/pci/riptide/riptide.c index ad5202e..c86044f 100644 --- a/sound/pci/riptide/riptide.c +++ b/sound/pci/riptide/riptide.c @@ -98,6 +98,7 @@ #include #include #include +#include #include #include #include @@ -1890,7 +1891,7 @@ snd_riptide_create(struct snd_card *card, struct pci_dev *pci, UNSET_AIE(hwport); if (request_irq(pci->irq, snd_riptide_interrupt, IRQF_SHARED, - "RIPTIDE", chip)) { + KBUILD_MODNAME, chip)) { snd_printk(KERN_ERR "Riptide: unable to grab IRQ %d\n", pci->irq); snd_riptide_free(chip); @@ -2025,32 +2026,43 @@ snd_riptide_joystick_probe(struct pci_dev *pci, const struct pci_device_id *id) { static int dev; struct gameport *gameport; + int ret; if (dev >= SNDRV_CARDS) return -ENODEV; + if (!enable[dev]) { - dev++; - return -ENOENT; + ret = -ENOENT; + goto inc_dev; } - if (!joystick_port[dev++]) - return 0; + if (!joystick_port[dev]) { + ret = 0; + goto inc_dev; + } gameport = gameport_allocate_port(); - if (!gameport) - return -ENOMEM; + if (!gameport) { + ret = -ENOMEM; + goto inc_dev; + } if (!request_region(joystick_port[dev], 8, "Riptide gameport")) { snd_printk(KERN_WARNING "Riptide: cannot grab gameport 0x%x\n", joystick_port[dev]); gameport_free_port(gameport); - return -EBUSY; + ret = -EBUSY; + goto inc_dev; } gameport->io = joystick_port[dev]; gameport_register_port(gameport); pci_set_drvdata(pci, gameport); - return 0; + + ret = 0; +inc_dev: + dev++; + return ret; } static void __devexit snd_riptide_joystick_remove(struct pci_dev *pci) @@ -2109,7 +2121,7 @@ snd_card_riptide_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) val = mpu_port[dev]; pci_write_config_word(chip->pci, PCI_EXT_MPU_Base, val); err = snd_mpu401_uart_new(card, 0, MPU401_HW_RIPTIDE, - val, 0, chip->irq, 0, + val, MPU401_INFO_IRQ_HOOK, -1, &chip->rmidi); if (err < 0) snd_printk(KERN_WARNING @@ -2176,7 +2188,7 @@ static void __devexit snd_card_riptide_remove(struct pci_dev *pci) } static struct pci_driver driver = { - .name = "RIPTIDE", + .name = KBUILD_MODNAME, .id_table = snd_riptide_ids, .probe = snd_card_riptide_probe, .remove = __devexit_p(snd_card_riptide_remove), @@ -2188,7 +2200,7 @@ static struct pci_driver driver = { #ifdef SUPPORT_JOYSTICK static struct pci_driver joystick_driver = { - .name = "Riptide Joystick", + .name = KBUILD_MODNAME "-joystick", .id_table = snd_riptide_joystick_ids, .probe = snd_riptide_joystick_probe, .remove = __devexit_p(snd_riptide_joystick_remove), diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c index 2d83324..f2a3758 100644 --- a/sound/pci/rme9652/hdsp.c +++ b/sound/pci/rme9652/hdsp.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include @@ -151,7 +151,7 @@ MODULE_FIRMWARE("digiface_firmware_rev11.bin"); #define HDSP_PROGRAM 0x020 #define HDSP_CONFIG_MODE_0 0x040 #define HDSP_CONFIG_MODE_1 0x080 -#define HDSP_VERSION_BIT 0x100 +#define HDSP_VERSION_BIT (0x100 | HDSP_S_LOAD) #define HDSP_BIGENDIAN_MODE 0x200 #define HDSP_RD_MULTIPLE 0x400 #define HDSP_9652_ENABLE_MIXER 0x800 @@ -5482,7 +5482,7 @@ static int __devinit snd_hdsp_create(struct snd_card *card, } if (request_irq(pci->irq, snd_hdsp_interrupt, IRQF_SHARED, - "hdsp", hdsp)) { + KBUILD_MODNAME, hdsp)) { snd_printk(KERN_ERR "Hammerfall-DSP: unable to use IRQ %d\n", pci->irq); return -EBUSY; } @@ -5637,7 +5637,7 @@ static void __devexit snd_hdsp_remove(struct pci_dev *pci) } static struct pci_driver driver = { - .name = "RME Hammerfall DSP", + .name = KBUILD_MODNAME, .id_table = snd_hdsp_ids, .probe = snd_hdsp_probe, .remove = __devexit_p(snd_hdsp_remove), diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index c8e402f..71a3d52 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include #include @@ -520,15 +520,9 @@ MODULE_SUPPORTED_DEVICE("{{RME HDSPM-MADI}}"); #define HDSPM_DMA_AREA_BYTES (HDSPM_MAX_CHANNELS * HDSPM_CHANNEL_BUFFER_BYTES) #define HDSPM_DMA_AREA_KILOBYTES (HDSPM_DMA_AREA_BYTES/1024) -/* revisions >= 230 indicate AES32 card */ -#define HDSPM_MADI_OLD_REV 207 -#define HDSPM_MADI_REV 210 #define HDSPM_RAYDAT_REV 211 #define HDSPM_AIO_REV 212 #define HDSPM_MADIFACE_REV 213 -#define HDSPM_AES_REV 240 -#define HDSPM_AES32_REV 234 -#define HDSPM_AES32_OLD_REV 233 /* speed factor modes */ #define HDSPM_SPEED_SINGLE 0 @@ -1217,6 +1211,22 @@ static int hdspm_external_sample_rate(struct hdspm *hdspm) rate = 0; break; } + + /* QS and DS rates normally can not be detected + * automatically by the card. Only exception is MADI + * in 96k frame mode. + * + * So if we read SS values (32 .. 48k), check for + * user-provided DS/QS bits in the control register + * and multiply the base frequency accordingly. + */ + if (rate <= 48000) { + if (hdspm->control_register & HDSPM_QuadSpeed) + rate *= 4; + else if (hdspm->control_register & + HDSPM_DoubleSpeed) + rate *= 2; + } } break; } @@ -1224,10 +1234,30 @@ static int hdspm_external_sample_rate(struct hdspm *hdspm) return rate; } +/* return latency in samples per period */ +static int hdspm_get_latency(struct hdspm *hdspm) +{ + int n; + + n = hdspm_decode_latency(hdspm->control_register); + + /* Special case for new RME cards with 32 samples period size. + * The three latency bits in the control register + * (HDSP_LatencyMask) encode latency values of 64 samples as + * 0, 128 samples as 1 ... 4096 samples as 6. For old cards, 7 + * denotes 8192 samples, but on new cards like RayDAT or AIO, + * it corresponds to 32 samples. + */ + if ((7 == n) && (RayDAT == hdspm->io_type || AIO == hdspm->io_type)) + n = -1; + + return 1 << (n + 6); +} + /* Latency function */ static inline void hdspm_compute_period_size(struct hdspm *hdspm) { - hdspm->period_bytes = 1 << ((hdspm_decode_latency(hdspm->control_register) + 8)); + hdspm->period_bytes = 4 * hdspm_get_latency(hdspm); } @@ -1286,12 +1316,27 @@ static int hdspm_set_interrupt_interval(struct hdspm *s, unsigned int frames) spin_lock_irq(&s->lock); - frames >>= 7; - n = 0; - while (frames) { - n++; - frames >>= 1; + if (32 == frames) { + /* Special case for new RME cards like RayDAT/AIO which + * support period sizes of 32 samples. Since latency is + * encoded in the three bits of HDSP_LatencyMask, we can only + * have values from 0 .. 7. While 0 still means 64 samples and + * 6 represents 4096 samples on all cards, 7 represents 8192 + * on older cards and 32 samples on new cards. + * + * In other words, period size in samples is calculated by + * 2^(n+6) with n ranging from 0 .. 7. + */ + n = 7; + } else { + frames >>= 7; + n = 0; + while (frames) { + n++; + frames >>= 1; + } } + s->control_register &= ~HDSPM_LatencyMask; s->control_register |= hdspm_encode_latency(n); @@ -1322,6 +1367,10 @@ static u64 hdspm_calc_dds_value(struct hdspm *hdspm, u64 period) break; case MADIface: freq_const = 131072000000000ULL; + break; + default: + snd_BUG(); + return 0; } return div_u64(freq_const, period); @@ -1339,16 +1388,19 @@ static void hdspm_set_dds_value(struct hdspm *hdspm, int rate) switch (hdspm->io_type) { case MADIface: - n = 131072000000000ULL; /* 125 MHz */ - break; + n = 131072000000000ULL; /* 125 MHz */ + break; case MADI: case AES32: - n = 110069313433624ULL; /* 105 MHz */ - break; + n = 110069313433624ULL; /* 105 MHz */ + break; case RayDAT: case AIO: - n = 104857600000000ULL; /* 100 MHz */ - break; + n = 104857600000000ULL; /* 100 MHz */ + break; + default: + snd_BUG(); + return; } n = div_u64(n, rate); @@ -3415,6 +3467,91 @@ static int snd_hdspm_put_qs_wire(struct snd_kcontrol *kcontrol, return change; } +#define HDSPM_MADI_SPEEDMODE(xname, xindex) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \ + .name = xname, \ + .index = xindex, \ + .info = snd_hdspm_info_madi_speedmode, \ + .get = snd_hdspm_get_madi_speedmode, \ + .put = snd_hdspm_put_madi_speedmode \ +} + +static int hdspm_madi_speedmode(struct hdspm *hdspm) +{ + if (hdspm->control_register & HDSPM_QuadSpeed) + return 2; + if (hdspm->control_register & HDSPM_DoubleSpeed) + return 1; + return 0; +} + +static int hdspm_set_madi_speedmode(struct hdspm *hdspm, int mode) +{ + hdspm->control_register &= ~(HDSPM_DoubleSpeed | HDSPM_QuadSpeed); + switch (mode) { + case 0: + break; + case 1: + hdspm->control_register |= HDSPM_DoubleSpeed; + break; + case 2: + hdspm->control_register |= HDSPM_QuadSpeed; + break; + } + hdspm_write(hdspm, HDSPM_controlRegister, hdspm->control_register); + + return 0; +} + +static int snd_hdspm_info_madi_speedmode(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo) +{ + static char *texts[] = { "Single", "Double", "Quad" }; + + uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED; + uinfo->count = 1; + uinfo->value.enumerated.items = 3; + + if (uinfo->value.enumerated.item >= uinfo->value.enumerated.items) + uinfo->value.enumerated.item = + uinfo->value.enumerated.items - 1; + strcpy(uinfo->value.enumerated.name, + texts[uinfo->value.enumerated.item]); + + return 0; +} + +static int snd_hdspm_get_madi_speedmode(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct hdspm *hdspm = snd_kcontrol_chip(kcontrol); + + spin_lock_irq(&hdspm->lock); + ucontrol->value.enumerated.item[0] = hdspm_madi_speedmode(hdspm); + spin_unlock_irq(&hdspm->lock); + return 0; +} + +static int snd_hdspm_put_madi_speedmode(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct hdspm *hdspm = snd_kcontrol_chip(kcontrol); + int change; + int val; + + if (!snd_hdspm_use_is_exclusive(hdspm)) + return -EBUSY; + val = ucontrol->value.integer.value[0]; + if (val < 0) + val = 0; + if (val > 2) + val = 2; + spin_lock_irq(&hdspm->lock); + change = val != hdspm_madi_speedmode(hdspm); + hdspm_set_madi_speedmode(hdspm, val); + spin_unlock_irq(&hdspm->lock); + return change; +} #define HDSPM_MIXER(xname, xindex) \ { .iface = SNDRV_CTL_ELEM_IFACE_HWDEP, \ @@ -4289,7 +4426,8 @@ static struct snd_kcontrol_new snd_hdspm_controls_madi[] = { HDSPM_TX_64("TX 64 channels mode", 0), HDSPM_C_TMS("Clear Track Marker", 0), HDSPM_SAFE_MODE("Safe Mode", 0), - HDSPM_INPUT_SELECT("Input Select", 0) + HDSPM_INPUT_SELECT("Input Select", 0), + HDSPM_MADI_SPEEDMODE("MADI Speed Mode", 0) }; @@ -4302,7 +4440,8 @@ static struct snd_kcontrol_new snd_hdspm_controls_madiface[] = { HDSPM_SYNC_CHECK("MADI SyncCheck", 0), HDSPM_TX_64("TX 64 channels mode", 0), HDSPM_C_TMS("Clear Track Marker", 0), - HDSPM_SAFE_MODE("Safe Mode", 0) + HDSPM_SAFE_MODE("Safe Mode", 0), + HDSPM_MADI_SPEEDMODE("MADI Speed Mode", 0) }; static struct snd_kcontrol_new snd_hdspm_controls_aio[] = { @@ -4690,8 +4829,7 @@ snd_hdspm_proc_read_madi(struct snd_info_entry * entry, snd_iprintf(buffer, "--- Settings ---\n"); - x = 1 << (6 + hdspm_decode_latency(hdspm->control_register & - HDSPM_LatencyMask)); + x = hdspm_get_latency(hdspm); snd_iprintf(buffer, "Size (Latency): %d samples (2 periods of %lu bytes)\n", @@ -4854,8 +4992,7 @@ snd_hdspm_proc_read_aes32(struct snd_info_entry * entry, snd_iprintf(buffer, "--- Settings ---\n"); - x = 1 << (6 + hdspm_decode_latency(hdspm->control_register & - HDSPM_LatencyMask)); + x = hdspm_get_latency(hdspm); snd_iprintf(buffer, "Size (Latency): %d samples (2 periods of %lu bytes)\n", @@ -5561,19 +5698,6 @@ static int snd_hdspm_prepare(struct snd_pcm_substream *substream) return 0; } -static unsigned int period_sizes_old[] = { - 64, 128, 256, 512, 1024, 2048, 4096 -}; - -static unsigned int period_sizes_new[] = { - 32, 64, 128, 256, 512, 1024, 2048, 4096 -}; - -/* RayDAT and AIO always have a buffer of 16384 samples per channel */ -static unsigned int raydat_aio_buffer_sizes[] = { - 16384 -}; - static struct snd_pcm_hardware snd_hdspm_playback_subinfo = { .info = (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID | @@ -5592,8 +5716,8 @@ static struct snd_pcm_hardware snd_hdspm_playback_subinfo = { .channels_max = HDSPM_MAX_CHANNELS, .buffer_bytes_max = HDSPM_CHANNEL_BUFFER_BYTES * HDSPM_MAX_CHANNELS, - .period_bytes_min = (64 * 4), - .period_bytes_max = (4096 * 4) * HDSPM_MAX_CHANNELS, + .period_bytes_min = (32 * 4), + .period_bytes_max = (8192 * 4) * HDSPM_MAX_CHANNELS, .periods_min = 2, .periods_max = 512, .fifo_size = 0 @@ -5617,31 +5741,13 @@ static struct snd_pcm_hardware snd_hdspm_capture_subinfo = { .channels_max = HDSPM_MAX_CHANNELS, .buffer_bytes_max = HDSPM_CHANNEL_BUFFER_BYTES * HDSPM_MAX_CHANNELS, - .period_bytes_min = (64 * 4), - .period_bytes_max = (4096 * 4) * HDSPM_MAX_CHANNELS, + .period_bytes_min = (32 * 4), + .period_bytes_max = (8192 * 4) * HDSPM_MAX_CHANNELS, .periods_min = 2, .periods_max = 512, .fifo_size = 0 }; -static struct snd_pcm_hw_constraint_list hw_constraints_period_sizes_old = { - .count = ARRAY_SIZE(period_sizes_old), - .list = period_sizes_old, - .mask = 0 -}; - -static struct snd_pcm_hw_constraint_list hw_constraints_period_sizes_new = { - .count = ARRAY_SIZE(period_sizes_new), - .list = period_sizes_new, - .mask = 0 -}; - -static struct snd_pcm_hw_constraint_list hw_constraints_raydat_io_buffer = { - .count = ARRAY_SIZE(raydat_aio_buffer_sizes), - .list = raydat_aio_buffer_sizes, - .mask = 0 -}; - static int snd_hdspm_hw_rule_in_channels_rate(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { @@ -5842,26 +5948,32 @@ static int snd_hdspm_playback_open(struct snd_pcm_substream *substream) spin_unlock_irq(&hdspm->lock); snd_pcm_hw_constraint_msbits(runtime, 0, 32, 24); + snd_pcm_hw_constraint_pow2(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_SIZE); switch (hdspm->io_type) { case AIO: case RayDAT: - snd_pcm_hw_constraint_list(runtime, 0, - SNDRV_PCM_HW_PARAM_PERIOD_SIZE, - &hw_constraints_period_sizes_new); - snd_pcm_hw_constraint_list(runtime, 0, - SNDRV_PCM_HW_PARAM_BUFFER_SIZE, - &hw_constraints_raydat_io_buffer); - + snd_pcm_hw_constraint_minmax(runtime, + SNDRV_PCM_HW_PARAM_PERIOD_SIZE, + 32, 4096); + /* RayDAT & AIO have a fixed buffer of 16384 samples per channel */ + snd_pcm_hw_constraint_minmax(runtime, + SNDRV_PCM_HW_PARAM_BUFFER_SIZE, + 16384, 16384); break; default: - snd_pcm_hw_constraint_list(runtime, 0, - SNDRV_PCM_HW_PARAM_PERIOD_SIZE, - &hw_constraints_period_sizes_old); + snd_pcm_hw_constraint_minmax(runtime, + SNDRV_PCM_HW_PARAM_PERIOD_SIZE, + 64, 8192); + snd_pcm_hw_constraint_minmax(runtime, + SNDRV_PCM_HW_PARAM_PERIODS, + 2, 2); + break; } if (AES32 == hdspm->io_type) { + runtime->hw.rates |= SNDRV_PCM_RATE_KNOT; snd_pcm_hw_constraint_list(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, &hdspm_hw_constraints_aes32_sample_rates); } else { @@ -5914,24 +6026,31 @@ static int snd_hdspm_capture_open(struct snd_pcm_substream *substream) spin_unlock_irq(&hdspm->lock); snd_pcm_hw_constraint_msbits(runtime, 0, 32, 24); + snd_pcm_hw_constraint_pow2(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_SIZE); + switch (hdspm->io_type) { case AIO: case RayDAT: - snd_pcm_hw_constraint_list(runtime, 0, - SNDRV_PCM_HW_PARAM_PERIOD_SIZE, - &hw_constraints_period_sizes_new); - snd_pcm_hw_constraint_list(runtime, 0, - SNDRV_PCM_HW_PARAM_BUFFER_SIZE, - &hw_constraints_raydat_io_buffer); - break; + snd_pcm_hw_constraint_minmax(runtime, + SNDRV_PCM_HW_PARAM_PERIOD_SIZE, + 32, 4096); + snd_pcm_hw_constraint_minmax(runtime, + SNDRV_PCM_HW_PARAM_BUFFER_SIZE, + 16384, 16384); + break; default: - snd_pcm_hw_constraint_list(runtime, 0, - SNDRV_PCM_HW_PARAM_PERIOD_SIZE, - &hw_constraints_period_sizes_old); + snd_pcm_hw_constraint_minmax(runtime, + SNDRV_PCM_HW_PARAM_PERIOD_SIZE, + 64, 8192); + snd_pcm_hw_constraint_minmax(runtime, + SNDRV_PCM_HW_PARAM_PERIODS, + 2, 2); + break; } if (AES32 == hdspm->io_type) { + runtime->hw.rates |= SNDRV_PCM_RATE_KNOT; snd_pcm_hw_constraint_list(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, &hdspm_hw_constraints_aes32_sample_rates); } else { @@ -5977,7 +6096,7 @@ static inline int copy_u32_le(void __user *dest, void __iomem *src) } static int snd_hdspm_hwdep_ioctl(struct snd_hwdep *hw, struct file *file, - unsigned int cmd, unsigned long __user arg) + unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; struct hdspm *hdspm = hw->private_data; @@ -6102,11 +6221,13 @@ static int snd_hdspm_hwdep_ioctl(struct snd_hwdep *hw, struct file *file, info.line_out = hdspm_line_out(hdspm); info.passthru = 0; spin_unlock_irq(&hdspm->lock); - if (copy_to_user((void __user *) arg, &info, sizeof(info))) + if (copy_to_user(argp, &info, sizeof(info))) return -EFAULT; break; case SNDRV_HDSPM_IOCTL_GET_STATUS: + memset(&status, 0, sizeof(status)); + status.card_type = hdspm->io_type; status.autosync_source = hdspm_autosync_ref(hdspm); @@ -6131,7 +6252,7 @@ static int snd_hdspm_hwdep_ioctl(struct snd_hwdep *hw, struct file *file, status.card_specific.madi.madi_input = (statusregister & HDSPM_AB_int) ? 1 : 0; status.card_specific.madi.channel_format = - (statusregister & HDSPM_TX_64ch) ? 1 : 0; + (statusregister & HDSPM_RX_64ch) ? 1 : 0; /* TODO: Mac driver sets it when f_s>48kHz */ status.card_specific.madi.frame_format = 0; @@ -6139,13 +6260,15 @@ static int snd_hdspm_hwdep_ioctl(struct snd_hwdep *hw, struct file *file, break; } - if (copy_to_user((void __user *) arg, &status, sizeof(status))) + if (copy_to_user(argp, &status, sizeof(status))) return -EFAULT; break; case SNDRV_HDSPM_IOCTL_GET_VERSION: + memset(&hdspm_version, 0, sizeof(hdspm_version)); + hdspm_version.card_type = hdspm->io_type; strncpy(hdspm_version.cardname, hdspm->card_name, sizeof(hdspm_version.cardname)); @@ -6156,13 +6279,13 @@ static int snd_hdspm_hwdep_ioctl(struct snd_hwdep *hw, struct file *file, if (hdspm->tco) hdspm_version.addons |= HDSPM_ADDON_TCO; - if (copy_to_user((void __user *) arg, &hdspm_version, + if (copy_to_user(argp, &hdspm_version, sizeof(hdspm_version))) return -EFAULT; break; case SNDRV_HDSPM_IOCTL_GET_MIXER: - if (copy_from_user(&mixer, (void __user *)arg, sizeof(mixer))) + if (copy_from_user(&mixer, argp, sizeof(mixer))) return -EFAULT; if (copy_to_user((void __user *)mixer.mixer, hdspm->mixer, sizeof(struct hdspm_mixer))) @@ -6379,12 +6502,6 @@ static int __devinit snd_hdspm_create(struct snd_card *card, strcpy(card->driver, "HDSPM"); switch (hdspm->firmware_rev) { - case HDSPM_MADI_REV: - case HDSPM_MADI_OLD_REV: - hdspm->io_type = MADI; - hdspm->card_name = "RME MADI"; - hdspm->midiPorts = 3; - break; case HDSPM_RAYDAT_REV: hdspm->io_type = RayDAT; hdspm->card_name = "RME RayDAT"; @@ -6400,17 +6517,25 @@ static int __devinit snd_hdspm_create(struct snd_card *card, hdspm->card_name = "RME MADIface"; hdspm->midiPorts = 1; break; - case HDSPM_AES_REV: - case HDSPM_AES32_REV: - case HDSPM_AES32_OLD_REV: - hdspm->io_type = AES32; - hdspm->card_name = "RME AES32"; - hdspm->midiPorts = 2; - break; default: - snd_printk(KERN_ERR "HDSPM: unknown firmware revision %x\n", + if ((hdspm->firmware_rev == 0xf0) || + ((hdspm->firmware_rev >= 0xe6) && + (hdspm->firmware_rev <= 0xea))) { + hdspm->io_type = AES32; + hdspm->card_name = "RME AES32"; + hdspm->midiPorts = 2; + } else if ((hdspm->firmware_rev == 0xd2) || + ((hdspm->firmware_rev >= 0xc8) && + (hdspm->firmware_rev <= 0xcf))) { + hdspm->io_type = MADI; + hdspm->card_name = "RME MADI"; + hdspm->midiPorts = 3; + } else { + snd_printk(KERN_ERR + "HDSPM: unknown firmware revision %x\n", hdspm->firmware_rev); - return -ENODEV; + return -ENODEV; + } } err = pci_enable_device(pci); @@ -6441,7 +6566,7 @@ static int __devinit snd_hdspm_create(struct snd_card *card, hdspm->port + io_extent - 1); if (request_irq(pci->irq, snd_hdspm_interrupt, - IRQF_SHARED, "hdspm", hdspm)) { + IRQF_SHARED, KBUILD_MODNAME, hdspm)) { snd_printk(KERN_ERR "HDSPM: unable to use IRQ %d\n", pci->irq); return -EBUSY; } @@ -6779,7 +6904,7 @@ static void __devexit snd_hdspm_remove(struct pci_dev *pci) } static struct pci_driver driver = { - .name = "RME Hammerfall DSP MADI", + .name = KBUILD_MODNAME, .id_table = snd_hdspm_ids, .probe = snd_hdspm_probe, .remove = __devexit_p(snd_hdspm_remove), diff --git a/sound/pci/rme9652/rme9652.c b/sound/pci/rme9652/rme9652.c index c492af5..1b35864 100644 --- a/sound/pci/rme9652/rme9652.c +++ b/sound/pci/rme9652/rme9652.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include @@ -285,7 +285,7 @@ static char channel_map_9636_ds[26] = { /* ADAT channels are remapped */ 1, 3, 5, 7, 9, 11, 13, 15, /* channels 8 and 9 are S/PDIF */ - 24, 25 + 24, 25, /* others don't exist */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; @@ -2479,7 +2479,7 @@ static int __devinit snd_rme9652_create(struct snd_card *card, } if (request_irq(pci->irq, snd_rme9652_interrupt, IRQF_SHARED, - "rme9652", rme9652)) { + KBUILD_MODNAME, rme9652)) { snd_printk(KERN_ERR "unable to request IRQ %d\n", pci->irq); return -EBUSY; } @@ -2632,7 +2632,7 @@ static void __devexit snd_rme9652_remove(struct pci_dev *pci) } static struct pci_driver driver = { - .name = "RME Digi9652 (Hammerfall)", + .name = KBUILD_MODNAME, .id_table = snd_rme9652_ids, .probe = snd_rme9652_probe, .remove = __devexit_p(snd_rme9652_remove), diff --git a/sound/pcmcia/vx/vxpocket.c b/sound/pcmcia/vx/vxpocket.c index 31777d1..9e361c9 100644 --- a/sound/pcmcia/vx/vxpocket.c +++ b/sound/pcmcia/vx/vxpocket.c @@ -20,7 +20,7 @@ #include -#include +#include #include #include #include "vxpocket.h" diff --git a/sound/ppc/keywest.c b/sound/ppc/keywest.c index 8f064c7..4080bec 100644 --- a/sound/ppc/keywest.c +++ b/sound/ppc/keywest.c @@ -82,7 +82,6 @@ static int keywest_attach_adapter(struct i2c_adapter *adapter) static int keywest_remove(struct i2c_client *client) { - i2c_set_clientdata(client, NULL); if (! keywest_ctx) return 0; if (client == keywest_ctx->client) diff --git a/sound/ppc/pmac.c b/sound/ppc/pmac.c index 3ecbd67..ab96cde 100644 --- a/sound/ppc/pmac.c +++ b/sound/ppc/pmac.c @@ -881,8 +881,7 @@ static int snd_pmac_free(struct snd_pmac *chip) for (i = 0; i < 3; i++) { if (chip->requested & (1 << i)) release_mem_region(chip->rsrc[i].start, - chip->rsrc[i].end - - chip->rsrc[i].start + 1); + resource_size(&chip->rsrc[i])); } } @@ -1228,8 +1227,7 @@ int __devinit snd_pmac_new(struct snd_card *card, struct snd_pmac **chip_return) goto __error; } if (request_mem_region(chip->rsrc[i].start, - chip->rsrc[i].end - - chip->rsrc[i].start + 1, + resource_size(&chip->rsrc[i]), rnames[i]) == NULL) { printk(KERN_ERR "snd: can't request rsrc " " %d (%s: %pR)\n", @@ -1254,8 +1252,7 @@ int __devinit snd_pmac_new(struct snd_card *card, struct snd_pmac **chip_return) goto __error; } if (request_mem_region(chip->rsrc[i].start, - chip->rsrc[i].end - - chip->rsrc[i].start + 1, + resource_size(&chip->rsrc[i]), rnames[i]) == NULL) { printk(KERN_ERR "snd: can't request rsrc " " %d (%s: %pR)\n", diff --git a/sound/ppc/powermac.c b/sound/ppc/powermac.c index a2b69b8..6564569 100644 --- a/sound/ppc/powermac.c +++ b/sound/ppc/powermac.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include "pmac.h" diff --git a/sound/ppc/snd_ps3.c b/sound/ppc/snd_ps3.c index bc823a5..1aa52ef 100644 --- a/sound/ppc/snd_ps3.c +++ b/sound/ppc/snd_ps3.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -845,7 +846,7 @@ static int __devinit snd_ps3_allocate_irq(void) return ret; } - ret = request_irq(the_card.irq_no, snd_ps3_interrupt, IRQF_DISABLED, + ret = request_irq(the_card.irq_no, snd_ps3_interrupt, 0, SND_PS3_DRIVER_NAME, &the_card); if (ret) { pr_info("%s: request_irq failed (%d)\n", __func__, ret); @@ -875,7 +876,7 @@ static void __devinit snd_ps3_audio_set_base_addr(uint64_t ioaddr_start) (0x0fUL << 12) | (PS3_AUDIO_IOID); - ret = lv1_gpu_attribute(0x100, 0x007, val, 0, 0); + ret = lv1_gpu_attribute(0x100, 0x007, val); if (ret) pr_info("%s: gpu_attribute failed %d\n", __func__, ret); diff --git a/sound/soc/atmel/Kconfig b/sound/soc/atmel/Kconfig index bee3c94..d1fcc81 100644 --- a/sound/soc/atmel/Kconfig +++ b/sound/soc/atmel/Kconfig @@ -1,6 +1,6 @@ config SND_ATMEL_SOC tristate "SoC Audio for the Atmel System-on-Chip" - depends on ARCH_AT91 || AVR32 + depends on ARCH_AT91 help Say Y or M if you want to add support for codecs attached to the ATMEL SSC interface. You will also need @@ -24,25 +24,6 @@ config SND_AT91_SOC_SAM9G20_WM8731 Say Y if you want to add support for SoC audio on WM8731-based AT91sam9g20 evaluation board. -config SND_AT32_SOC_PLAYPAQ - tristate "SoC Audio support for PlayPaq with WM8510" - depends on SND_ATMEL_SOC && BOARD_PLAYPAQ && AT91_PROGRAMMABLE_CLOCKS - select SND_ATMEL_SOC_SSC - select SND_SOC_WM8510 - help - Say Y or M here if you want to add support for SoC audio - on the LRS PlayPaq. - -config SND_AT32_SOC_PLAYPAQ_SLAVE - bool "Run CODEC on PlayPaq in slave mode" - depends on SND_AT32_SOC_PLAYPAQ - default n - help - Say Y if you want to run with the AT32 SSC generating the BCLK - and FRAME signals on the PlayPaq. Unless you want to play - with the AT32 as the SSC master, you probably want to say N here, - as this will give you better sound quality. - config SND_AT91_SOC_AFEB9260 tristate "SoC Audio support for AFEB9260 board" depends on ARCH_AT91 && MACH_AFEB9260 && SND_ATMEL_SOC diff --git a/sound/soc/atmel/Makefile b/sound/soc/atmel/Makefile index e7ea56b..a5c0bf1 100644 --- a/sound/soc/atmel/Makefile +++ b/sound/soc/atmel/Makefile @@ -8,9 +8,5 @@ obj-$(CONFIG_SND_ATMEL_SOC_SSC) += snd-soc-atmel_ssc_dai.o # AT91 Machine Support snd-soc-sam9g20-wm8731-objs := sam9g20_wm8731.o -# AT32 Machine Support -snd-soc-playpaq-objs := playpaq_wm8510.o - obj-$(CONFIG_SND_AT91_SOC_SAM9G20_WM8731) += snd-soc-sam9g20-wm8731.o -obj-$(CONFIG_SND_AT32_SOC_PLAYPAQ) += snd-soc-playpaq.o obj-$(CONFIG_SND_AT91_SOC_AFEB9260) += snd-soc-afeb9260.o diff --git a/sound/soc/atmel/atmel-pcm.c b/sound/soc/atmel/atmel-pcm.c index d0e7532..f81d4c3 100644 --- a/sound/soc/atmel/atmel-pcm.c +++ b/sound/soc/atmel/atmel-pcm.c @@ -364,9 +364,11 @@ static struct snd_pcm_ops atmel_pcm_ops = { \*--------------------------------------------------------------------------*/ static u64 atmel_pcm_dmamask = 0xffffffff; -static int atmel_pcm_new(struct snd_card *card, - struct snd_soc_dai *dai, struct snd_pcm *pcm) +static int atmel_pcm_new(struct snd_soc_pcm_runtime *rtd) { + struct snd_card *card = rtd->card->snd_card; + struct snd_soc_dai *dai = rtd->cpu_dai; + struct snd_pcm *pcm = rtd->pcm; int ret = 0; if (!card->dev->dma_mask) @@ -382,7 +384,7 @@ static int atmel_pcm_new(struct snd_card *card, } if (dai->driver->capture.channels_min) { - pr_debug("at32-pcm:" + pr_debug("atmel-pcm:" "Allocating PCM capture DMA buffer\n"); ret = atmel_pcm_preallocate_dma_buffer(pcm, SNDRV_PCM_STREAM_CAPTURE); diff --git a/sound/soc/atmel/atmel-pcm.h b/sound/soc/atmel/atmel-pcm.h index 2597329..5e0a95e 100644 --- a/sound/soc/atmel/atmel-pcm.h +++ b/sound/soc/atmel/atmel-pcm.h @@ -60,7 +60,7 @@ struct atmel_ssc_mask { * This structure, shared between the PCM driver and the interface, * contains all information required by the PCM driver to perform the * PDC DMA operation. All fields except dma_intr_handler() are initialized - * by the interface. The dms_intr_handler() pointer is set by the PCM + * by the interface. The dma_intr_handler() pointer is set by the PCM * driver and called by the interface SSC interrupt handler if it is * non-NULL. */ diff --git a/sound/soc/atmel/atmel_ssc_dai.c b/sound/soc/atmel/atmel_ssc_dai.c index eda955b..361078d 100644 --- a/sound/soc/atmel/atmel_ssc_dai.c +++ b/sound/soc/atmel/atmel_ssc_dai.c @@ -341,7 +341,6 @@ static int atmel_ssc_hw_params(struct snd_pcm_substream *substream, struct atmel_pcm_dma_params *dma_params; int dir, channels, bits; u32 tfmr, rfmr, tcmr, rcmr; - int start_event; int ret; /* @@ -402,7 +401,7 @@ static int atmel_ssc_hw_params(struct snd_pcm_substream *substream, if ((ssc_p->daifmt & SND_SOC_DAIFMT_FORMAT_MASK) == SND_SOC_DAIFMT_I2S && bits > 16) { printk(KERN_WARNING - "atmel_ssc_dai: sample size %d" + "atmel_ssc_dai: sample size %d " "is too large for I2S\n", bits); return -EINVAL; } @@ -460,19 +459,10 @@ static int atmel_ssc_hw_params(struct snd_pcm_substream *substream, * The SSC transmit clock is obtained from the BCLK signal on * on the TK line, and the SSC receive clock is * generated from the transmit clock. - * - * For single channel data, one sample is transferred - * on the falling edge of the LRC clock. - * For two channel data, one sample is - * transferred on both edges of the LRC clock. */ - start_event = ((channels == 1) - ? SSC_START_FALLING_RF - : SSC_START_EDGE_RF); - rcmr = SSC_BF(RCMR_PERIOD, 0) | SSC_BF(RCMR_STTDLY, START_DELAY) - | SSC_BF(RCMR_START, start_event) + | SSC_BF(RCMR_START, SSC_START_FALLING_RF) | SSC_BF(RCMR_CKI, SSC_CKI_RISING) | SSC_BF(RCMR_CKO, SSC_CKO_NONE) | SSC_BF(RCMR_CKS, SSC_CKS_CLOCK); @@ -480,14 +470,14 @@ static int atmel_ssc_hw_params(struct snd_pcm_substream *substream, rfmr = SSC_BF(RFMR_FSEDGE, SSC_FSEDGE_POSITIVE) | SSC_BF(RFMR_FSOS, SSC_FSOS_NONE) | SSC_BF(RFMR_FSLEN, 0) - | SSC_BF(RFMR_DATNB, 0) + | SSC_BF(RFMR_DATNB, (channels - 1)) | SSC_BIT(RFMR_MSBF) | SSC_BF(RFMR_LOOP, 0) | SSC_BF(RFMR_DATLEN, (bits - 1)); tcmr = SSC_BF(TCMR_PERIOD, 0) | SSC_BF(TCMR_STTDLY, START_DELAY) - | SSC_BF(TCMR_START, start_event) + | SSC_BF(TCMR_START, SSC_START_FALLING_RF) | SSC_BF(TCMR_CKI, SSC_CKI_FALLING) | SSC_BF(TCMR_CKO, SSC_CKO_NONE) | SSC_BF(TCMR_CKS, SSC_CKS_PIN); @@ -496,7 +486,7 @@ static int atmel_ssc_hw_params(struct snd_pcm_substream *substream, | SSC_BF(TFMR_FSDEN, 0) | SSC_BF(TFMR_FSOS, SSC_FSOS_NONE) | SSC_BF(TFMR_FSLEN, 0) - | SSC_BF(TFMR_DATNB, 0) + | SSC_BF(TFMR_DATNB, (channels - 1)) | SSC_BIT(TFMR_MSBF) | SSC_BF(TFMR_DATDEF, 0) | SSC_BF(TFMR_DATLEN, (bits - 1)); @@ -838,10 +828,8 @@ int atmel_ssc_set_audio(int ssc_id) } ssc_pdev = platform_device_alloc("atmel-ssc-dai", ssc_id); - if (!ssc_pdev) { - ssc_free(ssc); + if (!ssc_pdev) return -ENOMEM; - } /* If we can grab the SSC briefly to parent the DAI device off it */ ssc = ssc_request(ssc_id); diff --git a/sound/soc/atmel/playpaq_wm8510.c b/sound/soc/atmel/playpaq_wm8510.c deleted file mode 100644 index 1aac2f4..0000000 --- a/sound/soc/atmel/playpaq_wm8510.c +++ /dev/null @@ -1,471 +0,0 @@ -/* sound/soc/at32/playpaq_wm8510.c - * ASoC machine driver for PlayPaq using WM8510 codec - * - * Copyright (C) 2008 Long Range Systems - * Geoffrey Wossum - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This code is largely inspired by sound/soc/at91/eti_b1_wm8731.c - * - * NOTE: If you don't have the AT32 enhanced portmux configured (which - * isn't currently in the mainline or Atmel patched kernel), you will - * need to set the MCLK pin (PA30) to peripheral A in your board initialization - * code. Something like: - * at32_select_periph(GPIO_PIN_PA(30), GPIO_PERIPH_A, 0); - * - */ - -/* #define DEBUG */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include - -#include "../codecs/wm8510.h" -#include "atmel-pcm.h" -#include "atmel_ssc_dai.h" - - -/*-------------------------------------------------------------------------*\ - * constants -\*-------------------------------------------------------------------------*/ -#define MCLK_PIN GPIO_PIN_PA(30) -#define MCLK_PERIPH GPIO_PERIPH_A - - -/*-------------------------------------------------------------------------*\ - * data types -\*-------------------------------------------------------------------------*/ -/* SSC clocking data */ -struct ssc_clock_data { - /* CMR div */ - unsigned int cmr_div; - - /* Frame period (as needed by xCMR.PERIOD) */ - unsigned int period; - - /* The SSC clock rate these settings where calculated for */ - unsigned long ssc_rate; -}; - - -/*-------------------------------------------------------------------------*\ - * module data -\*-------------------------------------------------------------------------*/ -static struct clk *_gclk0; -static struct clk *_pll0; - -#define CODEC_CLK (_gclk0) - - -/*-------------------------------------------------------------------------*\ - * Sound SOC operations -\*-------------------------------------------------------------------------*/ -#if defined CONFIG_SND_AT32_SOC_PLAYPAQ_SLAVE -static struct ssc_clock_data playpaq_wm8510_calc_ssc_clock( - struct snd_pcm_hw_params *params, - struct snd_soc_dai *cpu_dai) -{ - struct at32_ssc_info *ssc_p = snd_soc_dai_get_drvdata(cpu_dai); - struct ssc_device *ssc = ssc_p->ssc; - struct ssc_clock_data cd; - unsigned int rate, width_bits, channels; - unsigned int bitrate, ssc_div; - unsigned actual_rate; - - - /* - * Figure out required bitrate - */ - rate = params_rate(params); - channels = params_channels(params); - width_bits = snd_pcm_format_physical_width(params_format(params)); - bitrate = rate * width_bits * channels; - - - /* - * Figure out required SSC divider and period for required bitrate - */ - cd.ssc_rate = clk_get_rate(ssc->clk); - ssc_div = cd.ssc_rate / bitrate; - cd.cmr_div = ssc_div / 2; - if (ssc_div & 1) { - /* round cmr_div up */ - cd.cmr_div++; - } - cd.period = width_bits - 1; - - - /* - * Find actual rate, compare to requested rate - */ - actual_rate = (cd.ssc_rate / (cd.cmr_div * 2)) / (2 * (cd.period + 1)); - pr_debug("playpaq_wm8510: Request rate = %u, actual rate = %u\n", - rate, actual_rate); - - - return cd; -} -#endif /* CONFIG_SND_AT32_SOC_PLAYPAQ_SLAVE */ - - - -static int playpaq_wm8510_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) -{ - struct snd_soc_pcm_runtime *rtd = substream->private_data; - struct snd_soc_dai *codec_dai = rtd->codec_dai; - struct snd_soc_dai *cpu_dai = rtd->cpu_dai; - struct at32_ssc_info *ssc_p = snd_soc_dai_get_drvdata(cpu_dai); - struct ssc_device *ssc = ssc_p->ssc; - unsigned int pll_out = 0, bclk = 0, mclk_div = 0; - int ret; - - - /* Due to difficulties with getting the correct clocks from the AT32's - * PLL0, we're going to let the CODEC be in charge of all the clocks - */ -#if !defined CONFIG_SND_AT32_SOC_PLAYPAQ_SLAVE - const unsigned int fmt = (SND_SOC_DAIFMT_I2S | - SND_SOC_DAIFMT_NB_NF | - SND_SOC_DAIFMT_CBM_CFM); -#else - struct ssc_clock_data cd; - const unsigned int fmt = (SND_SOC_DAIFMT_I2S | - SND_SOC_DAIFMT_NB_NF | - SND_SOC_DAIFMT_CBS_CFS); -#endif - - if (ssc == NULL) { - pr_warning("playpaq_wm8510_hw_params: ssc is NULL!\n"); - return -EINVAL; - } - - - /* - * Figure out PLL and BCLK dividers for WM8510 - */ - switch (params_rate(params)) { - case 48000: - pll_out = 24576000; - mclk_div = WM8510_MCLKDIV_2; - bclk = WM8510_BCLKDIV_8; - break; - - case 44100: - pll_out = 22579200; - mclk_div = WM8510_MCLKDIV_2; - bclk = WM8510_BCLKDIV_8; - break; - - case 22050: - pll_out = 22579200; - mclk_div = WM8510_MCLKDIV_4; - bclk = WM8510_BCLKDIV_8; - break; - - case 16000: - pll_out = 24576000; - mclk_div = WM8510_MCLKDIV_6; - bclk = WM8510_BCLKDIV_8; - break; - - case 11025: - pll_out = 22579200; - mclk_div = WM8510_MCLKDIV_8; - bclk = WM8510_BCLKDIV_8; - break; - - case 8000: - pll_out = 24576000; - mclk_div = WM8510_MCLKDIV_12; - bclk = WM8510_BCLKDIV_8; - break; - - default: - pr_warning("playpaq_wm8510: Unsupported sample rate %d\n", - params_rate(params)); - return -EINVAL; - } - - - /* - * set CPU and CODEC DAI configuration - */ - ret = snd_soc_dai_set_fmt(codec_dai, fmt); - if (ret < 0) { - pr_warning("playpaq_wm8510: " - "Failed to set CODEC DAI format (%d)\n", - ret); - return ret; - } - ret = snd_soc_dai_set_fmt(cpu_dai, fmt); - if (ret < 0) { - pr_warning("playpaq_wm8510: " - "Failed to set CPU DAI format (%d)\n", - ret); - return ret; - } - - - /* - * Set CPU clock configuration - */ -#if defined CONFIG_SND_AT32_SOC_PLAYPAQ_SLAVE - cd = playpaq_wm8510_calc_ssc_clock(params, cpu_dai); - pr_debug("playpaq_wm8510: cmr_div = %d, period = %d\n", - cd.cmr_div, cd.period); - ret = snd_soc_dai_set_clkdiv(cpu_dai, AT32_SSC_CMR_DIV, cd.cmr_div); - if (ret < 0) { - pr_warning("playpaq_wm8510: Failed to set CPU CMR_DIV (%d)\n", - ret); - return ret; - } - ret = snd_soc_dai_set_clkdiv(cpu_dai, AT32_SSC_TCMR_PERIOD, - cd.period); - if (ret < 0) { - pr_warning("playpaq_wm8510: " - "Failed to set CPU transmit period (%d)\n", - ret); - return ret; - } -#endif /* CONFIG_SND_AT32_SOC_PLAYPAQ_SLAVE */ - - - /* - * Set CODEC clock configuration - */ - pr_debug("playpaq_wm8510: " - "pll_in = %ld, pll_out = %u, bclk = %x, mclk = %x\n", - clk_get_rate(CODEC_CLK), pll_out, bclk, mclk_div); - - -#if !defined CONFIG_SND_AT32_SOC_PLAYPAQ_SLAVE - ret = snd_soc_dai_set_clkdiv(codec_dai, WM8510_BCLKDIV, bclk); - if (ret < 0) { - pr_warning - ("playpaq_wm8510: Failed to set CODEC DAI BCLKDIV (%d)\n", - ret); - return ret; - } -#endif /* CONFIG_SND_AT32_SOC_PLAYPAQ_SLAVE */ - - - ret = snd_soc_dai_set_pll(codec_dai, 0, 0, - clk_get_rate(CODEC_CLK), pll_out); - if (ret < 0) { - pr_warning("playpaq_wm8510: Failed to set CODEC DAI PLL (%d)\n", - ret); - return ret; - } - - - ret = snd_soc_dai_set_clkdiv(codec_dai, WM8510_MCLKDIV, mclk_div); - if (ret < 0) { - pr_warning("playpaq_wm8510: Failed to set CODEC MCLKDIV (%d)\n", - ret); - return ret; - } - - - return 0; -} - - - -static struct snd_soc_ops playpaq_wm8510_ops = { - .hw_params = playpaq_wm8510_hw_params, -}; - - - -static const struct snd_soc_dapm_widget playpaq_dapm_widgets[] = { - SND_SOC_DAPM_MIC("Int Mic", NULL), - SND_SOC_DAPM_SPK("Ext Spk", NULL), -}; - - - -static const struct snd_soc_dapm_route intercon[] = { - /* speaker connected to SPKOUT */ - {"Ext Spk", NULL, "SPKOUTP"}, - {"Ext Spk", NULL, "SPKOUTN"}, - - {"Mic Bias", NULL, "Int Mic"}, - {"MICN", NULL, "Mic Bias"}, - {"MICP", NULL, "Mic Bias"}, -}; - - - -static int playpaq_wm8510_init(struct snd_soc_pcm_runtime *rtd) -{ - struct snd_soc_codec *codec = rtd->codec; - struct snd_soc_dapm_context *dapm = &codec->dapm; - int i; - - /* - * Add DAPM widgets - */ - for (i = 0; i < ARRAY_SIZE(playpaq_dapm_widgets); i++) - snd_soc_dapm_new_control(dapm, &playpaq_dapm_widgets[i]); - - - - /* - * Setup audio path interconnects - */ - snd_soc_dapm_add_routes(dapm, intercon, ARRAY_SIZE(intercon)); - - - - /* always connected pins */ - snd_soc_dapm_enable_pin(dapm, "Int Mic"); - snd_soc_dapm_enable_pin(dapm, "Ext Spk"); - snd_soc_dapm_sync(dapm); - - - - /* Make CSB show PLL rate */ - snd_soc_dai_set_clkdiv(rtd->codec_dai, WM8510_OPCLKDIV, - WM8510_OPCLKDIV_1 | 4); - - return 0; -} - - - -static struct snd_soc_dai_link playpaq_wm8510_dai = { - .name = "WM8510", - .stream_name = "WM8510 PCM", - .cpu_dai_name= "atmel-ssc-dai.0", - .platform_name = "atmel-pcm-audio", - .codec_name = "wm8510-codec.0-0x1a", - .codec_dai_name = "wm8510-hifi", - .init = playpaq_wm8510_init, - .ops = &playpaq_wm8510_ops, -}; - - - -static struct snd_soc_card snd_soc_playpaq = { - .name = "LRS_PlayPaq_WM8510", - .dai_link = &playpaq_wm8510_dai, - .num_links = 1, -}; - -static struct platform_device *playpaq_snd_device; - - -static int __init playpaq_asoc_init(void) -{ - int ret = 0; - - /* - * Configure MCLK for WM8510 - */ - _gclk0 = clk_get(NULL, "gclk0"); - if (IS_ERR(_gclk0)) { - _gclk0 = NULL; - goto err_gclk0; - } - _pll0 = clk_get(NULL, "pll0"); - if (IS_ERR(_pll0)) { - _pll0 = NULL; - goto err_pll0; - } - if (clk_set_parent(_gclk0, _pll0)) { - pr_warning("snd-soc-playpaq: " - "Failed to set PLL0 as parent for DAC clock\n"); - goto err_set_clk; - } - clk_set_rate(CODEC_CLK, 12000000); - clk_enable(CODEC_CLK); - -#if defined CONFIG_AT32_ENHANCED_PORTMUX - at32_select_periph(MCLK_PIN, MCLK_PERIPH, 0); -#endif - - - /* - * Create and register platform device - */ - playpaq_snd_device = platform_device_alloc("soc-audio", 0); - if (playpaq_snd_device == NULL) { - ret = -ENOMEM; - goto err_device_alloc; - } - - platform_set_drvdata(playpaq_snd_device, &snd_soc_playpaq); - - ret = platform_device_add(playpaq_snd_device); - if (ret) { - pr_warning("playpaq_wm8510: platform_device_add failed (%d)\n", - ret); - goto err_device_add; - } - - return 0; - - -err_device_add: - if (playpaq_snd_device != NULL) { - platform_device_put(playpaq_snd_device); - playpaq_snd_device = NULL; - } -err_device_alloc: -err_set_clk: - if (_pll0 != NULL) { - clk_put(_pll0); - _pll0 = NULL; - } -err_pll0: - if (_gclk0 != NULL) { - clk_put(_gclk0); - _gclk0 = NULL; - } - return ret; -} - - -static void __exit playpaq_asoc_exit(void) -{ - if (_gclk0 != NULL) { - clk_put(_gclk0); - _gclk0 = NULL; - } - if (_pll0 != NULL) { - clk_put(_pll0); - _pll0 = NULL; - } - -#if defined CONFIG_AT32_ENHANCED_PORTMUX - at32_free_pin(MCLK_PIN); -#endif - - platform_device_unregister(playpaq_snd_device); - playpaq_snd_device = NULL; -} - -module_init(playpaq_asoc_init); -module_exit(playpaq_asoc_exit); - -MODULE_AUTHOR("Geoffrey Wossum "); -MODULE_DESCRIPTION("ASoC machine driver for LRS PlayPaq"); -MODULE_LICENSE("GPL"); diff --git a/sound/soc/atmel/sam9g20_wm8731.c b/sound/soc/atmel/sam9g20_wm8731.c index 95572d2..0377c54 100644 --- a/sound/soc/atmel/sam9g20_wm8731.c +++ b/sound/soc/atmel/sam9g20_wm8731.c @@ -92,6 +92,7 @@ static struct snd_soc_ops at91sam9g20ek_ops = { }; static int at91sam9g20ek_set_bias_level(struct snd_soc_card *card, + struct snd_soc_dapm_context *dapm, enum snd_soc_bias_level level) { static int mclk_on; @@ -172,8 +173,6 @@ static int at91sam9g20ek_wm8731_init(struct snd_soc_pcm_runtime *rtd) /* always connected */ snd_soc_dapm_enable_pin(dapm, "Ext Spk"); - snd_soc_dapm_sync(dapm); - return 0; } diff --git a/sound/soc/atmel/snd-soc-afeb9260.c b/sound/soc/atmel/snd-soc-afeb9260.c index 5e4d499..d427e92 100644 --- a/sound/soc/atmel/snd-soc-afeb9260.c +++ b/sound/soc/atmel/snd-soc-afeb9260.c @@ -117,8 +117,6 @@ static int afeb9260_tlv320aic23_init(struct snd_soc_pcm_runtime *rtd) snd_soc_dapm_enable_pin(dapm, "Line In"); snd_soc_dapm_enable_pin(dapm, "Mic Jack"); - snd_soc_dapm_sync(dapm); - return 0; } diff --git a/sound/soc/au1x/Kconfig b/sound/soc/au1x/Kconfig index 4b67140..e908a81 100644 --- a/sound/soc/au1x/Kconfig +++ b/sound/soc/au1x/Kconfig @@ -3,7 +3,7 @@ ## config SND_SOC_AU1XPSC tristate "SoC Audio for Au1200/Au1250/Au1550" - depends on SOC_AU1200 || SOC_AU1550 + depends on MIPS_ALCHEMY help This option enables support for the Programmable Serial Controllers in AC97 and I2S mode, and the Descriptor-Based DMA @@ -18,10 +18,38 @@ config SND_SOC_AU1XPSC_AC97 select SND_AC97_CODEC select SND_SOC_AC97_BUS +## +## Au1000/1500/1100 DMA + AC97C/I2SC +## +config SND_SOC_AU1XAUDIO + tristate "SoC Audio for Au1000/Au1500/Au1100" + depends on MIPS_ALCHEMY + help + This is a driver set for the AC97 unit and the + old DMA controller as found on the Au1000/Au1500/Au1100 chips. + +config SND_SOC_AU1XAC97C + tristate + select AC97_BUS + select SND_AC97_CODEC + select SND_SOC_AC97_BUS + +config SND_SOC_AU1XI2SC + tristate + ## ## Boards ## +config SND_SOC_DB1000 + tristate "DB1000 Audio support" + depends on SND_SOC_AU1XAUDIO + select SND_SOC_AU1XAC97C + select SND_SOC_AC97_CODEC + help + Select this option to enable AC97 audio on the early DB1x00 series + of boards (DB1000/DB1500/DB1100). + config SND_SOC_DB1200 tristate "DB1200 AC97+I2S audio support" depends on SND_SOC_AU1XPSC diff --git a/sound/soc/au1x/Makefile b/sound/soc/au1x/Makefile index 1687307..9207105 100644 --- a/sound/soc/au1x/Makefile +++ b/sound/soc/au1x/Makefile @@ -3,11 +3,21 @@ snd-soc-au1xpsc-dbdma-objs := dbdma2.o snd-soc-au1xpsc-i2s-objs := psc-i2s.o snd-soc-au1xpsc-ac97-objs := psc-ac97.o +# Au1000/1500/1100 Audio units +snd-soc-au1x-dma-objs := dma.o +snd-soc-au1x-ac97c-objs := ac97c.o +snd-soc-au1x-i2sc-objs := i2sc.o + obj-$(CONFIG_SND_SOC_AU1XPSC) += snd-soc-au1xpsc-dbdma.o obj-$(CONFIG_SND_SOC_AU1XPSC_I2S) += snd-soc-au1xpsc-i2s.o obj-$(CONFIG_SND_SOC_AU1XPSC_AC97) += snd-soc-au1xpsc-ac97.o +obj-$(CONFIG_SND_SOC_AU1XAUDIO) += snd-soc-au1x-dma.o +obj-$(CONFIG_SND_SOC_AU1XAC97C) += snd-soc-au1x-ac97c.o +obj-$(CONFIG_SND_SOC_AU1XI2SC) += snd-soc-au1x-i2sc.o # Boards +snd-soc-db1000-objs := db1000.o snd-soc-db1200-objs := db1200.o +obj-$(CONFIG_SND_SOC_DB1000) += snd-soc-db1000.o obj-$(CONFIG_SND_SOC_DB1200) += snd-soc-db1200.o diff --git a/sound/soc/au1x/db1200.c b/sound/soc/au1x/db1200.c index 1d3e258..289312c 100644 --- a/sound/soc/au1x/db1200.c +++ b/sound/soc/au1x/db1200.c @@ -1,7 +1,7 @@ /* * DB1200 ASoC audio fabric support code. * - * (c) 2008-9 Manuel Lauss + * (c) 2008-2011 Manuel Lauss * */ @@ -21,6 +21,17 @@ #include "../codecs/wm8731.h" #include "psc.h" +static struct platform_device_id db1200_pids[] = { + { + .name = "db1200-ac97", + .driver_data = 0, + }, { + .name = "db1200-i2s", + .driver_data = 1, + }, + {}, +}; + /*------------------------- AC97 PART ---------------------------*/ static struct snd_soc_dai_link db1200_ac97_dai = { @@ -89,36 +100,47 @@ static struct snd_soc_card db1200_i2s_machine = { /*------------------------- COMMON PART ---------------------------*/ -static struct platform_device *db1200_asoc_dev; +static struct snd_soc_card *db1200_cards[] __devinitdata = { + &db1200_ac97_machine, + &db1200_i2s_machine, +}; -static int __init db1200_audio_load(void) +static int __devinit db1200_audio_probe(struct platform_device *pdev) { - int ret; + const struct platform_device_id *pid = platform_get_device_id(pdev); + struct snd_soc_card *card; - ret = -ENOMEM; - db1200_asoc_dev = platform_device_alloc("soc-audio", 1); /* PSC1 */ - if (!db1200_asoc_dev) - goto out; + card = db1200_cards[pid->driver_data]; + card->dev = &pdev->dev; + return snd_soc_register_card(card); +} - /* DB1200 board setup set PSC1MUX to preferred audio device */ - if (bcsr_read(BCSR_RESETS) & BCSR_RESETS_PSC1MUX) - platform_set_drvdata(db1200_asoc_dev, &db1200_i2s_machine); - else - platform_set_drvdata(db1200_asoc_dev, &db1200_ac97_machine); +static int __devexit db1200_audio_remove(struct platform_device *pdev) +{ + struct snd_soc_card *card = platform_get_drvdata(pdev); + snd_soc_unregister_card(card); + return 0; +} - ret = platform_device_add(db1200_asoc_dev); +static struct platform_driver db1200_audio_driver = { + .driver = { + .name = "db1200-ac97", + .owner = THIS_MODULE, + .pm = &snd_soc_pm_ops, + }, + .id_table = db1200_pids, + .probe = db1200_audio_probe, + .remove = __devexit_p(db1200_audio_remove), +}; - if (ret) { - platform_device_put(db1200_asoc_dev); - db1200_asoc_dev = NULL; - } -out: - return ret; +static int __init db1200_audio_load(void) +{ + return platform_driver_register(&db1200_audio_driver); } static void __exit db1200_audio_unload(void) { - platform_device_unregister(db1200_asoc_dev); + platform_driver_unregister(&db1200_audio_driver); } module_init(db1200_audio_load); diff --git a/sound/soc/au1x/dbdma2.c b/sound/soc/au1x/dbdma2.c index 10fdd28..d7d04e2 100644 --- a/sound/soc/au1x/dbdma2.c +++ b/sound/soc/au1x/dbdma2.c @@ -169,7 +169,7 @@ static int au1x_pcm_dbdma_realloc(struct au1xpsc_audio_dmadata *pcd, au1x_pcm_dbdma_free(pcd); - if (stype == PCM_RX) + if (stype == SNDRV_PCM_STREAM_CAPTURE) pcd->ddma_chan = au1xxx_dbdma_chan_alloc(pcd->ddma_id, DSCR_CMD0_ALWAYS, au1x_pcm_dmarx_cb, (void *)pcd); @@ -198,7 +198,7 @@ static inline struct au1xpsc_audio_dmadata *to_dmadata(struct snd_pcm_substream struct snd_soc_pcm_runtime *rtd = ss->private_data; struct au1xpsc_audio_dmadata *pcd = snd_soc_platform_get_drvdata(rtd->platform); - return &pcd[SUBSTREAM_TYPE(ss)]; + return &pcd[ss->stream]; } static int au1xpsc_pcm_hw_params(struct snd_pcm_substream *substream, @@ -212,7 +212,7 @@ static int au1xpsc_pcm_hw_params(struct snd_pcm_substream *substream, if (ret < 0) goto out; - stype = SUBSTREAM_TYPE(substream); + stype = substream->stream; pcd = to_dmadata(substream); DBG("runtime->dma_area = 0x%08lx dma_addr_t = 0x%08lx dma_size = %d " @@ -255,7 +255,7 @@ static int au1xpsc_pcm_prepare(struct snd_pcm_substream *substream) au1xxx_dbdma_reset(pcd->ddma_chan); - if (SUBSTREAM_TYPE(substream) == PCM_RX) { + if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) { au1x_pcm_queue_rx(pcd); au1x_pcm_queue_rx(pcd); } else { @@ -293,6 +293,16 @@ au1xpsc_pcm_pointer(struct snd_pcm_substream *substream) static int au1xpsc_pcm_open(struct snd_pcm_substream *substream) { + struct au1xpsc_audio_dmadata *pcd = to_dmadata(substream); + struct snd_soc_pcm_runtime *rtd = substream->private_data; + int stype = substream->stream, *dmaids; + + dmaids = snd_soc_dai_get_dma_data(rtd->cpu_dai, substream); + if (!dmaids) + return -ENODEV; /* whoa, has ordering changed? */ + + pcd->ddma_id = dmaids[stype]; + snd_soc_set_runtime_hwparams(substream, &au1xpsc_pcm_hardware); return 0; } @@ -319,10 +329,11 @@ static void au1xpsc_pcm_free_dma_buffers(struct snd_pcm *pcm) snd_pcm_lib_preallocate_free_for_all(pcm); } -static int au1xpsc_pcm_new(struct snd_card *card, - struct snd_soc_dai *dai, - struct snd_pcm *pcm) +static int au1xpsc_pcm_new(struct snd_soc_pcm_runtime *rtd) { + struct snd_card *card = rtd->card->snd_card; + struct snd_pcm *pcm = rtd->pcm; + snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV, card->dev, AU1XPSC_BUFFER_MIN_BYTES, (4096 * 1024) - 1); @@ -339,36 +350,18 @@ struct snd_soc_platform_driver au1xpsc_soc_platform = { static int __devinit au1xpsc_pcm_drvprobe(struct platform_device *pdev) { struct au1xpsc_audio_dmadata *dmadata; - struct resource *r; int ret; dmadata = kzalloc(2 * sizeof(struct au1xpsc_audio_dmadata), GFP_KERNEL); if (!dmadata) return -ENOMEM; - r = platform_get_resource(pdev, IORESOURCE_DMA, 0); - if (!r) { - ret = -ENODEV; - goto out1; - } - dmadata[PCM_TX].ddma_id = r->start; - - /* RX DMA */ - r = platform_get_resource(pdev, IORESOURCE_DMA, 1); - if (!r) { - ret = -ENODEV; - goto out1; - } - dmadata[PCM_RX].ddma_id = r->start; - platform_set_drvdata(pdev, dmadata); ret = snd_soc_register_platform(&pdev->dev, &au1xpsc_soc_platform); - if (!ret) - return ret; + if (ret) + kfree(dmadata); -out1: - kfree(dmadata); return ret; } @@ -404,57 +397,6 @@ static void __exit au1xpsc_audio_dbdma_unload(void) module_init(au1xpsc_audio_dbdma_load); module_exit(au1xpsc_audio_dbdma_unload); - -struct platform_device *au1xpsc_pcm_add(struct platform_device *pdev) -{ - struct resource *res, *r; - struct platform_device *pd; - int id[2]; - int ret; - - r = platform_get_resource(pdev, IORESOURCE_DMA, 0); - if (!r) - return NULL; - id[0] = r->start; - - r = platform_get_resource(pdev, IORESOURCE_DMA, 1); - if (!r) - return NULL; - id[1] = r->start; - - res = kzalloc(sizeof(struct resource) * 2, GFP_KERNEL); - if (!res) - return NULL; - - res[0].start = res[0].end = id[0]; - res[1].start = res[1].end = id[1]; - res[0].flags = res[1].flags = IORESOURCE_DMA; - - pd = platform_device_alloc("au1xpsc-pcm", pdev->id); - if (!pd) - goto out; - - pd->resource = res; - pd->num_resources = 2; - - ret = platform_device_add(pd); - if (!ret) - return pd; - - platform_device_put(pd); -out: - kfree(res); - return NULL; -} -EXPORT_SYMBOL_GPL(au1xpsc_pcm_add); - -void au1xpsc_pcm_destroy(struct platform_device *dmapd) -{ - if (dmapd) - platform_device_unregister(dmapd); -} -EXPORT_SYMBOL_GPL(au1xpsc_pcm_destroy); - MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Au12x0/Au1550 PSC Audio DMA driver"); MODULE_AUTHOR("Manuel Lauss"); diff --git a/sound/soc/au1x/psc-ac97.c b/sound/soc/au1x/psc-ac97.c index d0db66f..0c6acd5 100644 --- a/sound/soc/au1x/psc-ac97.c +++ b/sound/soc/au1x/psc-ac97.c @@ -41,14 +41,14 @@ (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S20_3BE) #define AC97PCR_START(stype) \ - ((stype) == PCM_TX ? PSC_AC97PCR_TS : PSC_AC97PCR_RS) + ((stype) == SNDRV_PCM_STREAM_PLAYBACK ? PSC_AC97PCR_TS : PSC_AC97PCR_RS) #define AC97PCR_STOP(stype) \ - ((stype) == PCM_TX ? PSC_AC97PCR_TP : PSC_AC97PCR_RP) + ((stype) == SNDRV_PCM_STREAM_PLAYBACK ? PSC_AC97PCR_TP : PSC_AC97PCR_RP) #define AC97PCR_CLRFIFO(stype) \ - ((stype) == PCM_TX ? PSC_AC97PCR_TC : PSC_AC97PCR_RC) + ((stype) == SNDRV_PCM_STREAM_PLAYBACK ? PSC_AC97PCR_TC : PSC_AC97PCR_RC) #define AC97STAT_BUSY(stype) \ - ((stype) == PCM_TX ? PSC_AC97STAT_TB : PSC_AC97STAT_RB) + ((stype) == SNDRV_PCM_STREAM_PLAYBACK ? PSC_AC97STAT_TB : PSC_AC97STAT_RB) /* instance data. There can be only one, MacLeod!!!! */ static struct au1xpsc_audio_data *au1xpsc_ac97_workdata; @@ -215,7 +215,7 @@ static int au1xpsc_ac97_hw_params(struct snd_pcm_substream *substream, { struct au1xpsc_audio_data *pscdata = snd_soc_dai_get_drvdata(dai); unsigned long r, ro, stat; - int chans, t, stype = SUBSTREAM_TYPE(substream); + int chans, t, stype = substream->stream; chans = params_channels(params); @@ -235,7 +235,7 @@ static int au1xpsc_ac97_hw_params(struct snd_pcm_substream *substream, r |= PSC_AC97CFG_SET_LEN(params->msbits); /* channels: enable slots for front L/R channel */ - if (stype == PCM_TX) { + if (stype == SNDRV_PCM_STREAM_PLAYBACK) { r &= ~PSC_AC97CFG_TXSLOT_MASK; r |= PSC_AC97CFG_TXSLOT_ENA(3); r |= PSC_AC97CFG_TXSLOT_ENA(4); @@ -294,7 +294,7 @@ static int au1xpsc_ac97_trigger(struct snd_pcm_substream *substream, int cmd, struct snd_soc_dai *dai) { struct au1xpsc_audio_data *pscdata = snd_soc_dai_get_drvdata(dai); - int ret, stype = SUBSTREAM_TYPE(substream); + int ret, stype = substream->stream; ret = 0; @@ -324,12 +324,21 @@ static int au1xpsc_ac97_trigger(struct snd_pcm_substream *substream, return ret; } +static int au1xpsc_ac97_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) +{ + struct au1xpsc_audio_data *pscdata = snd_soc_dai_get_drvdata(dai); + snd_soc_dai_set_dma_data(dai, substream, &pscdata->dmaids[0]); + return 0; +} + static int au1xpsc_ac97_probe(struct snd_soc_dai *dai) { return au1xpsc_ac97_workdata ? 0 : -ENODEV; } static struct snd_soc_dai_ops au1xpsc_ac97_dai_ops = { + .startup = au1xpsc_ac97_startup, .trigger = au1xpsc_ac97_trigger, .hw_params = au1xpsc_ac97_hw_params, }; @@ -355,7 +364,7 @@ static const struct snd_soc_dai_driver au1xpsc_ac97_dai_template = { static int __devinit au1xpsc_ac97_drvprobe(struct platform_device *pdev) { int ret; - struct resource *r; + struct resource *iores, *dmares; unsigned long sel; struct au1xpsc_audio_data *wd; @@ -365,20 +374,31 @@ static int __devinit au1xpsc_ac97_drvprobe(struct platform_device *pdev) mutex_init(&wd->lock); - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!r) { + iores = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!iores) { ret = -ENODEV; goto out0; } ret = -EBUSY; - if (!request_mem_region(r->start, resource_size(r), pdev->name)) + if (!request_mem_region(iores->start, resource_size(iores), + pdev->name)) goto out0; - wd->mmio = ioremap(r->start, resource_size(r)); + wd->mmio = ioremap(iores->start, resource_size(iores)); if (!wd->mmio) goto out1; + dmares = platform_get_resource(pdev, IORESOURCE_DMA, 0); + if (!dmares) + goto out2; + wd->dmaids[SNDRV_PCM_STREAM_PLAYBACK] = dmares->start; + + dmares = platform_get_resource(pdev, IORESOURCE_DMA, 1); + if (!dmares) + goto out2; + wd->dmaids[SNDRV_PCM_STREAM_CAPTURE] = dmares->start; + /* configuration: max dma trigger threshold, enable ac97 */ wd->cfg = PSC_AC97CFG_RT_FIFO8 | PSC_AC97CFG_TT_FIFO8 | PSC_AC97CFG_DE_ENABLE; @@ -401,17 +421,15 @@ static int __devinit au1xpsc_ac97_drvprobe(struct platform_device *pdev) ret = snd_soc_register_dai(&pdev->dev, &wd->dai_drv); if (ret) - goto out1; + goto out2; - wd->dmapd = au1xpsc_pcm_add(pdev); - if (wd->dmapd) { - au1xpsc_ac97_workdata = wd; - return 0; - } + au1xpsc_ac97_workdata = wd; + return 0; - snd_soc_unregister_dai(&pdev->dev); +out2: + iounmap(wd->mmio); out1: - release_mem_region(r->start, resource_size(r)); + release_mem_region(iores->start, resource_size(iores)); out0: kfree(wd); return ret; @@ -422,9 +440,6 @@ static int __devexit au1xpsc_ac97_drvremove(struct platform_device *pdev) struct au1xpsc_audio_data *wd = platform_get_drvdata(pdev); struct resource *r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (wd->dmapd) - au1xpsc_pcm_destroy(wd->dmapd); - snd_soc_unregister_dai(&pdev->dev); /* disable PSC completely */ diff --git a/sound/soc/au1x/psc-i2s.c b/sound/soc/au1x/psc-i2s.c index fca0912..e03c5ce 100644 --- a/sound/soc/au1x/psc-i2s.c +++ b/sound/soc/au1x/psc-i2s.c @@ -42,13 +42,13 @@ (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE) #define I2SSTAT_BUSY(stype) \ - ((stype) == PCM_TX ? PSC_I2SSTAT_TB : PSC_I2SSTAT_RB) + ((stype) == SNDRV_PCM_STREAM_PLAYBACK ? PSC_I2SSTAT_TB : PSC_I2SSTAT_RB) #define I2SPCR_START(stype) \ - ((stype) == PCM_TX ? PSC_I2SPCR_TS : PSC_I2SPCR_RS) + ((stype) == SNDRV_PCM_STREAM_PLAYBACK ? PSC_I2SPCR_TS : PSC_I2SPCR_RS) #define I2SPCR_STOP(stype) \ - ((stype) == PCM_TX ? PSC_I2SPCR_TP : PSC_I2SPCR_RP) + ((stype) == SNDRV_PCM_STREAM_PLAYBACK ? PSC_I2SPCR_TP : PSC_I2SPCR_RP) #define I2SPCR_CLRFIFO(stype) \ - ((stype) == PCM_TX ? PSC_I2SPCR_TC : PSC_I2SPCR_RC) + ((stype) == SNDRV_PCM_STREAM_PLAYBACK ? PSC_I2SPCR_TC : PSC_I2SPCR_RC) static int au1xpsc_i2s_set_fmt(struct snd_soc_dai *cpu_dai, @@ -240,7 +240,7 @@ static int au1xpsc_i2s_trigger(struct snd_pcm_substream *substream, int cmd, struct snd_soc_dai *dai) { struct au1xpsc_audio_data *pscdata = snd_soc_dai_get_drvdata(dai); - int ret, stype = SUBSTREAM_TYPE(substream); + int ret, stype = substream->stream; switch (cmd) { case SNDRV_PCM_TRIGGER_START: @@ -257,7 +257,16 @@ static int au1xpsc_i2s_trigger(struct snd_pcm_substream *substream, int cmd, return ret; } +static int au1xpsc_i2s_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) +{ + struct au1xpsc_audio_data *pscdata = snd_soc_dai_get_drvdata(dai); + snd_soc_dai_set_dma_data(dai, substream, &pscdata->dmaids[0]); + return 0; +} + static struct snd_soc_dai_ops au1xpsc_i2s_dai_ops = { + .startup = au1xpsc_i2s_startup, .trigger = au1xpsc_i2s_trigger, .hw_params = au1xpsc_i2s_hw_params, .set_fmt = au1xpsc_i2s_set_fmt, @@ -281,7 +290,7 @@ static const struct snd_soc_dai_driver au1xpsc_i2s_dai_template = { static int __devinit au1xpsc_i2s_drvprobe(struct platform_device *pdev) { - struct resource *r; + struct resource *iores, *dmares; unsigned long sel; int ret; struct au1xpsc_audio_data *wd; @@ -290,20 +299,31 @@ static int __devinit au1xpsc_i2s_drvprobe(struct platform_device *pdev) if (!wd) return -ENOMEM; - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!r) { + iores = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!iores) { ret = -ENODEV; goto out0; } ret = -EBUSY; - if (!request_mem_region(r->start, resource_size(r), pdev->name)) + if (!request_mem_region(iores->start, resource_size(iores), + pdev->name)) goto out0; - wd->mmio = ioremap(r->start, resource_size(r)); + wd->mmio = ioremap(iores->start, resource_size(iores)); if (!wd->mmio) goto out1; + dmares = platform_get_resource(pdev, IORESOURCE_DMA, 0); + if (!dmares) + goto out2; + wd->dmaids[SNDRV_PCM_STREAM_PLAYBACK] = dmares->start; + + dmares = platform_get_resource(pdev, IORESOURCE_DMA, 1); + if (!dmares) + goto out2; + wd->dmaids[SNDRV_PCM_STREAM_CAPTURE] = dmares->start; + /* preserve PSC clock source set up by platform (dev.platform_data * is already occupied by soc layer) */ @@ -330,17 +350,13 @@ static int __devinit au1xpsc_i2s_drvprobe(struct platform_device *pdev) platform_set_drvdata(pdev, wd); ret = snd_soc_register_dai(&pdev->dev, &wd->dai_drv); - if (ret) - goto out1; - - /* finally add the DMA device for this PSC */ - wd->dmapd = au1xpsc_pcm_add(pdev); - if (wd->dmapd) + if (!ret) return 0; - snd_soc_unregister_dai(&pdev->dev); +out2: + iounmap(wd->mmio); out1: - release_mem_region(r->start, resource_size(r)); + release_mem_region(iores->start, resource_size(iores)); out0: kfree(wd); return ret; @@ -351,9 +367,6 @@ static int __devexit au1xpsc_i2s_drvremove(struct platform_device *pdev) struct au1xpsc_audio_data *wd = platform_get_drvdata(pdev); struct resource *r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (wd->dmapd) - au1xpsc_pcm_destroy(wd->dmapd); - snd_soc_unregister_dai(&pdev->dev); au_writel(0, I2S_CFG(wd)); diff --git a/sound/soc/au1x/psc.h b/sound/soc/au1x/psc.h index b30eadd..b16b2e0 100644 --- a/sound/soc/au1x/psc.h +++ b/sound/soc/au1x/psc.h @@ -1,7 +1,7 @@ /* - * Au12x0/Au1550 PSC ALSA ASoC audio support. + * Alchemy ALSA ASoC audio support. * - * (c) 2007-2008 MSC Vertriebsges.m.b.H., + * (c) 2007-2011 MSC Vertriebsges.m.b.H., * Manuel Lauss * * This program is free software; you can redistribute it and/or modify @@ -13,10 +13,6 @@ #ifndef _AU1X_PCM_H #define _AU1X_PCM_H -/* DBDMA helpers */ -extern struct platform_device *au1xpsc_pcm_add(struct platform_device *pdev); -extern void au1xpsc_pcm_destroy(struct platform_device *dmapd); - struct au1xpsc_audio_data { void __iomem *mmio; @@ -27,15 +23,9 @@ struct au1xpsc_audio_data { unsigned long pm[2]; struct mutex lock; - struct platform_device *dmapd; + int dmaids[2]; }; -#define PCM_TX 0 -#define PCM_RX 1 - -#define SUBSTREAM_TYPE(substream) \ - ((substream)->stream == SNDRV_PCM_STREAM_PLAYBACK ? PCM_TX : PCM_RX) - /* easy access macros */ #define PSC_CTRL(x) ((unsigned long)((x)->mmio) + PSC_CTRL_OFFSET) #define PSC_SEL(x) ((unsigned long)((x)->mmio) + PSC_SEL_OFFSET) diff --git a/sound/soc/davinci/Kconfig b/sound/soc/davinci/Kconfig index 6bbf001..9e11a14 100644 --- a/sound/soc/davinci/Kconfig +++ b/sound/soc/davinci/Kconfig @@ -29,7 +29,6 @@ choice prompt "DM365 codec select" depends on SND_DAVINCI_SOC_EVM depends on MACH_DAVINCI_DM365_EVM - default SND_DM365_EXTERNAL_CODEC config SND_DM365_AIC3X_CODEC bool "Audio Codec - AIC3101" diff --git a/sound/soc/davinci/davinci-evm.c b/sound/soc/davinci/davinci-evm.c index fe79842..f78c3f0 100644 --- a/sound/soc/davinci/davinci-evm.c +++ b/sound/soc/davinci/davinci-evm.c @@ -150,8 +150,6 @@ static int evm_aic3x_init(struct snd_soc_pcm_runtime *rtd) snd_soc_dapm_enable_pin(dapm, "Mic Jack"); snd_soc_dapm_enable_pin(dapm, "Line In"); - snd_soc_dapm_sync(dapm); - return 0; } diff --git a/sound/soc/davinci/davinci-i2s.c b/sound/soc/davinci/davinci-i2s.c index d0d60b8..300e121 100644 --- a/sound/soc/davinci/davinci-i2s.c +++ b/sound/soc/davinci/davinci-i2s.c @@ -265,6 +265,7 @@ static int davinci_i2s_set_dai_fmt(struct snd_soc_dai *cpu_dai, struct davinci_mcbsp_dev *dev = snd_soc_dai_get_drvdata(cpu_dai); unsigned int pcr; unsigned int srgr; + bool inv_fs = false; /* Attention srgr is updated by hw_params! */ srgr = DAVINCI_MCBSP_SRGR_FSGM | DAVINCI_MCBSP_SRGR_FPER(DEFAULT_BITPERSAMPLE * 2 - 1) | @@ -330,7 +331,7 @@ static int davinci_i2s_set_dai_fmt(struct snd_soc_dai *cpu_dai, * more empty bit clock slots between channels as the sample * rate is lowered. */ - fmt ^= SND_SOC_DAIFMT_NB_IF; + inv_fs = true; case SND_SOC_DAIFMT_DSP_A: dev->mode = MOD_DSP_A; break; @@ -394,6 +395,8 @@ static int davinci_i2s_set_dai_fmt(struct snd_soc_dai *cpu_dai, default: return -EINVAL; } + if (inv_fs == true) + pcr ^= (DAVINCI_MCBSP_PCR_FSXP | DAVINCI_MCBSP_PCR_FSRP); davinci_mcbsp_write_reg(dev, DAVINCI_MCBSP_SRGR_REG, srgr); dev->pcr = pcr; davinci_mcbsp_write_reg(dev, DAVINCI_MCBSP_PCR_REG, pcr); diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c index 8566238..7173df2 100644 --- a/sound/soc/davinci/davinci-mcasp.c +++ b/sound/soc/davinci/davinci-mcasp.c @@ -732,16 +732,19 @@ static int davinci_mcasp_hw_params(struct snd_pcm_substream *substream, davinci_hw_param(dev, substream->stream); switch (params_format(params)) { + case SNDRV_PCM_FORMAT_U8: case SNDRV_PCM_FORMAT_S8: dma_params->data_type = 1; word_length = DAVINCI_AUDIO_WORD_8; break; + case SNDRV_PCM_FORMAT_U16_LE: case SNDRV_PCM_FORMAT_S16_LE: dma_params->data_type = 2; word_length = DAVINCI_AUDIO_WORD_16; break; + case SNDRV_PCM_FORMAT_U32_LE: case SNDRV_PCM_FORMAT_S32_LE: dma_params->data_type = 4; word_length = DAVINCI_AUDIO_WORD_32; @@ -818,6 +821,13 @@ static struct snd_soc_dai_ops davinci_mcasp_dai_ops = { }; +#define DAVINCI_MCASP_PCM_FMTS (SNDRV_PCM_FMTBIT_S8 | \ + SNDRV_PCM_FMTBIT_U8 | \ + SNDRV_PCM_FMTBIT_S16_LE | \ + SNDRV_PCM_FMTBIT_U16_LE | \ + SNDRV_PCM_FMTBIT_S32_LE | \ + SNDRV_PCM_FMTBIT_U32_LE) + static struct snd_soc_dai_driver davinci_mcasp_dai[] = { { .name = "davinci-mcasp.0", @@ -825,17 +835,13 @@ static struct snd_soc_dai_driver davinci_mcasp_dai[] = { .channels_min = 2, .channels_max = 2, .rates = DAVINCI_MCASP_RATES, - .formats = SNDRV_PCM_FMTBIT_S8 | - SNDRV_PCM_FMTBIT_S16_LE | - SNDRV_PCM_FMTBIT_S32_LE, + .formats = DAVINCI_MCASP_PCM_FMTS, }, .capture = { .channels_min = 2, .channels_max = 2, .rates = DAVINCI_MCASP_RATES, - .formats = SNDRV_PCM_FMTBIT_S8 | - SNDRV_PCM_FMTBIT_S16_LE | - SNDRV_PCM_FMTBIT_S32_LE, + .formats = DAVINCI_MCASP_PCM_FMTS, }, .ops = &davinci_mcasp_dai_ops, @@ -846,7 +852,7 @@ static struct snd_soc_dai_driver davinci_mcasp_dai[] = { .channels_min = 1, .channels_max = 384, .rates = DAVINCI_MCASP_RATES, - .formats = SNDRV_PCM_FMTBIT_S16_LE, + .formats = DAVINCI_MCASP_PCM_FMTS, }, .ops = &davinci_mcasp_dai_ops, }, diff --git a/sound/soc/davinci/davinci-pcm.c b/sound/soc/davinci/davinci-pcm.c index 9d35b8c..d5fe08c 100644 --- a/sound/soc/davinci/davinci-pcm.c +++ b/sound/soc/davinci/davinci-pcm.c @@ -46,11 +46,28 @@ static void print_buf_info(int slot, char *name) } #endif +#define DAVINCI_PCM_FMTBITS (\ + SNDRV_PCM_FMTBIT_S8 |\ + SNDRV_PCM_FMTBIT_U8 |\ + SNDRV_PCM_FMTBIT_S16_LE |\ + SNDRV_PCM_FMTBIT_S16_BE |\ + SNDRV_PCM_FMTBIT_U16_LE |\ + SNDRV_PCM_FMTBIT_U16_BE |\ + SNDRV_PCM_FMTBIT_S24_LE |\ + SNDRV_PCM_FMTBIT_S24_BE |\ + SNDRV_PCM_FMTBIT_U24_LE |\ + SNDRV_PCM_FMTBIT_U24_BE |\ + SNDRV_PCM_FMTBIT_S32_LE |\ + SNDRV_PCM_FMTBIT_S32_BE |\ + SNDRV_PCM_FMTBIT_U32_LE |\ + SNDRV_PCM_FMTBIT_U32_BE) + static struct snd_pcm_hardware pcm_hardware_playback = { .info = (SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER | SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID | - SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_RESUME), - .formats = (SNDRV_PCM_FMTBIT_S16_LE), + SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_RESUME| + SNDRV_PCM_INFO_BATCH), + .formats = DAVINCI_PCM_FMTBITS, .rates = (SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 | SNDRV_PCM_RATE_22050 | SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000 | @@ -59,7 +76,7 @@ static struct snd_pcm_hardware pcm_hardware_playback = { .rate_min = 8000, .rate_max = 96000, .channels_min = 2, - .channels_max = 2, + .channels_max = 384, .buffer_bytes_max = 128 * 1024, .period_bytes_min = 32, .period_bytes_max = 8 * 1024, @@ -71,8 +88,9 @@ static struct snd_pcm_hardware pcm_hardware_playback = { static struct snd_pcm_hardware pcm_hardware_capture = { .info = (SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER | SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID | - SNDRV_PCM_INFO_PAUSE), - .formats = (SNDRV_PCM_FMTBIT_S16_LE), + SNDRV_PCM_INFO_PAUSE | + SNDRV_PCM_INFO_BATCH), + .formats = DAVINCI_PCM_FMTBITS, .rates = (SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 | SNDRV_PCM_RATE_22050 | SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000 | @@ -81,7 +99,7 @@ static struct snd_pcm_hardware pcm_hardware_capture = { .rate_min = 8000, .rate_max = 96000, .channels_min = 2, - .channels_max = 2, + .channels_max = 384, .buffer_bytes_max = 128 * 1024, .period_bytes_min = 32, .period_bytes_max = 8 * 1024, @@ -139,6 +157,22 @@ struct davinci_runtime_data { struct edmacc_param ram_params; }; +static void davinci_pcm_period_elapsed(struct snd_pcm_substream *substream) +{ + struct davinci_runtime_data *prtd = substream->runtime->private_data; + struct snd_pcm_runtime *runtime = substream->runtime; + + prtd->period++; + if (unlikely(prtd->period >= runtime->periods)) + prtd->period = 0; +} + +static void davinci_pcm_period_reset(struct snd_pcm_substream *substream) +{ + struct davinci_runtime_data *prtd = substream->runtime->private_data; + + prtd->period = 0; +} /* * Not used with ping/pong */ @@ -146,7 +180,6 @@ static void davinci_pcm_enqueue_dma(struct snd_pcm_substream *substream) { struct davinci_runtime_data *prtd = substream->runtime->private_data; struct snd_pcm_runtime *runtime = substream->runtime; - int link = prtd->asp_link[0]; unsigned int period_size; unsigned int dma_offset; dma_addr_t dma_pos; @@ -164,7 +197,8 @@ static void davinci_pcm_enqueue_dma(struct snd_pcm_substream *substream) fifo_level = prtd->params->fifo_level; pr_debug("davinci_pcm: audio_set_dma_params_play channel = %d " - "dma_ptr = %x period_size=%x\n", link, dma_pos, period_size); + "dma_ptr = %x period_size=%x\n", prtd->asp_link[0], dma_pos, + period_size); data_type = prtd->params->data_type; count = period_size / data_type; @@ -188,21 +222,19 @@ static void davinci_pcm_enqueue_dma(struct snd_pcm_substream *substream) } acnt = prtd->params->acnt; - edma_set_src(link, src, INCR, W8BIT); - edma_set_dest(link, dst, INCR, W8BIT); + edma_set_src(prtd->asp_link[0], src, INCR, W8BIT); + edma_set_dest(prtd->asp_link[0], dst, INCR, W8BIT); - edma_set_src_index(link, src_bidx, src_cidx); - edma_set_dest_index(link, dst_bidx, dst_cidx); + edma_set_src_index(prtd->asp_link[0], src_bidx, src_cidx); + edma_set_dest_index(prtd->asp_link[0], dst_bidx, dst_cidx); if (!fifo_level) - edma_set_transfer_params(link, acnt, count, 1, 0, ASYNC); + edma_set_transfer_params(prtd->asp_link[0], acnt, count, 1, 0, + ASYNC); else - edma_set_transfer_params(link, acnt, fifo_level, count, - fifo_level, ABSYNC); - - prtd->period++; - if (unlikely(prtd->period >= runtime->periods)) - prtd->period = 0; + edma_set_transfer_params(prtd->asp_link[0], acnt, fifo_level, + count, fifo_level, + ABSYNC); } static void davinci_pcm_dma_irq(unsigned link, u16 ch_status, void *data) @@ -217,12 +249,13 @@ static void davinci_pcm_dma_irq(unsigned link, u16 ch_status, void *data) return; if (snd_pcm_running(substream)) { + spin_lock(&prtd->lock); if (prtd->ram_channel < 0) { /* No ping/pong must fix up link dma data*/ - spin_lock(&prtd->lock); davinci_pcm_enqueue_dma(substream); - spin_unlock(&prtd->lock); } + davinci_pcm_period_elapsed(substream); + spin_unlock(&prtd->lock); snd_pcm_period_elapsed(substream); } } @@ -274,7 +307,6 @@ static int ping_pong_dma_setup(struct snd_pcm_substream *substream) unsigned int acnt = params->acnt; /* divide by 2 for ping/pong */ unsigned int ping_size = snd_pcm_lib_period_bytes(substream) >> 1; - int link = prtd->asp_link[1]; unsigned int fifo_level = prtd->params->fifo_level; unsigned int count; if ((data_type == 0) || (data_type > 4)) { @@ -285,28 +317,26 @@ static int ping_pong_dma_setup(struct snd_pcm_substream *substream) dma_addr_t asp_src_pong = iram_dma->addr + ping_size; ram_src_cidx = ping_size; ram_dst_cidx = -ping_size; - edma_set_src(link, asp_src_pong, INCR, W8BIT); + edma_set_src(prtd->asp_link[1], asp_src_pong, INCR, W8BIT); - link = prtd->asp_link[0]; - edma_set_src_index(link, data_type, data_type * fifo_level); - link = prtd->asp_link[1]; - edma_set_src_index(link, data_type, data_type * fifo_level); + edma_set_src_index(prtd->asp_link[0], data_type, + data_type * fifo_level); + edma_set_src_index(prtd->asp_link[1], data_type, + data_type * fifo_level); - link = prtd->ram_link; - edma_set_src(link, runtime->dma_addr, INCR, W32BIT); + edma_set_src(prtd->ram_link, runtime->dma_addr, INCR, W32BIT); } else { dma_addr_t asp_dst_pong = iram_dma->addr + ping_size; ram_src_cidx = -ping_size; ram_dst_cidx = ping_size; - edma_set_dest(link, asp_dst_pong, INCR, W8BIT); + edma_set_dest(prtd->asp_link[1], asp_dst_pong, INCR, W8BIT); - link = prtd->asp_link[0]; - edma_set_dest_index(link, data_type, data_type * fifo_level); - link = prtd->asp_link[1]; - edma_set_dest_index(link, data_type, data_type * fifo_level); + edma_set_dest_index(prtd->asp_link[0], data_type, + data_type * fifo_level); + edma_set_dest_index(prtd->asp_link[1], data_type, + data_type * fifo_level); - link = prtd->ram_link; - edma_set_dest(link, runtime->dma_addr, INCR, W32BIT); + edma_set_dest(prtd->ram_link, runtime->dma_addr, INCR, W32BIT); } if (!fifo_level) { @@ -323,10 +353,9 @@ static int ping_pong_dma_setup(struct snd_pcm_substream *substream) count, fifo_level, ABSYNC); } - link = prtd->ram_link; - edma_set_src_index(link, ping_size, ram_src_cidx); - edma_set_dest_index(link, ping_size, ram_dst_cidx); - edma_set_transfer_params(link, ping_size, 2, + edma_set_src_index(prtd->ram_link, ping_size, ram_src_cidx); + edma_set_dest_index(prtd->ram_link, ping_size, ram_dst_cidx); + edma_set_transfer_params(prtd->ram_link, ping_size, 2, runtime->periods, 2, ASYNC); /* init master params */ @@ -375,32 +404,32 @@ static int request_ping_pong(struct snd_pcm_substream *substream, { dma_addr_t asp_src_ping; dma_addr_t asp_dst_ping; - int link; + int ret; struct davinci_pcm_dma_params *params = prtd->params; /* Request ram master channel */ - link = prtd->ram_channel = edma_alloc_channel(EDMA_CHANNEL_ANY, + ret = prtd->ram_channel = edma_alloc_channel(EDMA_CHANNEL_ANY, davinci_pcm_dma_irq, substream, prtd->params->ram_chan_q); - if (link < 0) + if (ret < 0) goto exit1; /* Request ram link channel */ - link = prtd->ram_link = edma_alloc_slot( + ret = prtd->ram_link = edma_alloc_slot( EDMA_CTLR(prtd->ram_channel), EDMA_SLOT_ANY); - if (link < 0) + if (ret < 0) goto exit2; - link = prtd->asp_link[1] = edma_alloc_slot( + ret = prtd->asp_link[1] = edma_alloc_slot( EDMA_CTLR(prtd->asp_channel), EDMA_SLOT_ANY); - if (link < 0) + if (ret < 0) goto exit3; prtd->ram_link2 = -1; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { - link = prtd->ram_link2 = edma_alloc_slot( + ret = prtd->ram_link2 = edma_alloc_slot( EDMA_CTLR(prtd->ram_channel), EDMA_SLOT_ANY); - if (link < 0) + if (ret < 0) goto exit4; } /* circle ping-pong buffers */ @@ -417,35 +446,33 @@ static int request_ping_pong(struct snd_pcm_substream *substream, asp_dst_ping = iram_dma->addr; } /* ping */ - link = prtd->asp_link[0]; - edma_set_src(link, asp_src_ping, INCR, W16BIT); - edma_set_dest(link, asp_dst_ping, INCR, W16BIT); - edma_set_src_index(link, 0, 0); - edma_set_dest_index(link, 0, 0); + edma_set_src(prtd->asp_link[0], asp_src_ping, INCR, W16BIT); + edma_set_dest(prtd->asp_link[0], asp_dst_ping, INCR, W16BIT); + edma_set_src_index(prtd->asp_link[0], 0, 0); + edma_set_dest_index(prtd->asp_link[0], 0, 0); - edma_read_slot(link, &prtd->asp_params); + edma_read_slot(prtd->asp_link[0], &prtd->asp_params); prtd->asp_params.opt &= ~(TCCMODE | EDMA_TCC(0x3f) | TCINTEN); - prtd->asp_params.opt |= TCCHEN | EDMA_TCC(prtd->ram_channel & 0x3f); - edma_write_slot(link, &prtd->asp_params); + prtd->asp_params.opt |= TCCHEN | + EDMA_TCC(prtd->ram_channel & 0x3f); + edma_write_slot(prtd->asp_link[0], &prtd->asp_params); /* pong */ - link = prtd->asp_link[1]; - edma_set_src(link, asp_src_ping, INCR, W16BIT); - edma_set_dest(link, asp_dst_ping, INCR, W16BIT); - edma_set_src_index(link, 0, 0); - edma_set_dest_index(link, 0, 0); + edma_set_src(prtd->asp_link[1], asp_src_ping, INCR, W16BIT); + edma_set_dest(prtd->asp_link[1], asp_dst_ping, INCR, W16BIT); + edma_set_src_index(prtd->asp_link[1], 0, 0); + edma_set_dest_index(prtd->asp_link[1], 0, 0); - edma_read_slot(link, &prtd->asp_params); + edma_read_slot(prtd->asp_link[1], &prtd->asp_params); prtd->asp_params.opt &= ~(TCCMODE | EDMA_TCC(0x3f)); /* interrupt after every pong completion */ prtd->asp_params.opt |= TCINTEN | TCCHEN | - EDMA_TCC(EDMA_CHAN_SLOT(prtd->ram_channel)); - edma_write_slot(link, &prtd->asp_params); + EDMA_TCC(prtd->ram_channel & 0x3f); + edma_write_slot(prtd->asp_link[1], &prtd->asp_params); /* ram */ - link = prtd->ram_link; - edma_set_src(link, iram_dma->addr, INCR, W32BIT); - edma_set_dest(link, iram_dma->addr, INCR, W32BIT); + edma_set_src(prtd->ram_link, iram_dma->addr, INCR, W32BIT); + edma_set_dest(prtd->ram_link, iram_dma->addr, INCR, W32BIT); pr_debug("%s: audio dma channels/slots in use for ram:%u %u %u," "for asp:%u %u %u\n", __func__, prtd->ram_channel, prtd->ram_link, prtd->ram_link2, @@ -462,7 +489,7 @@ exit2: edma_free_channel(prtd->ram_channel); prtd->ram_channel = -1; exit1: - return link; + return ret; } static int davinci_pcm_dma_request(struct snd_pcm_substream *substream) @@ -470,22 +497,22 @@ static int davinci_pcm_dma_request(struct snd_pcm_substream *substream) struct snd_dma_buffer *iram_dma; struct davinci_runtime_data *prtd = substream->runtime->private_data; struct davinci_pcm_dma_params *params = prtd->params; - int link; + int ret; if (!params) return -ENODEV; /* Request asp master DMA channel */ - link = prtd->asp_channel = edma_alloc_channel(params->channel, + ret = prtd->asp_channel = edma_alloc_channel(params->channel, davinci_pcm_dma_irq, substream, prtd->params->asp_chan_q); - if (link < 0) + if (ret < 0) goto exit1; /* Request asp link channels */ - link = prtd->asp_link[0] = edma_alloc_slot( + ret = prtd->asp_link[0] = edma_alloc_slot( EDMA_CTLR(prtd->asp_channel), EDMA_SLOT_ANY); - if (link < 0) + if (ret < 0) goto exit2; iram_dma = (struct snd_dma_buffer *)substream->dma_buffer.private_data; @@ -505,17 +532,17 @@ static int davinci_pcm_dma_request(struct snd_pcm_substream *substream) * the buffer and its length (ccnt) ... use it as a template * so davinci_pcm_enqueue_dma() takes less time in IRQ. */ - edma_read_slot(link, &prtd->asp_params); + edma_read_slot(prtd->asp_link[0], &prtd->asp_params); prtd->asp_params.opt |= TCINTEN | EDMA_TCC(EDMA_CHAN_SLOT(prtd->asp_channel)); - prtd->asp_params.link_bcntrld = EDMA_CHAN_SLOT(link) << 5; - edma_write_slot(link, &prtd->asp_params); + prtd->asp_params.link_bcntrld = EDMA_CHAN_SLOT(prtd->asp_link[0]) << 5; + edma_write_slot(prtd->asp_link[0], &prtd->asp_params); return 0; exit2: edma_free_channel(prtd->asp_channel); prtd->asp_channel = -1; exit1: - return link; + return ret; } static int davinci_pcm_trigger(struct snd_pcm_substream *substream, int cmd) @@ -527,6 +554,13 @@ static int davinci_pcm_trigger(struct snd_pcm_substream *substream, int cmd) switch (cmd) { case SNDRV_PCM_TRIGGER_START: + edma_start(prtd->asp_channel); + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && + prtd->ram_channel >= 0) { + /* copy 1st iram buffer */ + edma_start(prtd->ram_channel); + } + break; case SNDRV_PCM_TRIGGER_RESUME: case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: edma_resume(prtd->asp_channel); @@ -550,6 +584,7 @@ static int davinci_pcm_prepare(struct snd_pcm_substream *substream) { struct davinci_runtime_data *prtd = substream->runtime->private_data; + davinci_pcm_period_reset(substream); if (prtd->ram_channel >= 0) { int ret = ping_pong_dma_setup(substream); if (ret < 0) @@ -565,21 +600,31 @@ static int davinci_pcm_prepare(struct snd_pcm_substream *substream) print_buf_info(prtd->asp_link[0], "asp_link[0]"); print_buf_info(prtd->asp_link[1], "asp_link[1]"); - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { - /* copy 1st iram buffer */ - edma_start(prtd->ram_channel); - } - edma_start(prtd->asp_channel); + /* + * There is a phase offset of 2 periods between the position + * used by dma setup and the position reported in the pointer + * function. + * + * The phase offset, when not using ping-pong buffers, is due to + * the two consecutive calls to davinci_pcm_enqueue_dma() below. + * + * Whereas here, with ping-pong buffers, the phase is due to + * there being an entire buffer transfer complete before the + * first dma completion event triggers davinci_pcm_dma_irq(). + */ + davinci_pcm_period_elapsed(substream); + davinci_pcm_period_elapsed(substream); + return 0; } - prtd->period = 0; davinci_pcm_enqueue_dma(substream); + davinci_pcm_period_elapsed(substream); /* Copy self-linked parameter RAM entry into master channel */ edma_read_slot(prtd->asp_link[0], &prtd->asp_params); edma_write_slot(prtd->asp_channel, &prtd->asp_params); davinci_pcm_enqueue_dma(substream); - edma_start(prtd->asp_channel); + davinci_pcm_period_elapsed(substream); return 0; } @@ -591,51 +636,23 @@ davinci_pcm_pointer(struct snd_pcm_substream *substream) struct davinci_runtime_data *prtd = runtime->private_data; unsigned int offset; int asp_count; - dma_addr_t asp_src, asp_dst; - + unsigned int period_size = snd_pcm_lib_period_bytes(substream); + + /* + * There is a phase offset of 2 periods between the position used by dma + * setup and the position reported in the pointer function. Either +2 in + * the dma setup or -2 here in the pointer function (with wrapping, + * both) accounts for this offset -- choose the latter since it makes + * the first-time setup clearer. + */ spin_lock(&prtd->lock); - if (prtd->ram_channel >= 0) { - int ram_count; - int mod_ram; - dma_addr_t ram_src, ram_dst; - unsigned int period_size = snd_pcm_lib_period_bytes(substream); - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { - /* reading ram before asp should be safe - * as long as the asp transfers less than a ping size - * of bytes between the 2 reads - */ - edma_get_position(prtd->ram_channel, - &ram_src, &ram_dst); - edma_get_position(prtd->asp_channel, - &asp_src, &asp_dst); - asp_count = asp_src - prtd->asp_params.src; - ram_count = ram_src - prtd->ram_params.src; - mod_ram = ram_count % period_size; - mod_ram -= asp_count; - if (mod_ram < 0) - mod_ram += period_size; - else if (mod_ram == 0) { - if (snd_pcm_running(substream)) - mod_ram += period_size; - } - ram_count -= mod_ram; - if (ram_count < 0) - ram_count += period_size * runtime->periods; - } else { - edma_get_position(prtd->ram_channel, - &ram_src, &ram_dst); - ram_count = ram_dst - prtd->ram_params.dst; - } - asp_count = ram_count; - } else { - edma_get_position(prtd->asp_channel, &asp_src, &asp_dst); - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - asp_count = asp_src - runtime->dma_addr; - else - asp_count = asp_dst - runtime->dma_addr; - } + asp_count = prtd->period - 2; spin_unlock(&prtd->lock); + if (asp_count < 0) + asp_count += runtime->periods; + asp_count *= period_size; + offset = bytes_to_frames(runtime, asp_count); if (offset >= runtime->buffer_size) offset = 0; @@ -811,9 +828,11 @@ static void davinci_pcm_free(struct snd_pcm *pcm) static u64 davinci_pcm_dmamask = 0xffffffff; -static int davinci_pcm_new(struct snd_card *card, - struct snd_soc_dai *dai, struct snd_pcm *pcm) +static int davinci_pcm_new(struct snd_soc_pcm_runtime *rtd) { + struct snd_card *card = rtd->card->snd_card; + struct snd_soc_dai *dai = rtd->cpu_dai; + struct snd_pcm *pcm = rtd->pcm; int ret; if (!card->dev->dma_mask) diff --git a/sound/soc/ep93xx/edb93xx.c b/sound/soc/ep93xx/edb93xx.c index d3aa151..51930b6 100644 --- a/sound/soc/ep93xx/edb93xx.c +++ b/sound/soc/ep93xx/edb93xx.c @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -28,12 +29,6 @@ #include #include "ep93xx-pcm.h" -#define edb93xx_has_audio() (machine_is_edb9301() || \ - machine_is_edb9302() || \ - machine_is_edb9302a() || \ - machine_is_edb9307a() || \ - machine_is_edb9315a()) - static int edb93xx_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { @@ -94,49 +89,61 @@ static struct snd_soc_card snd_soc_edb93xx = { .num_links = 1, }; -static struct platform_device *edb93xx_snd_device; - -static int __init edb93xx_init(void) +static int __devinit edb93xx_probe(struct platform_device *pdev) { + struct snd_soc_card *card = &snd_soc_edb93xx; int ret; - if (!edb93xx_has_audio()) - return -ENODEV; - ret = ep93xx_i2s_acquire(EP93XX_SYSCON_DEVCFG_I2SONAC97, EP93XX_SYSCON_I2SCLKDIV_ORIDE | EP93XX_SYSCON_I2SCLKDIV_SPOL); if (ret) return ret; - edb93xx_snd_device = platform_device_alloc("soc-audio", -1); - if (!edb93xx_snd_device) { - ret = -ENOMEM; - goto free_i2s; + card->dev = &pdev->dev; + + ret = snd_soc_register_card(card); + if (ret) { + dev_err(&pdev->dev, "snd_soc_register_card() failed: %d\n", + ret); + ep93xx_i2s_release(); } - platform_set_drvdata(edb93xx_snd_device, &snd_soc_edb93xx); - ret = platform_device_add(edb93xx_snd_device); - if (ret) - goto device_put; + return ret; +} - return 0; +static int __devexit edb93xx_remove(struct platform_device *pdev) +{ + struct snd_soc_card *card = platform_get_drvdata(pdev); -device_put: - platform_device_put(edb93xx_snd_device); -free_i2s: + snd_soc_unregister_card(card); ep93xx_i2s_release(); - return ret; + + return 0; +} + +static struct platform_driver edb93xx_driver = { + .driver = { + .name = "edb93xx-audio", + .owner = THIS_MODULE, + }, + .probe = edb93xx_probe, + .remove = __devexit_p(edb93xx_remove), +}; + +static int __init edb93xx_init(void) +{ + return platform_driver_register(&edb93xx_driver); } module_init(edb93xx_init); static void __exit edb93xx_exit(void) { - platform_device_unregister(edb93xx_snd_device); - ep93xx_i2s_release(); + platform_driver_unregister(&edb93xx_driver); } module_exit(edb93xx_exit); MODULE_AUTHOR("Alexander Sverdlin "); MODULE_DESCRIPTION("ALSA SoC EDB93xx"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:edb93xx-audio"); diff --git a/sound/soc/ep93xx/ep93xx-ac97.c b/sound/soc/ep93xx/ep93xx-ac97.c index 104e95c..3cd6158 100644 --- a/sound/soc/ep93xx/ep93xx-ac97.c +++ b/sound/soc/ep93xx/ep93xx-ac97.c @@ -106,12 +106,12 @@ static struct ep93xx_ac97_info *ep93xx_ac97_info; static struct ep93xx_pcm_dma_params ep93xx_ac97_pcm_out = { .name = "ac97-pcm-out", - .dma_port = EP93XX_DMA_M2P_PORT_AAC1, + .dma_port = EP93XX_DMA_AAC1, }; static struct ep93xx_pcm_dma_params ep93xx_ac97_pcm_in = { .name = "ac97-pcm-in", - .dma_port = EP93XX_DMA_M2P_PORT_AAC1, + .dma_port = EP93XX_DMA_AAC1, }; static inline unsigned ep93xx_ac97_read_reg(struct ep93xx_ac97_info *info, @@ -335,7 +335,7 @@ static struct snd_soc_dai_ops ep93xx_ac97_dai_ops = { .trigger = ep93xx_ac97_trigger, }; -struct snd_soc_dai_driver ep93xx_ac97_dai = { +static struct snd_soc_dai_driver ep93xx_ac97_dai = { .name = "ep93xx-ac97", .id = 0, .ac97_control = 1, diff --git a/sound/soc/ep93xx/ep93xx-i2s.c b/sound/soc/ep93xx/ep93xx-i2s.c index 042f4e9..099614e 100644 --- a/sound/soc/ep93xx/ep93xx-i2s.c +++ b/sound/soc/ep93xx/ep93xx-i2s.c @@ -2,7 +2,7 @@ * linux/sound/soc/ep93xx-i2s.c * EP93xx I2S driver * - * Copyright (C) 2010 Ryan Mallon + * Copyright (C) 2010 Ryan Mallon * * Based on the original driver by: * Copyright (C) 2007 Chase Douglas @@ -70,11 +70,11 @@ struct ep93xx_i2s_info { struct ep93xx_pcm_dma_params ep93xx_i2s_dma_params[] = { [SNDRV_PCM_STREAM_PLAYBACK] = { .name = "i2s-pcm-out", - .dma_port = EP93XX_DMA_M2P_PORT_I2S1, + .dma_port = EP93XX_DMA_I2S1, }, [SNDRV_PCM_STREAM_CAPTURE] = { .name = "i2s-pcm-in", - .dma_port = EP93XX_DMA_M2P_PORT_I2S1, + .dma_port = EP93XX_DMA_I2S1, }, }; @@ -385,14 +385,14 @@ static int ep93xx_i2s_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { err = -ENODEV; - goto fail; + goto fail_free_info; } info->mem = request_mem_region(res->start, resource_size(res), pdev->name); if (!info->mem) { err = -EBUSY; - goto fail; + goto fail_free_info; } info->regs = ioremap(info->mem->start, resource_size(info->mem)); @@ -435,6 +435,7 @@ fail_unmap_mem: iounmap(info->regs); fail_release_mem: release_mem_region(info->mem->start, resource_size(info->mem)); +fail_free_info: kfree(info); fail: return err; @@ -477,6 +478,6 @@ module_init(ep93xx_i2s_init); module_exit(ep93xx_i2s_exit); MODULE_ALIAS("platform:ep93xx-i2s"); -MODULE_AUTHOR("Ryan Mallon "); +MODULE_AUTHOR("Ryan Mallon"); MODULE_DESCRIPTION("EP93XX I2S driver"); MODULE_LICENSE("GPL"); diff --git a/sound/soc/ep93xx/ep93xx-pcm.c b/sound/soc/ep93xx/ep93xx-pcm.c index a456e49..d00230a 100644 --- a/sound/soc/ep93xx/ep93xx-pcm.c +++ b/sound/soc/ep93xx/ep93xx-pcm.c @@ -5,7 +5,7 @@ * Copyright (C) 2006 Applied Data Systems * * Rewritten for the SoC audio subsystem (Based on PXA2xx code): - * Copyright (c) 2008 Ryan Mallon + * Copyright (c) 2008 Ryan Mallon * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -53,43 +54,34 @@ static const struct snd_pcm_hardware ep93xx_pcm_hardware = { struct ep93xx_runtime_data { - struct ep93xx_dma_m2p_client cl; - struct ep93xx_pcm_dma_params *params; int pointer_bytes; - struct tasklet_struct period_tasklet; int periods; - struct ep93xx_dma_buffer buf[32]; + int period_bytes; + struct dma_chan *dma_chan; + struct ep93xx_dma_data dma_data; }; -static void ep93xx_pcm_period_elapsed(unsigned long data) +static void ep93xx_pcm_dma_callback(void *data) { - struct snd_pcm_substream *substream = (struct snd_pcm_substream *)data; - snd_pcm_period_elapsed(substream); -} + struct snd_pcm_substream *substream = data; + struct ep93xx_runtime_data *rtd = substream->runtime->private_data; -static void ep93xx_pcm_buffer_started(void *cookie, - struct ep93xx_dma_buffer *buf) -{ + rtd->pointer_bytes += rtd->period_bytes; + rtd->pointer_bytes %= rtd->period_bytes * rtd->periods; + + snd_pcm_period_elapsed(substream); } -static void ep93xx_pcm_buffer_finished(void *cookie, - struct ep93xx_dma_buffer *buf, - int bytes, int error) +static bool ep93xx_pcm_dma_filter(struct dma_chan *chan, void *filter_param) { - struct snd_pcm_substream *substream = cookie; - struct ep93xx_runtime_data *rtd = substream->runtime->private_data; - - if (buf == rtd->buf + rtd->periods - 1) - rtd->pointer_bytes = 0; - else - rtd->pointer_bytes += buf->size; + struct ep93xx_dma_data *data = filter_param; - if (!error) { - ep93xx_dma_m2p_submit_recursive(&rtd->cl, buf); - tasklet_schedule(&rtd->period_tasklet); - } else { - snd_pcm_stop(substream, SNDRV_PCM_STATE_XRUN); + if (data->direction == ep93xx_dma_chan_direction(chan)) { + chan->private = data; + return true; } + + return false; } static int ep93xx_pcm_open(struct snd_pcm_substream *substream) @@ -98,30 +90,38 @@ static int ep93xx_pcm_open(struct snd_pcm_substream *substream) struct snd_soc_dai *cpu_dai = soc_rtd->cpu_dai; struct ep93xx_pcm_dma_params *dma_params; struct ep93xx_runtime_data *rtd; + dma_cap_mask_t mask; int ret; - dma_params = snd_soc_dai_get_dma_data(cpu_dai, substream); + ret = snd_pcm_hw_constraint_integer(substream->runtime, + SNDRV_PCM_HW_PARAM_PERIODS); + if (ret < 0) + return ret; + snd_soc_set_runtime_hwparams(substream, &ep93xx_pcm_hardware); rtd = kmalloc(sizeof(*rtd), GFP_KERNEL); if (!rtd) return -ENOMEM; - memset(&rtd->period_tasklet, 0, sizeof(rtd->period_tasklet)); - rtd->period_tasklet.func = ep93xx_pcm_period_elapsed; - rtd->period_tasklet.data = (unsigned long)substream; - - rtd->cl.name = dma_params->name; - rtd->cl.flags = dma_params->dma_port | EP93XX_DMA_M2P_IGNORE_ERROR | - ((substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ? - EP93XX_DMA_M2P_TX : EP93XX_DMA_M2P_RX); - rtd->cl.cookie = substream; - rtd->cl.buffer_started = ep93xx_pcm_buffer_started; - rtd->cl.buffer_finished = ep93xx_pcm_buffer_finished; - ret = ep93xx_dma_m2p_client_register(&rtd->cl); - if (ret < 0) { + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + dma_cap_set(DMA_CYCLIC, mask); + + dma_params = snd_soc_dai_get_dma_data(cpu_dai, substream); + rtd->dma_data.port = dma_params->dma_port; + rtd->dma_data.name = dma_params->name; + + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + rtd->dma_data.direction = DMA_TO_DEVICE; + else + rtd->dma_data.direction = DMA_FROM_DEVICE; + + rtd->dma_chan = dma_request_channel(mask, ep93xx_pcm_dma_filter, + &rtd->dma_data); + if (!rtd->dma_chan) { kfree(rtd); - return ret; + return -EINVAL; } substream->runtime->private_data = rtd; @@ -132,31 +132,52 @@ static int ep93xx_pcm_close(struct snd_pcm_substream *substream) { struct ep93xx_runtime_data *rtd = substream->runtime->private_data; - ep93xx_dma_m2p_client_unregister(&rtd->cl); + dma_release_channel(rtd->dma_chan); kfree(rtd); return 0; } +static int ep93xx_pcm_dma_submit(struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct ep93xx_runtime_data *rtd = runtime->private_data; + struct dma_chan *chan = rtd->dma_chan; + struct dma_device *dma_dev = chan->device; + struct dma_async_tx_descriptor *desc; + + rtd->pointer_bytes = 0; + desc = dma_dev->device_prep_dma_cyclic(chan, runtime->dma_addr, + rtd->period_bytes * rtd->periods, + rtd->period_bytes, + rtd->dma_data.direction); + if (!desc) + return -EINVAL; + + desc->callback = ep93xx_pcm_dma_callback; + desc->callback_param = substream; + + dmaengine_submit(desc); + return 0; +} + +static void ep93xx_pcm_dma_flush(struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct ep93xx_runtime_data *rtd = runtime->private_data; + + dmaengine_terminate_all(rtd->dma_chan); +} + static int ep93xx_pcm_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { struct snd_pcm_runtime *runtime = substream->runtime; struct ep93xx_runtime_data *rtd = runtime->private_data; - size_t totsize = params_buffer_bytes(params); - size_t period = params_period_bytes(params); - int i; snd_pcm_set_runtime_buffer(substream, &substream->dma_buffer); - runtime->dma_bytes = totsize; - - rtd->periods = (totsize + period - 1) / period; - for (i = 0; i < rtd->periods; i++) { - rtd->buf[i].bus_addr = runtime->dma_addr + (i * period); - rtd->buf[i].size = period; - if ((i + 1) * period > totsize) - rtd->buf[i].size = totsize - (i * period); - } + rtd->periods = params_periods(params); + rtd->period_bytes = params_period_bytes(params); return 0; } @@ -168,24 +189,20 @@ static int ep93xx_pcm_hw_free(struct snd_pcm_substream *substream) static int ep93xx_pcm_trigger(struct snd_pcm_substream *substream, int cmd) { - struct ep93xx_runtime_data *rtd = substream->runtime->private_data; int ret; - int i; ret = 0; switch (cmd) { case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - rtd->pointer_bytes = 0; - for (i = 0; i < rtd->periods; i++) - ep93xx_dma_m2p_submit(&rtd->cl, rtd->buf + i); + ret = ep93xx_pcm_dma_submit(substream); break; case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - ep93xx_dma_m2p_flush(&rtd->cl); + ep93xx_pcm_dma_flush(substream); break; default: @@ -266,9 +283,11 @@ static void ep93xx_pcm_free_dma_buffers(struct snd_pcm *pcm) static u64 ep93xx_pcm_dmamask = 0xffffffff; -static int ep93xx_pcm_new(struct snd_card *card, struct snd_soc_dai *dai, - struct snd_pcm *pcm) +static int ep93xx_pcm_new(struct snd_soc_pcm_runtime *rtd) { + struct snd_card *card = rtd->card->snd_card; + struct snd_soc_dai *dai = rtd->cpu_dai; + struct snd_pcm *pcm = rtd->pcm; int ret = 0; if (!card->dev->dma_mask) @@ -333,6 +352,7 @@ static void __exit ep93xx_soc_platform_exit(void) module_init(ep93xx_soc_platform_init); module_exit(ep93xx_soc_platform_exit); -MODULE_AUTHOR("Ryan Mallon "); +MODULE_AUTHOR("Ryan Mallon"); MODULE_DESCRIPTION("EP93xx ALSA PCM interface"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:ep93xx-pcm-audio"); diff --git a/sound/soc/ep93xx/simone.c b/sound/soc/ep93xx/simone.c index 2868179..968cb31 100644 --- a/sound/soc/ep93xx/simone.c +++ b/sound/soc/ep93xx/simone.c @@ -39,53 +39,61 @@ static struct snd_soc_card snd_soc_simone = { }; static struct platform_device *simone_snd_ac97_device; -static struct platform_device *simone_snd_device; -static int __init simone_init(void) +static int __devinit simone_probe(struct platform_device *pdev) { + struct snd_soc_card *card = &snd_soc_simone; int ret; - if (!machine_is_sim_one()) - return -ENODEV; - - simone_snd_ac97_device = platform_device_alloc("ac97-codec", -1); - if (!simone_snd_ac97_device) - return -ENOMEM; + simone_snd_ac97_device = platform_device_register_simple("ac97-codec", + -1, NULL, 0); + if (IS_ERR(simone_snd_ac97_device)) + return PTR_ERR(simone_snd_ac97_device); - ret = platform_device_add(simone_snd_ac97_device); - if (ret) - goto fail1; + card->dev = &pdev->dev; - simone_snd_device = platform_device_alloc("soc-audio", -1); - if (!simone_snd_device) { - ret = -ENOMEM; - goto fail2; + ret = snd_soc_register_card(card); + if (ret) { + dev_err(&pdev->dev, "snd_soc_register_card() failed: %d\n", + ret); + platform_device_unregister(simone_snd_ac97_device); } - platform_set_drvdata(simone_snd_device, &snd_soc_simone); - ret = platform_device_add(simone_snd_device); - if (ret) - goto fail3; + return ret; +} + +static int __devexit simone_remove(struct platform_device *pdev) +{ + struct snd_soc_card *card = platform_get_drvdata(pdev); + + snd_soc_unregister_card(card); + platform_device_unregister(simone_snd_ac97_device); return 0; +} -fail3: - platform_device_put(simone_snd_device); -fail2: - platform_device_del(simone_snd_ac97_device); -fail1: - platform_device_put(simone_snd_ac97_device); - return ret; +static struct platform_driver simone_driver = { + .driver = { + .name = "simone-audio", + .owner = THIS_MODULE, + }, + .probe = simone_probe, + .remove = __devexit_p(simone_remove), +}; + +static int __init simone_init(void) +{ + return platform_driver_register(&simone_driver); } module_init(simone_init); static void __exit simone_exit(void) { - platform_device_unregister(simone_snd_device); - platform_device_unregister(simone_snd_ac97_device); + platform_driver_unregister(&simone_driver); } module_exit(simone_exit); MODULE_DESCRIPTION("ALSA SoC Simplemachines Sim.One"); MODULE_AUTHOR("Mika Westerberg "); MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:simone-audio"); diff --git a/sound/soc/ep93xx/snappercl15.c b/sound/soc/ep93xx/snappercl15.c index dfe1d7f..2cde433 100644 --- a/sound/soc/ep93xx/snappercl15.c +++ b/sound/soc/ep93xx/snappercl15.c @@ -2,7 +2,7 @@ * snappercl15.c -- SoC audio for Bluewater Systems Snapper CL15 module * * Copyright (C) 2008 Bluewater Systems Ltd - * Author: Ryan Mallon + * Author: Ryan Mallon * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -12,6 +12,7 @@ */ #include +#include #include #include #include @@ -104,43 +105,62 @@ static struct snd_soc_card snd_soc_snappercl15 = { .num_links = 1, }; -static struct platform_device *snappercl15_snd_device; - -static int __init snappercl15_init(void) +static int __devinit snappercl15_probe(struct platform_device *pdev) { + struct snd_soc_card *card = &snd_soc_snappercl15; int ret; - if (!machine_is_snapper_cl15()) - return -ENODEV; - ret = ep93xx_i2s_acquire(EP93XX_SYSCON_DEVCFG_I2SONAC97, EP93XX_SYSCON_I2SCLKDIV_ORIDE | EP93XX_SYSCON_I2SCLKDIV_SPOL); if (ret) return ret; - snappercl15_snd_device = platform_device_alloc("soc-audio", -1); - if (!snappercl15_snd_device) - return -ENOMEM; - - platform_set_drvdata(snappercl15_snd_device, &snd_soc_snappercl15); - ret = platform_device_add(snappercl15_snd_device); - if (ret) - platform_device_put(snappercl15_snd_device); + card->dev = &pdev->dev; + + ret = snd_soc_register_card(card); + if (ret) { + dev_err(&pdev->dev, "snd_soc_register_card() failed: %d\n", + ret); + ep93xx_i2s_release(); + } return ret; } -static void __exit snappercl15_exit(void) +static int __devexit snappercl15_remove(struct platform_device *pdev) { - platform_device_unregister(snappercl15_snd_device); + struct snd_soc_card *card = platform_get_drvdata(pdev); + + snd_soc_unregister_card(card); ep93xx_i2s_release(); + + return 0; +} + +static struct platform_driver snappercl15_driver = { + .driver = { + .name = "snappercl15-audio", + .owner = THIS_MODULE, + }, + .probe = snappercl15_probe, + .remove = __devexit_p(snappercl15_remove), +}; + +static int __init snappercl15_init(void) +{ + return platform_driver_register(&snappercl15_driver); +} + +static void __exit snappercl15_exit(void) +{ + platform_driver_unregister(&snappercl15_driver); } module_init(snappercl15_init); module_exit(snappercl15_exit); -MODULE_AUTHOR("Ryan Mallon "); +MODULE_AUTHOR("Ryan Mallon"); MODULE_DESCRIPTION("ALSA SoC Snapper CL15"); MODULE_LICENSE("GPL"); - +MODULE_ALIAS("platform:snappercl15-audio"); diff --git a/sound/soc/fsl/fsl_dma.c b/sound/soc/fsl/fsl_dma.c index 6680c0b..ef15402 100644 --- a/sound/soc/fsl/fsl_dma.c +++ b/sound/soc/fsl/fsl_dma.c @@ -294,9 +294,10 @@ static irqreturn_t fsl_dma_isr(int irq, void *dev_id) * Regardless of where the memory is actually allocated, since the device can * technically DMA to any 36-bit address, we do need to set the DMA mask to 36. */ -static int fsl_dma_new(struct snd_card *card, struct snd_soc_dai *dai, - struct snd_pcm *pcm) +static int fsl_dma_new(struct snd_soc_pcm_runtime *rtd) { + struct snd_card *card = rtd->card->snd_card; + struct snd_pcm *pcm = rtd->pcm; static u64 fsl_dma_dmamask = DMA_BIT_MASK(36); int ret; @@ -877,10 +878,12 @@ static struct device_node *find_ssi_node(struct device_node *dma_channel_np) * assume that device_node pointers are a valid comparison. */ np = of_parse_phandle(ssi_np, "fsl,playback-dma", 0); + of_node_put(np); if (np == dma_channel_np) return ssi_np; np = of_parse_phandle(ssi_np, "fsl,capture-dma", 0); + of_node_put(np); if (np == dma_channel_np) return ssi_np; } @@ -939,7 +942,7 @@ static int __devinit fsl_soc_dma_probe(struct platform_device *pdev) iprop = of_get_property(ssi_np, "fsl,fifo-depth", NULL); if (iprop) - dma->ssi_fifo_depth = *iprop; + dma->ssi_fifo_depth = be32_to_cpup(iprop); else /* Older 8610 DTs didn't have the fifo-depth property */ dma->ssi_fifo_depth = 8; diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index bd811a0..83c4bd5 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -78,7 +78,6 @@ * @second_stream: pointer to second stream * @playback: the number of playback streams opened * @capture: the number of capture streams opened - * @asynchronous: 0=synchronous mode, 1=asynchronous mode * @cpu_dai: the CPU DAI for this device * @dev_attr: the sysfs device attribute structure * @stats: SSI statistics @@ -90,9 +89,6 @@ struct fsl_ssi_private { unsigned int irq; struct snd_pcm_substream *first_stream; struct snd_pcm_substream *second_stream; - unsigned int playback; - unsigned int capture; - int asynchronous; unsigned int fifo_depth; struct snd_soc_dai_driver cpu_dai_drv; struct device_attribute dev_attr; @@ -281,24 +277,18 @@ static int fsl_ssi_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; - struct fsl_ssi_private *ssi_private = snd_soc_dai_get_drvdata(rtd->cpu_dai); + struct fsl_ssi_private *ssi_private = + snd_soc_dai_get_drvdata(rtd->cpu_dai); + int synchronous = ssi_private->cpu_dai_drv.symmetric_rates; /* * If this is the first stream opened, then request the IRQ * and initialize the SSI registers. */ - if (!ssi_private->playback && !ssi_private->capture) { + if (!ssi_private->first_stream) { struct ccsr_ssi __iomem *ssi = ssi_private->ssi; - int ret; - - /* The 'name' should not have any slashes in it. */ - ret = request_irq(ssi_private->irq, fsl_ssi_isr, 0, - ssi_private->name, ssi_private); - if (ret < 0) { - dev_err(substream->pcm->card->dev, - "could not claim irq %u\n", ssi_private->irq); - return ret; - } + + ssi_private->first_stream = substream; /* * Section 16.5 of the MPC8610 reference manual says that the @@ -316,7 +306,7 @@ static int fsl_ssi_startup(struct snd_pcm_substream *substream, clrsetbits_be32(&ssi->scr, CCSR_SSI_SCR_I2S_MODE_MASK | CCSR_SSI_SCR_SYN, CCSR_SSI_SCR_TFR_CLK_DIS | CCSR_SSI_SCR_I2S_MODE_SLAVE - | (ssi_private->asynchronous ? 0 : CCSR_SSI_SCR_SYN)); + | (synchronous ? CCSR_SSI_SCR_SYN : 0)); out_be32(&ssi->stcr, CCSR_SSI_STCR_TXBIT0 | CCSR_SSI_STCR_TFEN0 | @@ -333,7 +323,7 @@ static int fsl_ssi_startup(struct snd_pcm_substream *substream, * master. */ - /* 4. Enable the interrupts and DMA requests */ + /* Enable the interrupts and DMA requests */ out_be32(&ssi->sier, SIER_FLAGS); /* @@ -362,58 +352,47 @@ static int fsl_ssi_startup(struct snd_pcm_substream *substream, * this is bad is because at this point, the PCM driver has not * finished initializing the DMA controller. */ - } + } else { + if (synchronous) { + struct snd_pcm_runtime *first_runtime = + ssi_private->first_stream->runtime; + /* + * This is the second stream open, and we're in + * synchronous mode, so we need to impose sample + * sample size constraints. This is because STCCR is + * used for playback and capture in synchronous mode, + * so there's no way to specify different word + * lengths. + * + * Note that this can cause a race condition if the + * second stream is opened before the first stream is + * fully initialized. We provide some protection by + * checking to make sure the first stream is + * initialized, but it's not perfect. ALSA sometimes + * re-initializes the driver with a different sample + * rate or size. If the second stream is opened + * before the first stream has received its final + * parameters, then the second stream may be + * constrained to the wrong sample rate or size. + */ + if (!first_runtime->sample_bits) { + dev_err(substream->pcm->card->dev, + "set sample size in %s stream first\n", + substream->stream == + SNDRV_PCM_STREAM_PLAYBACK + ? "capture" : "playback"); + return -EAGAIN; + } - if (!ssi_private->first_stream) - ssi_private->first_stream = substream; - else { - /* This is the second stream open, so we need to impose sample - * rate and maybe sample size constraints. Note that this can - * cause a race condition if the second stream is opened before - * the first stream is fully initialized. - * - * We provide some protection by checking to make sure the first - * stream is initialized, but it's not perfect. ALSA sometimes - * re-initializes the driver with a different sample rate or - * size. If the second stream is opened before the first stream - * has received its final parameters, then the second stream may - * be constrained to the wrong sample rate or size. - * - * FIXME: This code does not handle opening and closing streams - * repeatedly. If you open two streams and then close the first - * one, you may not be able to open another stream until you - * close the second one as well. - */ - struct snd_pcm_runtime *first_runtime = - ssi_private->first_stream->runtime; - - if (!first_runtime->sample_bits) { - dev_err(substream->pcm->card->dev, - "set sample size in %s stream first\n", - substream->stream == SNDRV_PCM_STREAM_PLAYBACK - ? "capture" : "playback"); - return -EAGAIN; - } - - /* If we're in synchronous mode, then we need to constrain - * the sample size as well. We don't support independent sample - * rates in asynchronous mode. - */ - if (!ssi_private->asynchronous) snd_pcm_hw_constraint_minmax(substream->runtime, SNDRV_PCM_HW_PARAM_SAMPLE_BITS, first_runtime->sample_bits, first_runtime->sample_bits); + } ssi_private->second_stream = substream; } - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - ssi_private->playback++; - - if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) - ssi_private->capture++; - return 0; } @@ -434,24 +413,35 @@ static int fsl_ssi_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *hw_params, struct snd_soc_dai *cpu_dai) { struct fsl_ssi_private *ssi_private = snd_soc_dai_get_drvdata(cpu_dai); + struct ccsr_ssi __iomem *ssi = ssi_private->ssi; + unsigned int sample_size = + snd_pcm_format_width(params_format(hw_params)); + u32 wl = CCSR_SSI_SxCCR_WL(sample_size); + int enabled = in_be32(&ssi->scr) & CCSR_SSI_SCR_SSIEN; - if (substream == ssi_private->first_stream) { - struct ccsr_ssi __iomem *ssi = ssi_private->ssi; - unsigned int sample_size = - snd_pcm_format_width(params_format(hw_params)); - u32 wl = CCSR_SSI_SxCCR_WL(sample_size); + /* + * If we're in synchronous mode, and the SSI is already enabled, + * then STCCR is already set properly. + */ + if (enabled && ssi_private->cpu_dai_drv.symmetric_rates) + return 0; - /* The SSI should always be disabled at this points (SSIEN=0) */ + /* + * FIXME: The documentation says that SxCCR[WL] should not be + * modified while the SSI is enabled. The only time this can + * happen is if we're trying to do simultaneous playback and + * capture in asynchronous mode. Unfortunately, I have been enable + * to get that to work at all on the P1022DS. Therefore, we don't + * bother to disable/enable the SSI when setting SxCCR[WL], because + * the SSI will stop anyway. Maybe one day, this will get fixed. + */ - /* In synchronous mode, the SSI uses STCCR for capture */ - if ((substream->stream == SNDRV_PCM_STREAM_PLAYBACK) || - !ssi_private->asynchronous) - clrsetbits_be32(&ssi->stccr, - CCSR_SSI_SxCCR_WL_MASK, wl); - else - clrsetbits_be32(&ssi->srccr, - CCSR_SSI_SxCCR_WL_MASK, wl); - } + /* In synchronous mode, the SSI uses STCCR for capture */ + if ((substream->stream == SNDRV_PCM_STREAM_PLAYBACK) || + ssi_private->cpu_dai_drv.symmetric_rates) + clrsetbits_be32(&ssi->stccr, CCSR_SSI_SxCCR_WL_MASK, wl); + else + clrsetbits_be32(&ssi->srccr, CCSR_SSI_SxCCR_WL_MASK, wl); return 0; } @@ -474,7 +464,6 @@ static int fsl_ssi_trigger(struct snd_pcm_substream *substream, int cmd, switch (cmd) { case SNDRV_PCM_TRIGGER_START: - clrbits32(&ssi->scr, CCSR_SSI_SCR_SSIEN); case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) setbits32(&ssi->scr, @@ -510,27 +499,18 @@ static void fsl_ssi_shutdown(struct snd_pcm_substream *substream, struct snd_soc_pcm_runtime *rtd = substream->private_data; struct fsl_ssi_private *ssi_private = snd_soc_dai_get_drvdata(rtd->cpu_dai); - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - ssi_private->playback--; - - if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) - ssi_private->capture--; - if (ssi_private->first_stream == substream) ssi_private->first_stream = ssi_private->second_stream; ssi_private->second_stream = NULL; /* - * If this is the last active substream, disable the SSI and release - * the IRQ. + * If this is the last active substream, disable the SSI. */ - if (!ssi_private->playback && !ssi_private->capture) { + if (!ssi_private->first_stream) { struct ccsr_ssi __iomem *ssi = ssi_private->ssi; clrbits32(&ssi->scr, CCSR_SSI_SCR_SSIEN); - - free_irq(ssi_private->irq, ssi_private); } } @@ -675,23 +655,39 @@ static int __devinit fsl_ssi_probe(struct platform_device *pdev) ret = of_address_to_resource(np, 0, &res); if (ret) { dev_err(&pdev->dev, "could not determine device resources\n"); - kfree(ssi_private); - return ret; + goto error_kmalloc; + } + ssi_private->ssi = of_iomap(np, 0); + if (!ssi_private->ssi) { + dev_err(&pdev->dev, "could not map device resources\n"); + ret = -ENOMEM; + goto error_kmalloc; } - ssi_private->ssi = ioremap(res.start, 1 + res.end - res.start); ssi_private->ssi_phys = res.start; + ssi_private->irq = irq_of_parse_and_map(np, 0); + if (ssi_private->irq == NO_IRQ) { + dev_err(&pdev->dev, "no irq for node %s\n", np->full_name); + ret = -ENXIO; + goto error_iomap; + } + + /* The 'name' should not have any slashes in it. */ + ret = request_irq(ssi_private->irq, fsl_ssi_isr, 0, ssi_private->name, + ssi_private); + if (ret < 0) { + dev_err(&pdev->dev, "could not claim irq %u\n", ssi_private->irq); + goto error_irqmap; + } /* Are the RX and the TX clocks locked? */ - if (of_find_property(np, "fsl,ssi-asynchronous", NULL)) - ssi_private->asynchronous = 1; - else + if (!of_find_property(np, "fsl,ssi-asynchronous", NULL)) ssi_private->cpu_dai_drv.symmetric_rates = 1; /* Determine the FIFO depth. */ iprop = of_get_property(np, "fsl,fifo-depth", NULL); if (iprop) - ssi_private->fifo_depth = *iprop; + ssi_private->fifo_depth = be32_to_cpup(iprop); else /* Older 8610 DTs didn't have the fifo-depth property */ ssi_private->fifo_depth = 8; @@ -707,7 +703,7 @@ static int __devinit fsl_ssi_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "could not create sysfs %s file\n", ssi_private->dev_attr.attr.name); - goto error; + goto error_irq; } /* Register with ASoC */ @@ -716,7 +712,7 @@ static int __devinit fsl_ssi_probe(struct platform_device *pdev) ret = snd_soc_register_dai(&pdev->dev, &ssi_private->cpu_dai_drv); if (ret) { dev_err(&pdev->dev, "failed to register DAI: %d\n", ret); - goto error; + goto error_dev; } /* Trigger the machine driver's probe function. The platform driver @@ -737,18 +733,28 @@ static int __devinit fsl_ssi_probe(struct platform_device *pdev) if (IS_ERR(ssi_private->pdev)) { ret = PTR_ERR(ssi_private->pdev); dev_err(&pdev->dev, "failed to register platform: %d\n", ret); - goto error; + goto error_dai; } return 0; -error: +error_dai: snd_soc_unregister_dai(&pdev->dev); + +error_dev: dev_set_drvdata(&pdev->dev, NULL); - if (dev_attr) - device_remove_file(&pdev->dev, dev_attr); + device_remove_file(&pdev->dev, dev_attr); + +error_irq: + free_irq(ssi_private->irq, ssi_private); + +error_irqmap: irq_dispose_mapping(ssi_private->irq); + +error_iomap: iounmap(ssi_private->ssi); + +error_kmalloc: kfree(ssi_private); return ret; @@ -762,6 +768,9 @@ static int fsl_ssi_remove(struct platform_device *pdev) snd_soc_unregister_dai(&pdev->dev); device_remove_file(&pdev->dev, &ssi_private->dev_attr); + free_irq(ssi_private->irq, ssi_private); + irq_dispose_mapping(ssi_private->irq); + kfree(ssi_private); dev_set_drvdata(&pdev->dev, NULL); diff --git a/sound/soc/fsl/mpc5200_dma.c b/sound/soc/fsl/mpc5200_dma.c index cbaf8b7..5c6c245 100644 --- a/sound/soc/fsl/mpc5200_dma.c +++ b/sound/soc/fsl/mpc5200_dma.c @@ -299,10 +299,11 @@ static struct snd_pcm_ops psc_dma_ops = { }; static u64 psc_dma_dmamask = 0xffffffff; -static int psc_dma_new(struct snd_card *card, struct snd_soc_dai *dai, - struct snd_pcm *pcm) +static int psc_dma_new(struct snd_soc_pcm_runtime *rtd) { - struct snd_soc_pcm_runtime *rtd = pcm->private_data; + struct snd_card *card = rtd->card->snd_card; + struct snd_soc_dai *dai = rtd->cpu_dai; + struct snd_pcm *pcm = rtd->pcm; struct psc_dma *psc_dma = snd_soc_dai_get_drvdata(rtd->cpu_dai); size_t size = psc_dma_hardware.buffer_bytes_max; int rc = 0; @@ -384,7 +385,7 @@ static int mpc5200_hpcd_probe(struct platform_device *op) dev_err(&op->dev, "Missing reg property\n"); return -ENODEV; } - regs = ioremap(res.start, 1 + res.end - res.start); + regs = ioremap(res.start, resource_size(&res)); if (!regs) { dev_err(&op->dev, "Could not map registers\n"); return -ENODEV; diff --git a/sound/soc/fsl/mpc8610_hpcd.c b/sound/soc/fsl/mpc8610_hpcd.c index c16c6b2..ae49f1c 100644 --- a/sound/soc/fsl/mpc8610_hpcd.c +++ b/sound/soc/fsl/mpc8610_hpcd.c @@ -233,7 +233,7 @@ static int get_parent_cell_index(struct device_node *np) if (!iprop) return -1; - return *iprop; + return be32_to_cpup(iprop); } /** @@ -258,7 +258,7 @@ static int codec_node_dev_name(struct device_node *np, char *buf, size_t len) if (!iprop) return -EINVAL; - addr = *iprop; + addr = be32_to_cpup(iprop); bus = get_parent_cell_index(np); if (bus < 0) @@ -305,7 +305,7 @@ static int get_dma_channel(struct device_node *ssi_np, return -EINVAL; } - *dma_channel_id = *iprop; + *dma_channel_id = be32_to_cpup(iprop); *dma_id = get_parent_cell_index(dma_channel_np); of_node_put(dma_channel_np); @@ -345,8 +345,10 @@ static int mpc8610_hpcd_probe(struct platform_device *pdev) } machine_data = kzalloc(sizeof(struct mpc8610_hpcd_data), GFP_KERNEL); - if (!machine_data) - return -ENOMEM; + if (!machine_data) { + ret = -ENOMEM; + goto error_alloc; + } machine_data->dai[0].cpu_dai_name = dev_name(&ssi_pdev->dev); machine_data->dai[0].ops = &mpc8610_hpcd_ops; @@ -379,7 +381,7 @@ static int mpc8610_hpcd_probe(struct platform_device *pdev) ret = -EINVAL; goto error; } - machine_data->ssi_id = *iprop; + machine_data->ssi_id = be32_to_cpup(iprop); /* Get the serial format and clock direction. */ sprop = of_get_property(np, "fsl,mode", NULL); @@ -390,7 +392,8 @@ static int mpc8610_hpcd_probe(struct platform_device *pdev) } if (strcasecmp(sprop, "i2s-slave") == 0) { - machine_data->dai_format = SND_SOC_DAIFMT_I2S; + machine_data->dai_format = + SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_CBM_CFM; machine_data->codec_clk_direction = SND_SOC_CLOCK_OUT; machine_data->cpu_clk_direction = SND_SOC_CLOCK_IN; @@ -405,33 +408,40 @@ static int mpc8610_hpcd_probe(struct platform_device *pdev) ret = -EINVAL; goto error; } - machine_data->clk_frequency = *iprop; + machine_data->clk_frequency = be32_to_cpup(iprop); } else if (strcasecmp(sprop, "i2s-master") == 0) { - machine_data->dai_format = SND_SOC_DAIFMT_I2S; + machine_data->dai_format = + SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_CBS_CFS; machine_data->codec_clk_direction = SND_SOC_CLOCK_IN; machine_data->cpu_clk_direction = SND_SOC_CLOCK_OUT; } else if (strcasecmp(sprop, "lj-slave") == 0) { - machine_data->dai_format = SND_SOC_DAIFMT_LEFT_J; + machine_data->dai_format = + SND_SOC_DAIFMT_LEFT_J | SND_SOC_DAIFMT_CBM_CFM; machine_data->codec_clk_direction = SND_SOC_CLOCK_OUT; machine_data->cpu_clk_direction = SND_SOC_CLOCK_IN; } else if (strcasecmp(sprop, "lj-master") == 0) { - machine_data->dai_format = SND_SOC_DAIFMT_LEFT_J; + machine_data->dai_format = + SND_SOC_DAIFMT_LEFT_J | SND_SOC_DAIFMT_CBS_CFS; machine_data->codec_clk_direction = SND_SOC_CLOCK_IN; machine_data->cpu_clk_direction = SND_SOC_CLOCK_OUT; } else if (strcasecmp(sprop, "rj-slave") == 0) { - machine_data->dai_format = SND_SOC_DAIFMT_RIGHT_J; + machine_data->dai_format = + SND_SOC_DAIFMT_RIGHT_J | SND_SOC_DAIFMT_CBM_CFM; machine_data->codec_clk_direction = SND_SOC_CLOCK_OUT; machine_data->cpu_clk_direction = SND_SOC_CLOCK_IN; } else if (strcasecmp(sprop, "rj-master") == 0) { - machine_data->dai_format = SND_SOC_DAIFMT_RIGHT_J; + machine_data->dai_format = + SND_SOC_DAIFMT_RIGHT_J | SND_SOC_DAIFMT_CBS_CFS; machine_data->codec_clk_direction = SND_SOC_CLOCK_IN; machine_data->cpu_clk_direction = SND_SOC_CLOCK_OUT; } else if (strcasecmp(sprop, "ac97-slave") == 0) { - machine_data->dai_format = SND_SOC_DAIFMT_AC97; + machine_data->dai_format = + SND_SOC_DAIFMT_AC97 | SND_SOC_DAIFMT_CBM_CFM; machine_data->codec_clk_direction = SND_SOC_CLOCK_OUT; machine_data->cpu_clk_direction = SND_SOC_CLOCK_IN; } else if (strcasecmp(sprop, "ac97-master") == 0) { - machine_data->dai_format = SND_SOC_DAIFMT_AC97; + machine_data->dai_format = + SND_SOC_DAIFMT_AC97 | SND_SOC_DAIFMT_CBS_CFS; machine_data->codec_clk_direction = SND_SOC_CLOCK_IN; machine_data->cpu_clk_direction = SND_SOC_CLOCK_OUT; } else { @@ -494,7 +504,7 @@ static int mpc8610_hpcd_probe(struct platform_device *pdev) ret = platform_device_add(sound_device); if (ret) { dev_err(&pdev->dev, "platform device add failed\n"); - goto error; + goto error_sound; } dev_set_drvdata(&pdev->dev, sound_device); @@ -502,14 +512,12 @@ static int mpc8610_hpcd_probe(struct platform_device *pdev) return 0; +error_sound: + platform_device_put(sound_device); error: - of_node_put(codec_np); - - if (sound_device) - platform_device_unregister(sound_device); - kfree(machine_data); - +error_alloc: + of_node_put(codec_np); return ret; } diff --git a/sound/soc/fsl/p1022_ds.c b/sound/soc/fsl/p1022_ds.c index 66e0b68..075677c 100644 --- a/sound/soc/fsl/p1022_ds.c +++ b/sound/soc/fsl/p1022_ds.c @@ -232,7 +232,7 @@ static int get_parent_cell_index(struct device_node *np) iprop = of_get_property(parent, "cell-index", NULL); if (iprop) - ret = *iprop; + ret = be32_to_cpup(iprop); of_node_put(parent); @@ -261,13 +261,13 @@ static int codec_node_dev_name(struct device_node *np, char *buf, size_t len) if (!iprop) return -EINVAL; - addr = *iprop; + addr = be32_to_cpup(iprop); bus = get_parent_cell_index(np); if (bus < 0) return bus; - snprintf(buf, len, "%s-codec.%u-%04x", temp, bus, addr); + snprintf(buf, len, "%s.%u-%04x", temp, bus, addr); return 0; } @@ -297,8 +297,10 @@ static int get_dma_channel(struct device_node *ssi_np, * dai->platform name should already point to an allocated buffer. */ ret = of_address_to_resource(dma_channel_np, 0, &res); - if (ret) + if (ret) { + of_node_put(dma_channel_np); return ret; + } snprintf((char *)dai->platform_name, DAI_NAME_SIZE, "%llx.%s", (unsigned long long) res.start, dma_channel_np->name); @@ -308,7 +310,7 @@ static int get_dma_channel(struct device_node *ssi_np, return -EINVAL; } - *dma_channel_id = *iprop; + *dma_channel_id = be32_to_cpup(iprop); *dma_id = get_parent_cell_index(dma_channel_np); of_node_put(dma_channel_np); @@ -379,7 +381,7 @@ static int p1022_ds_probe(struct platform_device *pdev) ret = -EINVAL; goto error; } - mdata->ssi_id = *iprop; + mdata->ssi_id = be32_to_cpup(iprop); /* Get the serial format and clock direction. */ sprop = of_get_property(np, "fsl,mode", NULL); @@ -390,7 +392,8 @@ static int p1022_ds_probe(struct platform_device *pdev) } if (strcasecmp(sprop, "i2s-slave") == 0) { - mdata->dai_format = SND_SOC_DAIFMT_I2S; + mdata->dai_format = SND_SOC_DAIFMT_NB_NF | + SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_CBM_CFM; mdata->codec_clk_direction = SND_SOC_CLOCK_OUT; mdata->cpu_clk_direction = SND_SOC_CLOCK_IN; @@ -405,33 +408,40 @@ static int p1022_ds_probe(struct platform_device *pdev) ret = -EINVAL; goto error; } - mdata->clk_frequency = *iprop; + mdata->clk_frequency = be32_to_cpup(iprop); } else if (strcasecmp(sprop, "i2s-master") == 0) { - mdata->dai_format = SND_SOC_DAIFMT_I2S; + mdata->dai_format = SND_SOC_DAIFMT_NB_NF | + SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_CBS_CFS; mdata->codec_clk_direction = SND_SOC_CLOCK_IN; mdata->cpu_clk_direction = SND_SOC_CLOCK_OUT; } else if (strcasecmp(sprop, "lj-slave") == 0) { - mdata->dai_format = SND_SOC_DAIFMT_LEFT_J; + mdata->dai_format = SND_SOC_DAIFMT_NB_NF | + SND_SOC_DAIFMT_LEFT_J | SND_SOC_DAIFMT_CBM_CFM; mdata->codec_clk_direction = SND_SOC_CLOCK_OUT; mdata->cpu_clk_direction = SND_SOC_CLOCK_IN; } else if (strcasecmp(sprop, "lj-master") == 0) { - mdata->dai_format = SND_SOC_DAIFMT_LEFT_J; + mdata->dai_format = SND_SOC_DAIFMT_NB_NF | + SND_SOC_DAIFMT_LEFT_J | SND_SOC_DAIFMT_CBS_CFS; mdata->codec_clk_direction = SND_SOC_CLOCK_IN; mdata->cpu_clk_direction = SND_SOC_CLOCK_OUT; } else if (strcasecmp(sprop, "rj-slave") == 0) { - mdata->dai_format = SND_SOC_DAIFMT_RIGHT_J; + mdata->dai_format = SND_SOC_DAIFMT_NB_NF | + SND_SOC_DAIFMT_RIGHT_J | SND_SOC_DAIFMT_CBM_CFM; mdata->codec_clk_direction = SND_SOC_CLOCK_OUT; mdata->cpu_clk_direction = SND_SOC_CLOCK_IN; } else if (strcasecmp(sprop, "rj-master") == 0) { - mdata->dai_format = SND_SOC_DAIFMT_RIGHT_J; + mdata->dai_format = SND_SOC_DAIFMT_NB_NF | + SND_SOC_DAIFMT_RIGHT_J | SND_SOC_DAIFMT_CBS_CFS; mdata->codec_clk_direction = SND_SOC_CLOCK_IN; mdata->cpu_clk_direction = SND_SOC_CLOCK_OUT; } else if (strcasecmp(sprop, "ac97-slave") == 0) { - mdata->dai_format = SND_SOC_DAIFMT_AC97; + mdata->dai_format = SND_SOC_DAIFMT_NB_NF | + SND_SOC_DAIFMT_AC97 | SND_SOC_DAIFMT_CBM_CFM; mdata->codec_clk_direction = SND_SOC_CLOCK_OUT; mdata->cpu_clk_direction = SND_SOC_CLOCK_IN; } else if (strcasecmp(sprop, "ac97-master") == 0) { - mdata->dai_format = SND_SOC_DAIFMT_AC97; + mdata->dai_format = SND_SOC_DAIFMT_NB_NF | + SND_SOC_DAIFMT_AC97 | SND_SOC_DAIFMT_CBS_CFS; mdata->codec_clk_direction = SND_SOC_CLOCK_IN; mdata->cpu_clk_direction = SND_SOC_CLOCK_OUT; } else { @@ -504,7 +514,7 @@ static int p1022_ds_probe(struct platform_device *pdev) error: if (sound_device) - platform_device_unregister(sound_device); + platform_device_put(sound_device); kfree(mdata); error_put: diff --git a/sound/soc/imx/Kconfig b/sound/soc/imx/Kconfig index bb699bb..7383917 100644 --- a/sound/soc/imx/Kconfig +++ b/sound/soc/imx/Kconfig @@ -28,8 +28,8 @@ config SND_MXC_SOC_WM1133_EV1 config SND_SOC_MX27VIS_AIC32X4 tristate "SoC audio support for Visstrim M10 boards" - depends on MACH_IMX27_VISSTRIM_M10 - select SND_SOC_TVL320AIC32X4 + depends on MACH_IMX27_VISSTRIM_M10 && I2C + select SND_SOC_TLV320AIC32X4 select SND_MXC_SOC_MX2 help Say Y if you want to add support for SoC audio on Visstrim SM10 @@ -50,6 +50,7 @@ config SND_SOC_EUKREA_TLV320 || MACH_EUKREA_MBIMXSD25_BASEBOARD \ || MACH_EUKREA_MBIMXSD35_BASEBOARD \ || MACH_EUKREA_MBIMXSD51_BASEBOARD + depends on I2C select SND_SOC_TLV320AIC23 select SND_MXC_SOC_FIQ help diff --git a/sound/soc/imx/imx-pcm-dma-mx2.c b/sound/soc/imx/imx-pcm-dma-mx2.c index 4173b3d..43fdc24f 100644 --- a/sound/soc/imx/imx-pcm-dma-mx2.c +++ b/sound/soc/imx/imx-pcm-dma-mx2.c @@ -110,12 +110,12 @@ static int imx_ssi_dma_alloc(struct snd_pcm_substream *substream, slave_config.direction = DMA_TO_DEVICE; slave_config.dst_addr = dma_params->dma_addr; slave_config.dst_addr_width = buswidth; - slave_config.dst_maxburst = dma_params->burstsize * buswidth; + slave_config.dst_maxburst = dma_params->burstsize; } else { slave_config.direction = DMA_FROM_DEVICE; slave_config.src_addr = dma_params->dma_addr; slave_config.src_addr_width = buswidth; - slave_config.src_maxburst = dma_params->burstsize * buswidth; + slave_config.src_maxburst = dma_params->burstsize; } ret = dmaengine_slave_config(iprtd->dma_chan, &slave_config); diff --git a/sound/soc/imx/imx-pcm-fiq.c b/sound/soc/imx/imx-pcm-fiq.c index 413b78d..8df0fae2 100644 --- a/sound/soc/imx/imx-pcm-fiq.c +++ b/sound/soc/imx/imx-pcm-fiq.c @@ -238,26 +238,25 @@ static struct snd_pcm_ops imx_pcm_ops = { static int ssi_irq = 0; -static int imx_pcm_fiq_new(struct snd_card *card, struct snd_soc_dai *dai, - struct snd_pcm *pcm) +static int imx_pcm_fiq_new(struct snd_soc_pcm_runtime *rtd) { + struct snd_pcm *pcm = rtd->pcm; + struct snd_pcm_substream *substream; int ret; - ret = imx_pcm_new(card, dai, pcm); + ret = imx_pcm_new(rtd); if (ret) return ret; - if (dai->driver->playback.channels_min) { - struct snd_pcm_substream *substream = - pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream; + substream = pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream; + if (substream) { struct snd_dma_buffer *buf = &substream->dma_buffer; imx_ssi_fiq_tx_buffer = (unsigned long)buf->area; } - if (dai->driver->capture.channels_min) { - struct snd_pcm_substream *substream = - pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream; + substream = pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream; + if (substream) { struct snd_dma_buffer *buf = &substream->dma_buffer; imx_ssi_fiq_rx_buffer = (unsigned long)buf->area; diff --git a/sound/soc/imx/imx-ssi.c b/sound/soc/imx/imx-ssi.c index 3b56254..4969c98 100644 --- a/sound/soc/imx/imx-ssi.c +++ b/sound/soc/imx/imx-ssi.c @@ -357,8 +357,8 @@ int snd_imx_pcm_mmap(struct snd_pcm_substream *substream, struct snd_pcm_runtime *runtime = substream->runtime; int ret; - ret = dma_mmap_coherent(NULL, vma, runtime->dma_area, - runtime->dma_addr, runtime->dma_bytes); + ret = dma_mmap_writecombine(substream->pcm->card->dev, vma, + runtime->dma_area, runtime->dma_addr, runtime->dma_bytes); pr_debug("%s: ret: %d %p 0x%08x 0x%08x\n", __func__, ret, runtime->dma_area, @@ -388,24 +388,24 @@ static int imx_pcm_preallocate_dma_buffer(struct snd_pcm *pcm, int stream) static u64 imx_pcm_dmamask = DMA_BIT_MASK(32); -int imx_pcm_new(struct snd_card *card, struct snd_soc_dai *dai, - struct snd_pcm *pcm) +int imx_pcm_new(struct snd_soc_pcm_runtime *rtd) { - + struct snd_card *card = rtd->card->snd_card; + struct snd_pcm *pcm = rtd->pcm; int ret = 0; if (!card->dev->dma_mask) card->dev->dma_mask = &imx_pcm_dmamask; if (!card->dev->coherent_dma_mask) card->dev->coherent_dma_mask = DMA_BIT_MASK(32); - if (dai->driver->playback.channels_min) { + if (pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream) { ret = imx_pcm_preallocate_dma_buffer(pcm, SNDRV_PCM_STREAM_PLAYBACK); if (ret) goto out; } - if (dai->driver->capture.channels_min) { + if (pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream) { ret = imx_pcm_preallocate_dma_buffer(pcm, SNDRV_PCM_STREAM_CAPTURE); if (ret) @@ -573,6 +573,8 @@ static void imx_ssi_ac97_reset(struct snd_ac97 *ac97) if (imx_ssi->ac97_reset) imx_ssi->ac97_reset(ac97); + /* First read sometimes fails, do a dummy read */ + imx_ssi_ac97_read(ac97, 0); } static void imx_ssi_ac97_warm_reset(struct snd_ac97 *ac97) @@ -581,6 +583,9 @@ static void imx_ssi_ac97_warm_reset(struct snd_ac97 *ac97) if (imx_ssi->ac97_warm_reset) imx_ssi->ac97_warm_reset(ac97); + + /* First read sometimes fails, do a dummy read */ + imx_ssi_ac97_read(ac97, 0); } struct snd_ac97_bus_ops soc_ac97_ops = { diff --git a/sound/soc/imx/imx-ssi.h b/sound/soc/imx/imx-ssi.h index dc8a875..1072dfb 100644 --- a/sound/soc/imx/imx-ssi.h +++ b/sound/soc/imx/imx-ssi.h @@ -218,15 +218,8 @@ struct imx_ssi { struct platform_device *soc_platform_pdev_fiq; }; -struct snd_soc_platform *imx_ssi_fiq_init(struct platform_device *pdev, - struct imx_ssi *ssi); -void imx_ssi_fiq_exit(struct platform_device *pdev, struct imx_ssi *ssi); -struct snd_soc_platform *imx_ssi_dma_mx2_init(struct platform_device *pdev, - struct imx_ssi *ssi); - int snd_imx_pcm_mmap(struct snd_pcm_substream *substream, struct vm_area_struct *vma); -int imx_pcm_new(struct snd_card *card, struct snd_soc_dai *dai, - struct snd_pcm *pcm); +int imx_pcm_new(struct snd_soc_pcm_runtime *rtd); void imx_pcm_free(struct snd_pcm *pcm); /* diff --git a/sound/soc/imx/wm1133-ev1.c b/sound/soc/imx/wm1133-ev1.c index 75b4c72..490a126 100644 --- a/sound/soc/imx/wm1133-ev1.c +++ b/sound/soc/imx/wm1133-ev1.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include diff --git a/sound/soc/jz4740/jz4740-pcm.c b/sound/soc/jz4740/jz4740-pcm.c index fb1483f..d1989cd 100644 --- a/sound/soc/jz4740/jz4740-pcm.c +++ b/sound/soc/jz4740/jz4740-pcm.c @@ -299,9 +299,11 @@ static void jz4740_pcm_free(struct snd_pcm *pcm) static u64 jz4740_pcm_dmamask = DMA_BIT_MASK(32); -int jz4740_pcm_new(struct snd_card *card, struct snd_soc_dai *dai, - struct snd_pcm *pcm) +static int jz4740_pcm_new(struct snd_soc_pcm_runtime *rtd) { + struct snd_card *card = rtd->card->snd_card; + struct snd_soc_dai *dai = rtd->cpu_dai; + struct snd_pcm *pcm = rtd->pcm; int ret = 0; if (!card->dev->dma_mask) diff --git a/sound/soc/kirkwood/Kconfig b/sound/soc/kirkwood/Kconfig index 8f49e16..c62d715 100644 --- a/sound/soc/kirkwood/Kconfig +++ b/sound/soc/kirkwood/Kconfig @@ -12,6 +12,7 @@ config SND_KIRKWOOD_SOC_I2S config SND_KIRKWOOD_SOC_OPENRD tristate "SoC Audio support for Kirkwood Openrd Client" depends on SND_KIRKWOOD_SOC && (MACH_OPENRD_CLIENT || MACH_OPENRD_ULTIMATE) + depends on I2C select SND_KIRKWOOD_SOC_I2S select SND_SOC_CS42L51 help @@ -20,7 +21,7 @@ config SND_KIRKWOOD_SOC_OPENRD config SND_KIRKWOOD_SOC_T5325 tristate "SoC Audio support for HP t5325" - depends on SND_KIRKWOOD_SOC && MACH_T5325 + depends on SND_KIRKWOOD_SOC && MACH_T5325 && I2C select SND_KIRKWOOD_SOC_I2S select SND_SOC_ALC5623 help diff --git a/sound/soc/kirkwood/kirkwood-dma.c b/sound/soc/kirkwood/kirkwood-dma.c index e13c6ce..cd33de1 100644 --- a/sound/soc/kirkwood/kirkwood-dma.c +++ b/sound/soc/kirkwood/kirkwood-dma.c @@ -312,9 +312,11 @@ static int kirkwood_dma_preallocate_dma_buffer(struct snd_pcm *pcm, return 0; } -static int kirkwood_dma_new(struct snd_card *card, - struct snd_soc_dai *dai, struct snd_pcm *pcm) +static int kirkwood_dma_new(struct snd_soc_pcm_runtime *rtd) { + struct snd_card *card = rtd->card->snd_card; + struct snd_soc_dai *dai = rtd->cpu_dai; + struct snd_pcm *pcm = rtd->pcm; int ret; if (!card->dev->dma_mask) diff --git a/sound/soc/kirkwood/kirkwood-i2s.c b/sound/soc/kirkwood/kirkwood-i2s.c index a33fc51..715e841 100644 --- a/sound/soc/kirkwood/kirkwood-i2s.c +++ b/sound/soc/kirkwood/kirkwood-i2s.c @@ -424,7 +424,7 @@ static __devinit int kirkwood_i2s_dev_probe(struct platform_device *pdev) if (!priv->mem) { dev_err(&pdev->dev, "request_mem_region failed\n"); err = -EBUSY; - goto error; + goto err_alloc; } priv->io = ioremap(priv->mem->start, SZ_16K); @@ -476,7 +476,7 @@ static __devexit int kirkwood_i2s_dev_remove(struct platform_device *pdev) static struct platform_driver kirkwood_i2s_driver = { .probe = kirkwood_i2s_dev_probe, - .remove = kirkwood_i2s_dev_remove, + .remove = __devexit_p(kirkwood_i2s_dev_remove), .driver = { .name = DRV_NAME, .owner = THIS_MODULE, diff --git a/sound/soc/kirkwood/kirkwood-t5325.c b/sound/soc/kirkwood/kirkwood-t5325.c index c8d2195..c772b3c 100644 --- a/sound/soc/kirkwood/kirkwood-t5325.c +++ b/sound/soc/kirkwood/kirkwood-t5325.c @@ -79,8 +79,6 @@ static int t5325_dai_init(struct snd_soc_pcm_runtime *rtd) snd_soc_dapm_enable_pin(dapm, "Headphone Jack"); snd_soc_dapm_enable_pin(dapm, "Speaker"); - snd_soc_dapm_sync(dapm); - return 0; } diff --git a/sound/soc/mid-x86/mfld_machine.c b/sound/soc/mid-x86/mfld_machine.c index 429aa1b..cca693a 100644 --- a/sound/soc/mid-x86/mfld_machine.c +++ b/sound/soc/mid-x86/mfld_machine.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -54,9 +55,7 @@ static unsigned int hs_switch; static unsigned int lo_dac; struct mfld_mc_private { - struct platform_device *socdev; void __iomem *int_base; - struct snd_soc_codec *codec; u8 interrupt_status; }; @@ -235,7 +234,6 @@ static int mfld_init(struct snd_soc_pcm_runtime *runtime) /* always connected */ snd_soc_dapm_enable_pin(dapm, "Headphones"); snd_soc_dapm_enable_pin(dapm, "Mic"); - snd_soc_dapm_sync(dapm); ret_val = snd_soc_add_controls(codec, mfld_snd_controls, ARRAY_SIZE(mfld_snd_controls)); @@ -253,7 +251,6 @@ static int mfld_init(struct snd_soc_pcm_runtime *runtime) /* we dont use linein in this so set to NC */ snd_soc_dapm_disable_pin(dapm, "LINEINL"); snd_soc_dapm_disable_pin(dapm, "LINEINR"); - snd_soc_dapm_sync(dapm); /* Headset and button jack detection */ ret_val = snd_soc_jack_new(codec, "Intel(R) MID Audio Jack", diff --git a/sound/soc/mid-x86/sst_platform.c b/sound/soc/mid-x86/sst_platform.c index 5a946b4..2305702 100644 --- a/sound/soc/mid-x86/sst_platform.c +++ b/sound/soc/mid-x86/sst_platform.c @@ -27,6 +27,7 @@ #include #include +#include #include #include #include @@ -63,7 +64,7 @@ static struct snd_pcm_hardware sst_platform_pcm_hw = { }; /* MFLD - MSIC */ -struct snd_soc_dai_driver sst_platform_dai[] = { +static struct snd_soc_dai_driver sst_platform_dai[] = { { .name = "Headset-cpu-dai", .id = 0, @@ -226,13 +227,18 @@ static int sst_platform_init_stream(struct snd_pcm_substream *substream) static int sst_platform_open(struct snd_pcm_substream *substream) { - struct snd_pcm_runtime *runtime; + struct snd_pcm_runtime *runtime = substream->runtime; struct sst_runtime_stream *stream; int ret_val = 0; pr_debug("sst_platform_open called\n"); - runtime = substream->runtime; - runtime->hw = sst_platform_pcm_hw; + + snd_soc_set_runtime_hwparams(substream, &sst_platform_pcm_hw); + ret_val = snd_pcm_hw_constraint_integer(runtime, + SNDRV_PCM_HW_PARAM_PERIODS); + if (ret_val < 0) + return ret_val; + stream = kzalloc(sizeof(*stream), GFP_KERNEL); if (!stream) return -ENOMEM; @@ -259,8 +265,8 @@ static int sst_platform_open(struct snd_pcm_substream *substream) return ret_val; } runtime->private_data = stream; - return snd_pcm_hw_constraint_integer(runtime, - SNDRV_PCM_HW_PARAM_PERIODS); + + return 0; } static int sst_platform_close(struct snd_pcm_substream *substream) @@ -402,9 +408,10 @@ static void sst_pcm_free(struct snd_pcm *pcm) snd_pcm_lib_preallocate_free_for_all(pcm); } -int sst_pcm_new(struct snd_card *card, struct snd_soc_dai *dai, - struct snd_pcm *pcm) +int sst_pcm_new(struct snd_soc_pcm_runtime *rtd) { + struct snd_soc_dai *dai = rtd->cpu_dai; + struct snd_pcm *pcm = rtd->pcm; int retval = 0; pr_debug("sst_pcm_new called\n"); @@ -468,7 +475,7 @@ static struct platform_driver sst_platform_driver = { static int __init sst_soc_platform_init(void) { pr_debug("sst_soc_platform_init called\n"); - return platform_driver_register(&sst_platform_driver); + return platform_driver_register(&sst_platform_driver); } module_init(sst_soc_platform_init); diff --git a/sound/soc/nuc900/nuc900-ac97.c b/sound/soc/nuc900/nuc900-ac97.c index dac6732..a4e3237 100644 --- a/sound/soc/nuc900/nuc900-ac97.c +++ b/sound/soc/nuc900/nuc900-ac97.c @@ -356,7 +356,7 @@ static int __devinit nuc900_ac97_drvprobe(struct platform_device *pdev) nuc900_audio->irq_num = platform_get_irq(pdev, 0); if (!nuc900_audio->irq_num) { ret = -EBUSY; - goto out2; + goto out3; } nuc900_ac97_data = nuc900_audio; @@ -365,7 +365,8 @@ static int __devinit nuc900_ac97_drvprobe(struct platform_device *pdev) if (ret) goto out3; - mfp_set_groupg(nuc900_audio->dev); /* enbale ac97 multifunction pin*/ + /* enbale ac97 multifunction pin */ + mfp_set_groupg(nuc900_audio->dev, "nuc900-audio"); return 0; diff --git a/sound/soc/nuc900/nuc900-pcm.c b/sound/soc/nuc900/nuc900-pcm.c index 8263f56..ae8d680 100644 --- a/sound/soc/nuc900/nuc900-pcm.c +++ b/sound/soc/nuc900/nuc900-pcm.c @@ -227,7 +227,7 @@ static int nuc900_dma_trigger(struct snd_pcm_substream *substream, int cmd) return ret; } -int nuc900_dma_getposition(struct snd_pcm_substream *substream, +static int nuc900_dma_getposition(struct snd_pcm_substream *substream, dma_addr_t *src, dma_addr_t *dst) { struct snd_pcm_runtime *runtime = substream->runtime; @@ -268,7 +268,7 @@ static int nuc900_dma_open(struct snd_pcm_substream *substream) nuc900_audio = nuc900_ac97_data; if (request_irq(nuc900_audio->irq_num, nuc900_dma_interrupt, - IRQF_DISABLED, "nuc900-dma", substream)) + 0, "nuc900-dma", substream)) return -EBUSY; runtime->private_data = nuc900_audio; @@ -315,9 +315,11 @@ static void nuc900_dma_free_dma_buffers(struct snd_pcm *pcm) } static u64 nuc900_pcm_dmamask = DMA_BIT_MASK(32); -static int nuc900_dma_new(struct snd_card *card, - struct snd_soc_dai *dai, struct snd_pcm *pcm) +static int nuc900_dma_new(struct snd_soc_pcm_runtime *rtd) { + struct snd_card *card = rtd->card->snd_card; + struct snd_pcm *pcm = rtd->pcm; + if (!card->dev->dma_mask) card->dev->dma_mask = &nuc900_pcm_dmamask; if (!card->dev->coherent_dma_mask) diff --git a/sound/soc/pxa/Kconfig b/sound/soc/pxa/Kconfig index 33ebc46..23deb67 100644 --- a/sound/soc/pxa/Kconfig +++ b/sound/soc/pxa/Kconfig @@ -1,7 +1,6 @@ config SND_PXA2XX_SOC tristate "SoC Audio for the Intel PXA2xx chip" depends on ARCH_PXA - select SND_ARM select SND_PXA2XX_LIB help Say Y or M if you want to add support for codecs attached to @@ -15,7 +14,6 @@ config SND_PXA2XX_AC97 config SND_PXA2XX_SOC_AC97 tristate select AC97_BUS - select SND_ARM select SND_PXA2XX_LIB_AC97 select SND_SOC_AC97_BUS @@ -121,6 +119,7 @@ config SND_PXA2XX_SOC_PALM27X config SND_SOC_SAARB tristate "SoC Audio support for Marvell Saarb" depends on SND_PXA2XX_SOC && MACH_SAARB + select MFD_88PM860X select SND_PXA_SOC_SSP select SND_SOC_88PM860X help @@ -130,6 +129,7 @@ config SND_SOC_SAARB config SND_SOC_TAVOREVB3 tristate "SoC Audio support for Marvell Tavor EVB3" depends on SND_PXA2XX_SOC && MACH_TAVOREVB3 + select MFD_88PM860X select SND_PXA_SOC_SSP select SND_SOC_88PM860X help @@ -149,6 +149,7 @@ config SND_SOC_ZYLONITE config SND_SOC_RAUMFELD tristate "SoC Audio support Raumfeld audio adapter" depends on SND_PXA2XX_SOC && (MACH_RAUMFELD_SPEAKER || MACH_RAUMFELD_CONNECTOR) + depends on I2C && SPI_MASTER select SND_PXA_SOC_SSP select SND_SOC_CS4270 select SND_SOC_AK4104 @@ -157,7 +158,7 @@ config SND_SOC_RAUMFELD config SND_PXA2XX_SOC_HX4700 tristate "SoC Audio support for HP iPAQ hx4700" - depends on SND_PXA2XX_SOC && MACH_H4700 + depends on SND_PXA2XX_SOC && MACH_H4700 && I2C select SND_PXA2XX_SOC_I2S select SND_SOC_AK4641 help diff --git a/sound/soc/pxa/corgi.c b/sound/soc/pxa/corgi.c index 28757fb..b0e2fb7 100644 --- a/sound/soc/pxa/corgi.c +++ b/sound/soc/pxa/corgi.c @@ -299,7 +299,6 @@ static int corgi_wm8731_init(struct snd_soc_pcm_runtime *rtd) /* Set up corgi specific audio path audio_map */ snd_soc_dapm_add_routes(dapm, audio_map, ARRAY_SIZE(audio_map)); - snd_soc_dapm_sync(dapm); return 0; } diff --git a/sound/soc/pxa/e740_wm9705.c b/sound/soc/pxa/e740_wm9705.c index dc65650..35ed7eb 100644 --- a/sound/soc/pxa/e740_wm9705.c +++ b/sound/soc/pxa/e740_wm9705.c @@ -108,8 +108,6 @@ static int e740_ac97_init(struct snd_soc_pcm_runtime *rtd) snd_soc_dapm_add_routes(dapm, audio_map, ARRAY_SIZE(audio_map)); - snd_soc_dapm_sync(dapm); - return 0; } diff --git a/sound/soc/pxa/e750_wm9705.c b/sound/soc/pxa/e750_wm9705.c index 51897fc..ce5f056 100644 --- a/sound/soc/pxa/e750_wm9705.c +++ b/sound/soc/pxa/e750_wm9705.c @@ -90,8 +90,6 @@ static int e750_ac97_init(struct snd_soc_pcm_runtime *rtd) snd_soc_dapm_add_routes(dapm, audio_map, ARRAY_SIZE(audio_map)); - snd_soc_dapm_sync(dapm); - return 0; } diff --git a/sound/soc/pxa/e800_wm9712.c b/sound/soc/pxa/e800_wm9712.c index 053ed20..6a8f38b 100644 --- a/sound/soc/pxa/e800_wm9712.c +++ b/sound/soc/pxa/e800_wm9712.c @@ -80,7 +80,6 @@ static int e800_ac97_init(struct snd_soc_pcm_runtime *rtd) ARRAY_SIZE(e800_dapm_widgets)); snd_soc_dapm_add_routes(dapm, audio_map, ARRAY_SIZE(audio_map)); - snd_soc_dapm_sync(dapm); return 0; } diff --git a/sound/soc/pxa/hx4700.c b/sound/soc/pxa/hx4700.c index 65c1248..c664e33 100644 --- a/sound/soc/pxa/hx4700.c +++ b/sound/soc/pxa/hx4700.c @@ -209,9 +209,10 @@ static int __devinit hx4700_audio_probe(struct platform_device *pdev) snd_soc_card_hx4700.dev = &pdev->dev; ret = snd_soc_register_card(&snd_soc_card_hx4700); if (ret) - return ret; + gpio_free_array(hx4700_audio_gpios, + ARRAY_SIZE(hx4700_audio_gpios)); - return 0; + return ret; } static int __devexit hx4700_audio_remove(struct platform_device *pdev) diff --git a/sound/soc/pxa/magician.c b/sound/soc/pxa/magician.c index 67dcc36..e79f516 100644 --- a/sound/soc/pxa/magician.c +++ b/sound/soc/pxa/magician.c @@ -92,11 +92,10 @@ static int magician_playback_hw_params(struct snd_pcm_substream *substream, struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_dai *codec_dai = rtd->codec_dai; struct snd_soc_dai *cpu_dai = rtd->cpu_dai; - unsigned int acps, acds, width, rate; + unsigned int acps, acds, width; unsigned int div4 = PXA_SSP_CLK_SCDB_4; int ret = 0; - rate = params_rate(params); width = snd_pcm_format_physical_width(params_format(params)); /* @@ -424,7 +423,6 @@ static int magician_uda1380_init(struct snd_soc_pcm_runtime *rtd) /* Set up magician specific audio path interconnects */ snd_soc_dapm_add_routes(dapm, audio_map, ARRAY_SIZE(audio_map)); - snd_soc_dapm_sync(dapm); return 0; } diff --git a/sound/soc/pxa/mioa701_wm9713.c b/sound/soc/pxa/mioa701_wm9713.c index 38ca675..0b8d1ee 100644 --- a/sound/soc/pxa/mioa701_wm9713.c +++ b/sound/soc/pxa/mioa701_wm9713.c @@ -151,7 +151,6 @@ static int mioa701_wm9713_init(struct snd_soc_pcm_runtime *rtd) snd_soc_dapm_enable_pin(dapm, "Front Mic"); snd_soc_dapm_enable_pin(dapm, "GSM Line In"); snd_soc_dapm_enable_pin(dapm, "GSM Line Out"); - snd_soc_dapm_sync(dapm); return 0; } diff --git a/sound/soc/pxa/palm27x.c b/sound/soc/pxa/palm27x.c index 504e400..7edc1fb 100644 --- a/sound/soc/pxa/palm27x.c +++ b/sound/soc/pxa/palm27x.c @@ -107,10 +107,6 @@ static int palm27x_ac97_init(struct snd_soc_pcm_runtime *rtd) snd_soc_dapm_nc_pin(dapm, "PHONE"); snd_soc_dapm_nc_pin(dapm, "MIC2"); - err = snd_soc_dapm_sync(dapm); - if (err) - return err; - /* Jack detection API stuff */ err = snd_soc_jack_new(codec, "Headphone Jack", SND_JACK_HEADPHONE, &hs_jack); diff --git a/sound/soc/pxa/poodle.c b/sound/soc/pxa/poodle.c index da3ae43..4c29bc1 100644 --- a/sound/soc/pxa/poodle.c +++ b/sound/soc/pxa/poodle.c @@ -265,7 +265,6 @@ static int poodle_wm8731_init(struct snd_soc_pcm_runtime *rtd) /* Set up poodle specific audio path audio_map */ snd_soc_dapm_add_routes(dapm, audio_map, ARRAY_SIZE(audio_map)); - snd_soc_dapm_sync(dapm); return 0; } diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c index b583e60..72886e3 100644 --- a/sound/soc/pxa/pxa-ssp.c +++ b/sound/soc/pxa/pxa-ssp.c @@ -778,9 +778,7 @@ static int pxa_ssp_remove(struct snd_soc_dai *dai) SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000 | \ SNDRV_PCM_RATE_88200 | SNDRV_PCM_RATE_96000) -#define PXA_SSP_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\ - SNDRV_PCM_FMTBIT_S24_LE | \ - SNDRV_PCM_FMTBIT_S32_LE) +#define PXA_SSP_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S32_LE) static struct snd_soc_dai_ops pxa_ssp_dai_ops = { .startup = pxa_ssp_startup, diff --git a/sound/soc/pxa/pxa2xx-pcm.c b/sound/soc/pxa/pxa2xx-pcm.c index fab20a5..600676f 100644 --- a/sound/soc/pxa/pxa2xx-pcm.c +++ b/sound/soc/pxa/pxa2xx-pcm.c @@ -11,6 +11,7 @@ */ #include +#include #include #include @@ -85,9 +86,10 @@ static struct snd_pcm_ops pxa2xx_pcm_ops = { static u64 pxa2xx_pcm_dmamask = DMA_BIT_MASK(32); -static int pxa2xx_soc_pcm_new(struct snd_card *card, struct snd_soc_dai *dai, - struct snd_pcm *pcm) +static int pxa2xx_soc_pcm_new(struct snd_soc_pcm_runtime *rtd) { + struct snd_card *card = rtd->card->snd_card; + struct snd_pcm *pcm = rtd->pcm; int ret = 0; if (!card->dev->dma_mask) diff --git a/sound/soc/pxa/raumfeld.c b/sound/soc/pxa/raumfeld.c index 1a591f1..b899a3b 100644 --- a/sound/soc/pxa/raumfeld.c +++ b/sound/soc/pxa/raumfeld.c @@ -306,8 +306,10 @@ static int __init raumfeld_audio_init(void) &snd_soc_raumfeld_connector); ret = platform_device_add(raumfeld_audio_device); - if (ret < 0) + if (ret < 0) { + platform_device_put(raumfeld_audio_device); return ret; + } raumfeld_enable_audio(true); return 0; diff --git a/sound/soc/pxa/saarb.c b/sound/soc/pxa/saarb.c index 9595189..d9467a2 100644 --- a/sound/soc/pxa/saarb.c +++ b/sound/soc/pxa/saarb.c @@ -146,10 +146,6 @@ static int saarb_pm860x_init(struct snd_soc_pcm_runtime *rtd) snd_soc_dapm_disable_pin(dapm, "Headset Mic 2"); snd_soc_dapm_disable_pin(dapm, "Headset Stereophone"); - ret = snd_soc_dapm_sync(dapm); - if (ret) - return ret; - /* Headset jack detection */ snd_soc_jack_new(codec, "Headphone Jack", SND_JACK_HEADPHONE | SND_JACK_BTN_0 | SND_JACK_BTN_1 | SND_JACK_BTN_2, diff --git a/sound/soc/pxa/spitz.c b/sound/soc/pxa/spitz.c index b253d86..c2d6ff9 100644 --- a/sound/soc/pxa/spitz.c +++ b/sound/soc/pxa/spitz.c @@ -301,7 +301,6 @@ static int spitz_wm8750_init(struct snd_soc_pcm_runtime *rtd) /* Set up spitz specific audio paths */ snd_soc_dapm_add_routes(dapm, audio_map, ARRAY_SIZE(audio_map)); - snd_soc_dapm_sync(dapm); return 0; } @@ -312,7 +311,7 @@ static struct snd_soc_dai_link spitz_dai = { .cpu_dai_name = "pxa2xx-i2s", .codec_dai_name = "wm8750-hifi", .platform_name = "pxa-pcm-audio", - .codec_name = "wm8750-codec.0-001b", + .codec_name = "wm8750.0-001b", .init = spitz_wm8750_init, .ops = &spitz_ops, }; diff --git a/sound/soc/pxa/tavorevb3.c b/sound/soc/pxa/tavorevb3.c index f881f65..eeec892 100644 --- a/sound/soc/pxa/tavorevb3.c +++ b/sound/soc/pxa/tavorevb3.c @@ -146,10 +146,6 @@ static int evb3_pm860x_init(struct snd_soc_pcm_runtime *rtd) snd_soc_dapm_disable_pin(dapm, "Headset Mic 2"); snd_soc_dapm_disable_pin(dapm, "Headset Stereophone"); - ret = snd_soc_dapm_sync(dapm); - if (ret) - return ret; - /* Headset jack detection */ snd_soc_jack_new(codec, "Headphone Jack", SND_JACK_HEADPHONE | SND_JACK_BTN_0 | SND_JACK_BTN_1 | SND_JACK_BTN_2, diff --git a/sound/soc/pxa/tosa.c b/sound/soc/pxa/tosa.c index 9a23513..620fc69 100644 --- a/sound/soc/pxa/tosa.c +++ b/sound/soc/pxa/tosa.c @@ -211,7 +211,6 @@ static int tosa_ac97_init(struct snd_soc_pcm_runtime *rtd) /* set up tosa specific audio path audio_map */ snd_soc_dapm_add_routes(dapm, audio_map, ARRAY_SIZE(audio_map)); - snd_soc_dapm_sync(dapm); return 0; } diff --git a/sound/soc/pxa/z2.c b/sound/soc/pxa/z2.c index d69d9fc..b311ffe 100644 --- a/sound/soc/pxa/z2.c +++ b/sound/soc/pxa/z2.c @@ -161,10 +161,6 @@ static int z2_wm8750_init(struct snd_soc_pcm_runtime *rtd) /* Set up z2 specific audio paths */ snd_soc_dapm_add_routes(dapm, audio_map, ARRAY_SIZE(audio_map)); - ret = snd_soc_dapm_sync(dapm); - if (ret) - goto err; - /* Jack detection API stuff */ ret = snd_soc_jack_new(codec, "Headset Jack", SND_JACK_HEADSET, &hs_jack); @@ -198,7 +194,7 @@ static struct snd_soc_dai_link z2_dai = { .cpu_dai_name = "pxa2xx-i2s", .codec_dai_name = "wm8750-hifi", .platform_name = "pxa-pcm-audio", - .codec_name = "wm8750-codec.0-001b", + .codec_name = "wm8750.0-001b", .init = z2_wm8750_init, .ops = &z2_ops, }; diff --git a/sound/soc/pxa/zylonite.c b/sound/soc/pxa/zylonite.c index 2b8350b..580aae3 100644 --- a/sound/soc/pxa/zylonite.c +++ b/sound/soc/pxa/zylonite.c @@ -87,7 +87,6 @@ static int zylonite_wm9713_init(struct snd_soc_pcm_runtime *rtd) snd_soc_dapm_enable_pin(dapm, "Headphone"); snd_soc_dapm_enable_pin(dapm, "Headset Earpiece"); - snd_soc_dapm_sync(dapm); return 0; } diff --git a/sound/soc/s6000/s6000-pcm.c b/sound/soc/s6000/s6000-pcm.c index ab3ccae..75babae 100644 --- a/sound/soc/s6000/s6000-pcm.c +++ b/sound/soc/s6000/s6000-pcm.c @@ -128,7 +128,9 @@ static irqreturn_t s6000_pcm_irq(int irq, void *data) substream->runtime && snd_pcm_running(substream)) { dev_dbg(pcm->dev, "xrun\n"); + snd_pcm_stream_lock(substream); snd_pcm_stop(substream, SNDRV_PCM_STATE_XRUN); + snd_pcm_stream_unlock(substream); ret = IRQ_HANDLED; } @@ -443,10 +445,10 @@ static void s6000_pcm_free(struct snd_pcm *pcm) static u64 s6000_pcm_dmamask = DMA_BIT_MASK(32); -static int s6000_pcm_new(struct snd_card *card, - struct snd_soc_dai *dai, struct snd_pcm *pcm) +static int s6000_pcm_new(struct snd_soc_pcm_runtime *runtime) { - struct snd_soc_pcm_runtime *runtime = pcm->private_data; + struct snd_card *card = runtime->card->snd_card; + struct snd_pcm *pcm = runtime->pcm; struct s6000_pcm_dma_params *params; int res; diff --git a/sound/soc/sh/dma-sh7760.c b/sound/soc/sh/dma-sh7760.c index 5ba023a..1a757c3 100644 --- a/sound/soc/sh/dma-sh7760.c +++ b/sound/soc/sh/dma-sh7760.c @@ -327,10 +327,10 @@ static void camelot_pcm_free(struct snd_pcm *pcm) snd_pcm_lib_preallocate_free_for_all(pcm); } -static int camelot_pcm_new(struct snd_card *card, - struct snd_soc_dai *dai, - struct snd_pcm *pcm) +static int camelot_pcm_new(struct snd_soc_pcm_runtime *rtd) { + struct snd_pcm *pcm = rtd->pcm; + /* dont use SNDRV_DMA_TYPE_DEV, since it will oops the SH kernel * in MMAP mode (i.e. aplay -M) */ diff --git a/sound/soc/sh/fsi-ak4642.c b/sound/soc/sh/fsi-ak4642.c index 770a71a..dff64b9 100644 --- a/sound/soc/sh/fsi-ak4642.c +++ b/sound/soc/sh/fsi-ak4642.c @@ -10,6 +10,7 @@ */ #include +#include #include struct fsi_ak4642_data { diff --git a/sound/soc/sh/fsi-da7210.c b/sound/soc/sh/fsi-da7210.c index 59553fd..f5586b5b 100644 --- a/sound/soc/sh/fsi-da7210.c +++ b/sound/soc/sh/fsi-da7210.c @@ -11,6 +11,7 @@ */ #include +#include #include static int fsi_da7210_init(struct snd_soc_pcm_runtime *rtd) diff --git a/sound/soc/sh/fsi-hdmi.c b/sound/soc/sh/fsi-hdmi.c index d3d9fd8..3ebebe7 100644 --- a/sound/soc/sh/fsi-hdmi.c +++ b/sound/soc/sh/fsi-hdmi.c @@ -10,6 +10,7 @@ */ #include +#include #include struct fsi_hdmi_data { diff --git a/sound/soc/sh/fsi.c b/sound/soc/sh/fsi.c index 4a9da6b..4a2c639 100644 --- a/sound/soc/sh/fsi.c +++ b/sound/soc/sh/fsi.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -118,10 +119,38 @@ typedef int (*set_rate_func)(struct device *dev, int is_porta, int rate, int ena /* * FSI driver use below type name for variable * - * xxx_len : data length - * xxx_width : data width - * xxx_offset : data offset * xxx_num : number of data + * xxx_pos : position of data + * xxx_capa : capacity of data + */ + +/* + * period/frame/sample image + * + * ex) PCM (2ch) + * + * period pos period pos + * [n] [n + 1] + * |<-------------------- period--------------------->| + * ==|============================================ ... =|== + * | | + * ||<----- frame ----->|<------ frame ----->| ... | + * |+--------------------+--------------------+- ... | + * ||[ sample ][ sample ]|[ sample ][ sample ]| ... | + * |+--------------------+--------------------+- ... | + * ==|============================================ ... =|== + */ + +/* + * FSI FIFO image + * + * | | + * | | + * | [ sample ] | + * | [ sample ] | + * | [ sample ] | + * | [ sample ] | + * --> go to codecs */ /* @@ -131,12 +160,11 @@ typedef int (*set_rate_func)(struct device *dev, int is_porta, int rate, int ena struct fsi_stream { struct snd_pcm_substream *substream; - int fifo_max_num; - - int buff_offset; - int buff_len; - int period_len; - int period_num; + int fifo_sample_capa; /* sample capacity of FSI FIFO */ + int buff_sample_capa; /* sample capacity of ALSA buffer */ + int buff_sample_pos; /* sample position of ALSA buffer */ + int period_samples; /* sample number / 1 period */ + int period_pos; /* current period position */ int uerr_num; int oerr_num; @@ -149,17 +177,14 @@ struct fsi_priv { struct fsi_stream playback; struct fsi_stream capture; + u32 do_fmt; + u32 di_fmt; + int chan_num:16; int clk_master:1; + int spdif:1; long rate; - - /* for suspend/resume */ - u32 saved_do_fmt; - u32 saved_di_fmt; - u32 saved_ckg1; - u32 saved_ckg2; - u32 saved_out_sel; }; struct fsi_core { @@ -180,21 +205,13 @@ struct fsi_master { struct fsi_core *core; struct sh_fsi_platform_info *info; spinlock_t lock; - - /* for suspend/resume */ - u32 saved_a_mclk; - u32 saved_b_mclk; - u32 saved_iemsk; - u32 saved_imsk; - u32 saved_clk_rst; - u32 saved_soft_rst; }; /* * basic read write function */ -static void __fsi_reg_write(u32 reg, u32 data) +static void __fsi_reg_write(u32 __iomem *reg, u32 data) { /* valid data area is 24bit */ data &= 0x00ffffff; @@ -202,12 +219,12 @@ static void __fsi_reg_write(u32 reg, u32 data) __raw_writel(data, reg); } -static u32 __fsi_reg_read(u32 reg) +static u32 __fsi_reg_read(u32 __iomem *reg) { return __raw_readl(reg); } -static void __fsi_reg_mask_set(u32 reg, u32 mask, u32 data) +static void __fsi_reg_mask_set(u32 __iomem *reg, u32 mask, u32 data) { u32 val = __fsi_reg_read(reg); @@ -234,7 +251,7 @@ static u32 _fsi_master_read(struct fsi_master *master, u32 reg) unsigned long flags; spin_lock_irqsave(&master->lock, flags); - ret = __fsi_reg_read((u32)(master->base + reg)); + ret = __fsi_reg_read(master->base + reg); spin_unlock_irqrestore(&master->lock, flags); return ret; @@ -248,7 +265,7 @@ static void _fsi_master_mask_set(struct fsi_master *master, unsigned long flags; spin_lock_irqsave(&master->lock, flags); - __fsi_reg_mask_set((u32)(master->base + reg), mask, data); + __fsi_reg_mask_set(master->base + reg, mask, data); spin_unlock_irqrestore(&master->lock, flags); } @@ -271,6 +288,11 @@ static int fsi_is_port_a(struct fsi_priv *fsi) return fsi->master->base == fsi->base; } +static int fsi_is_spdif(struct fsi_priv *fsi) +{ + return fsi->spdif; +} + static struct snd_soc_dai *fsi_get_dai(struct snd_pcm_substream *substream) { struct snd_soc_pcm_runtime *rtd = substream->private_data; @@ -342,28 +364,59 @@ static u32 fsi_get_port_shift(struct fsi_priv *fsi, int is_play) return shift; } +static int fsi_frame2sample(struct fsi_priv *fsi, int frames) +{ + return frames * fsi->chan_num; +} + +static int fsi_sample2frame(struct fsi_priv *fsi, int samples) +{ + return samples / fsi->chan_num; +} + +static int fsi_stream_is_working(struct fsi_priv *fsi, + int is_play) +{ + struct fsi_stream *io = fsi_get_stream(fsi, is_play); + struct fsi_master *master = fsi_get_master(fsi); + unsigned long flags; + int ret; + + spin_lock_irqsave(&master->lock, flags); + ret = !!io->substream; + spin_unlock_irqrestore(&master->lock, flags); + + return ret; +} + static void fsi_stream_push(struct fsi_priv *fsi, int is_play, - struct snd_pcm_substream *substream, - u32 buffer_len, - u32 period_len) + struct snd_pcm_substream *substream) { struct fsi_stream *io = fsi_get_stream(fsi, is_play); + struct snd_pcm_runtime *runtime = substream->runtime; + struct fsi_master *master = fsi_get_master(fsi); + unsigned long flags; + spin_lock_irqsave(&master->lock, flags); io->substream = substream; - io->buff_len = buffer_len; - io->buff_offset = 0; - io->period_len = period_len; - io->period_num = 0; + io->buff_sample_capa = fsi_frame2sample(fsi, runtime->buffer_size); + io->buff_sample_pos = 0; + io->period_samples = fsi_frame2sample(fsi, runtime->period_size); + io->period_pos = 0; io->oerr_num = -1; /* ignore 1st err */ io->uerr_num = -1; /* ignore 1st err */ + spin_unlock_irqrestore(&master->lock, flags); } static void fsi_stream_pop(struct fsi_priv *fsi, int is_play) { struct fsi_stream *io = fsi_get_stream(fsi, is_play); struct snd_soc_dai *dai = fsi_get_dai(io->substream); + struct fsi_master *master = fsi_get_master(fsi); + unsigned long flags; + spin_lock_irqsave(&master->lock, flags); if (io->oerr_num > 0) dev_err(dai->dev, "over_run = %d\n", io->oerr_num); @@ -372,47 +425,27 @@ static void fsi_stream_pop(struct fsi_priv *fsi, int is_play) dev_err(dai->dev, "under_run = %d\n", io->uerr_num); io->substream = NULL; - io->buff_len = 0; - io->buff_offset = 0; - io->period_len = 0; - io->period_num = 0; + io->buff_sample_capa = 0; + io->buff_sample_pos = 0; + io->period_samples = 0; + io->period_pos = 0; io->oerr_num = 0; io->uerr_num = 0; + spin_unlock_irqrestore(&master->lock, flags); } -static int fsi_get_fifo_data_num(struct fsi_priv *fsi, int is_play) +static int fsi_get_current_fifo_samples(struct fsi_priv *fsi, int is_play) { u32 status; - int data_num; + int frames; status = is_play ? fsi_reg_read(fsi, DOFF_ST) : fsi_reg_read(fsi, DIFF_ST); - data_num = 0x1ff & (status >> 8); - data_num *= fsi->chan_num; - - return data_num; -} - -static int fsi_len2num(int len, int width) -{ - return len / width; -} - -#define fsi_num2offset(a, b) fsi_num2len(a, b) -static int fsi_num2len(int num, int width) -{ - return num * width; -} - -static int fsi_get_frame_width(struct fsi_priv *fsi, int is_play) -{ - struct fsi_stream *io = fsi_get_stream(fsi, is_play); - struct snd_pcm_substream *substream = io->substream; - struct snd_pcm_runtime *runtime = substream->runtime; + frames = 0x1ff & (status >> 8); - return frames_to_bytes(runtime, 1) / fsi->chan_num; + return fsi_frame2sample(fsi, frames); } static void fsi_count_fifo_err(struct fsi_priv *fsi) @@ -444,8 +477,10 @@ static u8 *fsi_dma_get_area(struct fsi_priv *fsi, int stream) { int is_play = fsi_stream_is_play(stream); struct fsi_stream *io = fsi_get_stream(fsi, is_play); + struct snd_pcm_runtime *runtime = io->substream->runtime; - return io->substream->runtime->dma_area + io->buff_offset; + return runtime->dma_area + + samples_to_bytes(runtime, io->buff_sample_pos); } static void fsi_dma_soft_push16(struct fsi_priv *fsi, int num) @@ -559,37 +594,94 @@ static void fsi_spdif_clk_ctrl(struct fsi_priv *fsi, int enable) /* * clock function */ -#define fsi_module_init(m, d) __fsi_module_clk_ctrl(m, d, 1) -#define fsi_module_kill(m, d) __fsi_module_clk_ctrl(m, d, 0) -static void __fsi_module_clk_ctrl(struct fsi_master *master, - struct device *dev, - int enable) +static int fsi_set_master_clk(struct device *dev, struct fsi_priv *fsi, + long rate, int enable) { - pm_runtime_get_sync(dev); + struct fsi_master *master = fsi_get_master(fsi); + set_rate_func set_rate = fsi_get_info_set_rate(master); + int fsi_ver = master->core->ver; + int ret; - if (enable) { - /* enable only SR */ - fsi_master_mask_set(master, SOFT_RST, FSISR, FSISR); - fsi_master_mask_set(master, SOFT_RST, PASR | PBSR, 0); - } else { - /* clear all registers */ - fsi_master_mask_set(master, SOFT_RST, FSISR, 0); + ret = set_rate(dev, fsi_is_port_a(fsi), rate, enable); + if (ret < 0) /* error */ + return ret; + + if (!enable) + return 0; + + if (ret > 0) { + u32 data = 0; + + switch (ret & SH_FSI_ACKMD_MASK) { + default: + /* FALL THROUGH */ + case SH_FSI_ACKMD_512: + data |= (0x0 << 12); + break; + case SH_FSI_ACKMD_256: + data |= (0x1 << 12); + break; + case SH_FSI_ACKMD_128: + data |= (0x2 << 12); + break; + case SH_FSI_ACKMD_64: + data |= (0x3 << 12); + break; + case SH_FSI_ACKMD_32: + if (fsi_ver < 2) + dev_err(dev, "unsupported ACKMD\n"); + else + data |= (0x4 << 12); + break; + } + + switch (ret & SH_FSI_BPFMD_MASK) { + default: + /* FALL THROUGH */ + case SH_FSI_BPFMD_32: + data |= (0x0 << 8); + break; + case SH_FSI_BPFMD_64: + data |= (0x1 << 8); + break; + case SH_FSI_BPFMD_128: + data |= (0x2 << 8); + break; + case SH_FSI_BPFMD_256: + data |= (0x3 << 8); + break; + case SH_FSI_BPFMD_512: + data |= (0x4 << 8); + break; + case SH_FSI_BPFMD_16: + if (fsi_ver < 2) + dev_err(dev, "unsupported ACKMD\n"); + else + data |= (0x7 << 8); + break; + } + + fsi_reg_mask_set(fsi, CKG1, (ACKMD_MASK | BPFMD_MASK) , data); + udelay(10); + ret = 0; } - pm_runtime_put_sync(dev); + return ret; } -#define fsi_port_start(f) __fsi_port_clk_ctrl(f, 1) -#define fsi_port_stop(f) __fsi_port_clk_ctrl(f, 0) -static void __fsi_port_clk_ctrl(struct fsi_priv *fsi, int enable) +#define fsi_port_start(f, i) __fsi_port_clk_ctrl(f, i, 1) +#define fsi_port_stop(f, i) __fsi_port_clk_ctrl(f, i, 0) +static void __fsi_port_clk_ctrl(struct fsi_priv *fsi, int is_play, int enable) { struct fsi_master *master = fsi_get_master(fsi); - u32 soft = fsi_is_port_a(fsi) ? PASR : PBSR; u32 clk = fsi_is_port_a(fsi) ? CRA : CRB; - int is_master = fsi_is_clk_master(fsi); - fsi_master_mask_set(master, SOFT_RST, soft, (enable) ? soft : 0); - if (is_master) + if (enable) + fsi_irq_enable(fsi, is_play); + else + fsi_irq_disable(fsi, is_play); + + if (fsi_is_clk_master(fsi)) fsi_master_mask_set(master, CLK_RST, clk, (enable) ? clk : 0); } @@ -598,18 +690,19 @@ static void __fsi_port_clk_ctrl(struct fsi_priv *fsi, int enable) */ static void fsi_fifo_init(struct fsi_priv *fsi, int is_play, - struct snd_soc_dai *dai) + struct device *dev) { struct fsi_master *master = fsi_get_master(fsi); struct fsi_stream *io = fsi_get_stream(fsi, is_play); u32 shift, i; + int frame_capa; /* get on-chip RAM capacity */ shift = fsi_master_read(master, FIFO_SZ); shift >>= fsi_get_port_shift(fsi, is_play); shift &= FIFO_SZ_MASK; - io->fifo_max_num = 256 << shift; - dev_dbg(dai->dev, "fifo = %d words\n", io->fifo_max_num); + frame_capa = 256 << shift; + dev_dbg(dev, "fifo = %d words\n", frame_capa); /* * The maximum number of sample data varies depending @@ -631,9 +724,11 @@ static void fsi_fifo_init(struct fsi_priv *fsi, * 8 channels: 32 ( 32 x 8 = 256) */ for (i = 1; i < fsi->chan_num; i <<= 1) - io->fifo_max_num >>= 1; - dev_dbg(dai->dev, "%d channel %d store\n", - fsi->chan_num, io->fifo_max_num); + frame_capa >>= 1; + dev_dbg(dev, "%d channel %d store\n", + fsi->chan_num, frame_capa); + + io->fifo_sample_capa = fsi_frame2sample(fsi, frame_capa); /* * set interrupt generation factor @@ -654,10 +749,10 @@ static int fsi_fifo_data_ctrl(struct fsi_priv *fsi, int stream) struct snd_pcm_substream *substream = NULL; int is_play = fsi_stream_is_play(stream); struct fsi_stream *io = fsi_get_stream(fsi, is_play); - int data_residue_num; - int data_num; - int data_num_max; - int ch_width; + int sample_residues; + int sample_width; + int samples; + int samples_max; int over_period; void (*fn)(struct fsi_priv *fsi, int size); @@ -673,36 +768,35 @@ static int fsi_fifo_data_ctrl(struct fsi_priv *fsi, int stream) /* FSI FIFO has limit. * So, this driver can not send periods data at a time */ - if (io->buff_offset >= - fsi_num2offset(io->period_num + 1, io->period_len)) { + if (io->buff_sample_pos >= + io->period_samples * (io->period_pos + 1)) { over_period = 1; - io->period_num = (io->period_num + 1) % runtime->periods; + io->period_pos = (io->period_pos + 1) % runtime->periods; - if (0 == io->period_num) - io->buff_offset = 0; + if (0 == io->period_pos) + io->buff_sample_pos = 0; } - /* get 1 channel data width */ - ch_width = fsi_get_frame_width(fsi, is_play); + /* get 1 sample data width */ + sample_width = samples_to_bytes(runtime, 1); - /* get residue data number of alsa */ - data_residue_num = fsi_len2num(io->buff_len - io->buff_offset, - ch_width); + /* get number of residue samples */ + sample_residues = io->buff_sample_capa - io->buff_sample_pos; if (is_play) { /* * for play-back * - * data_num_max : number of FSI fifo free space - * data_num : number of ALSA residue data + * samples_max : number of FSI fifo free samples space + * samples : number of ALSA residue samples */ - data_num_max = io->fifo_max_num * fsi->chan_num; - data_num_max -= fsi_get_fifo_data_num(fsi, is_play); + samples_max = io->fifo_sample_capa; + samples_max -= fsi_get_current_fifo_samples(fsi, is_play); - data_num = data_residue_num; + samples = sample_residues; - switch (ch_width) { + switch (sample_width) { case 2: fn = fsi_dma_soft_push16; break; @@ -716,13 +810,13 @@ static int fsi_fifo_data_ctrl(struct fsi_priv *fsi, int stream) /* * for capture * - * data_num_max : number of ALSA free space - * data_num : number of data in FSI fifo + * samples_max : number of ALSA free samples space + * samples : number of samples in FSI fifo */ - data_num_max = data_residue_num; - data_num = fsi_get_fifo_data_num(fsi, is_play); + samples_max = sample_residues; + samples = fsi_get_current_fifo_samples(fsi, is_play); - switch (ch_width) { + switch (sample_width) { case 2: fn = fsi_dma_soft_pop16; break; @@ -734,12 +828,12 @@ static int fsi_fifo_data_ctrl(struct fsi_priv *fsi, int stream) } } - data_num = min(data_num, data_num_max); + samples = min(samples, samples_max); - fn(fsi, data_num); + fn(fsi, samples); - /* update buff_offset */ - io->buff_offset += fsi_num2offset(data_num, ch_width); + /* update buff_sample_pos */ + io->buff_sample_pos += samples; if (over_period) snd_pcm_period_elapsed(substream); @@ -788,16 +882,20 @@ static irqreturn_t fsi_interrupt(int irq, void *data) * dai ops */ -static int fsi_dai_startup(struct snd_pcm_substream *substream, - struct snd_soc_dai *dai) +static int fsi_hw_startup(struct fsi_priv *fsi, + int is_play, + struct device *dev) { - struct fsi_priv *fsi = fsi_get_priv(substream); u32 flags = fsi_get_info_flags(fsi); - u32 data; - int is_play = fsi_is_play(substream); + u32 data = 0; - pm_runtime_get_sync(dai->dev); + pm_runtime_get_sync(dev); + /* clock setting */ + if (fsi_is_clk_master(fsi)) + data = DIMD | DOMD; + + fsi_reg_mask_set(fsi, CKG1, (DIMD | DOMD), data); /* clock inversion (CKG2) */ data = 0; @@ -812,54 +910,70 @@ static int fsi_dai_startup(struct snd_pcm_substream *substream, fsi_reg_write(fsi, CKG2, data); + /* set format */ + fsi_reg_write(fsi, DO_FMT, fsi->do_fmt); + fsi_reg_write(fsi, DI_FMT, fsi->di_fmt); + + /* spdif ? */ + if (fsi_is_spdif(fsi)) { + fsi_spdif_clk_ctrl(fsi, 1); + fsi_reg_mask_set(fsi, OUT_SEL, DMMD, DMMD); + } + /* irq clear */ fsi_irq_disable(fsi, is_play); fsi_irq_clear_status(fsi); /* fifo init */ - fsi_fifo_init(fsi, is_play, dai); + fsi_fifo_init(fsi, is_play, dev); return 0; } -static void fsi_dai_shutdown(struct snd_pcm_substream *substream, - struct snd_soc_dai *dai) +static void fsi_hw_shutdown(struct fsi_priv *fsi, + int is_play, + struct device *dev) +{ + if (fsi_is_clk_master(fsi)) + fsi_set_master_clk(dev, fsi, fsi->rate, 0); + + pm_runtime_put_sync(dev); +} + +static int fsi_dai_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct fsi_priv *fsi = fsi_get_priv(substream); int is_play = fsi_is_play(substream); - struct fsi_master *master = fsi_get_master(fsi); - set_rate_func set_rate = fsi_get_info_set_rate(master); - fsi_irq_disable(fsi, is_play); + return fsi_hw_startup(fsi, is_play, dai->dev); +} - if (fsi_is_clk_master(fsi)) - set_rate(dai->dev, fsi_is_port_a(fsi), fsi->rate, 0); +static void fsi_dai_shutdown(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) +{ + struct fsi_priv *fsi = fsi_get_priv(substream); + int is_play = fsi_is_play(substream); + fsi_hw_shutdown(fsi, is_play, dai->dev); fsi->rate = 0; - - pm_runtime_put_sync(dai->dev); } static int fsi_dai_trigger(struct snd_pcm_substream *substream, int cmd, struct snd_soc_dai *dai) { struct fsi_priv *fsi = fsi_get_priv(substream); - struct snd_pcm_runtime *runtime = substream->runtime; int is_play = fsi_is_play(substream); int ret = 0; switch (cmd) { case SNDRV_PCM_TRIGGER_START: - fsi_stream_push(fsi, is_play, substream, - frames_to_bytes(runtime, runtime->buffer_size), - frames_to_bytes(runtime, runtime->period_size)); + fsi_stream_push(fsi, is_play, substream); ret = is_play ? fsi_data_push(fsi) : fsi_data_pop(fsi); - fsi_irq_enable(fsi, is_play); - fsi_port_start(fsi); + fsi_port_start(fsi, is_play); break; case SNDRV_PCM_TRIGGER_STOP: - fsi_port_stop(fsi); - fsi_irq_disable(fsi, is_play); + fsi_port_stop(fsi, is_play); fsi_stream_pop(fsi, is_play); break; } @@ -884,8 +998,8 @@ static int fsi_set_fmt_dai(struct fsi_priv *fsi, unsigned int fmt) return -EINVAL; } - fsi_reg_write(fsi, DO_FMT, data); - fsi_reg_write(fsi, DI_FMT, data); + fsi->do_fmt = data; + fsi->di_fmt = data; return 0; } @@ -900,11 +1014,10 @@ static int fsi_set_fmt_spdif(struct fsi_priv *fsi) data = CR_BWS_16 | CR_DTMD_SPDIF_PCM | CR_PCM; fsi->chan_num = 2; - fsi_spdif_clk_ctrl(fsi, 1); - fsi_reg_mask_set(fsi, OUT_SEL, DMMD, DMMD); + fsi->spdif = 1; - fsi_reg_write(fsi, DO_FMT, data); - fsi_reg_write(fsi, DI_FMT, data); + fsi->do_fmt = data; + fsi->di_fmt = data; return 0; } @@ -915,32 +1028,24 @@ static int fsi_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) struct fsi_master *master = fsi_get_master(fsi); set_rate_func set_rate = fsi_get_info_set_rate(master); u32 flags = fsi_get_info_flags(fsi); - u32 data = 0; int ret; - pm_runtime_get_sync(dai->dev); - /* set master/slave audio interface */ switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { case SND_SOC_DAIFMT_CBM_CFM: - data = DIMD | DOMD; fsi->clk_master = 1; break; case SND_SOC_DAIFMT_CBS_CFS: break; default: - ret = -EINVAL; - goto set_fmt_exit; + return -EINVAL; } if (fsi_is_clk_master(fsi) && !set_rate) { dev_err(dai->dev, "platform doesn't have set_rate\n"); - ret = -EINVAL; - goto set_fmt_exit; + return -EINVAL; } - fsi_reg_mask_set(fsi, CKG1, (DIMD | DOMD), data); - /* set format */ switch (flags & SH_FSI_FMT_MASK) { case SH_FSI_FMT_DAI: @@ -953,9 +1058,6 @@ static int fsi_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) ret = -EINVAL; } -set_fmt_exit: - pm_runtime_put_sync(dai->dev); - return ret; } @@ -964,79 +1066,19 @@ static int fsi_dai_hw_params(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct fsi_priv *fsi = fsi_get_priv(substream); - struct fsi_master *master = fsi_get_master(fsi); - set_rate_func set_rate = fsi_get_info_set_rate(master); - int fsi_ver = master->core->ver; long rate = params_rate(params); int ret; if (!fsi_is_clk_master(fsi)) return 0; - ret = set_rate(dai->dev, fsi_is_port_a(fsi), rate, 1); - if (ret < 0) /* error */ + ret = fsi_set_master_clk(dai->dev, fsi, rate, 1); + if (ret < 0) return ret; fsi->rate = rate; - if (ret > 0) { - u32 data = 0; - - switch (ret & SH_FSI_ACKMD_MASK) { - default: - /* FALL THROUGH */ - case SH_FSI_ACKMD_512: - data |= (0x0 << 12); - break; - case SH_FSI_ACKMD_256: - data |= (0x1 << 12); - break; - case SH_FSI_ACKMD_128: - data |= (0x2 << 12); - break; - case SH_FSI_ACKMD_64: - data |= (0x3 << 12); - break; - case SH_FSI_ACKMD_32: - if (fsi_ver < 2) - dev_err(dai->dev, "unsupported ACKMD\n"); - else - data |= (0x4 << 12); - break; - } - - switch (ret & SH_FSI_BPFMD_MASK) { - default: - /* FALL THROUGH */ - case SH_FSI_BPFMD_32: - data |= (0x0 << 8); - break; - case SH_FSI_BPFMD_64: - data |= (0x1 << 8); - break; - case SH_FSI_BPFMD_128: - data |= (0x2 << 8); - break; - case SH_FSI_BPFMD_256: - data |= (0x3 << 8); - break; - case SH_FSI_BPFMD_512: - data |= (0x4 << 8); - break; - case SH_FSI_BPFMD_16: - if (fsi_ver < 2) - dev_err(dai->dev, "unsupported ACKMD\n"); - else - data |= (0x7 << 8); - break; - } - - fsi_reg_mask_set(fsi, CKG1, (ACKMD_MASK | BPFMD_MASK) , data); - udelay(10); - ret = 0; - } return ret; - } static struct snd_soc_dai_ops fsi_dai_ops = { @@ -1054,8 +1096,7 @@ static struct snd_soc_dai_ops fsi_dai_ops = { static struct snd_pcm_hardware fsi_pcm_hardware = { .info = SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_MMAP | - SNDRV_PCM_INFO_MMAP_VALID | - SNDRV_PCM_INFO_PAUSE, + SNDRV_PCM_INFO_MMAP_VALID, .formats = FSI_FMTS, .rates = FSI_RATES, .rate_min = 8000, @@ -1097,16 +1138,14 @@ static int fsi_hw_free(struct snd_pcm_substream *substream) static snd_pcm_uframes_t fsi_pointer(struct snd_pcm_substream *substream) { - struct snd_pcm_runtime *runtime = substream->runtime; struct fsi_priv *fsi = fsi_get_priv(substream); struct fsi_stream *io = fsi_get_stream(fsi, fsi_is_play(substream)); - long location; + int samples_pos = io->buff_sample_pos - 1; - location = (io->buff_offset - 1); - if (location < 0) - location = 0; + if (samples_pos < 0) + samples_pos = 0; - return bytes_to_frames(runtime, location); + return fsi_sample2frame(fsi, samples_pos); } static struct snd_pcm_ops fsi_pcm_ops = { @@ -1129,10 +1168,10 @@ static void fsi_pcm_free(struct snd_pcm *pcm) snd_pcm_lib_preallocate_free_for_all(pcm); } -static int fsi_pcm_new(struct snd_card *card, - struct snd_soc_dai *dai, - struct snd_pcm *pcm) +static int fsi_pcm_new(struct snd_soc_pcm_runtime *rtd) { + struct snd_pcm *pcm = rtd->pcm; + /* * dont use SNDRV_DMA_TYPE_DEV, since it will oops the SH kernel * in MMAP mode (i.e. aplay -M) @@ -1246,9 +1285,7 @@ static int fsi_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); dev_set_drvdata(&pdev->dev, master); - fsi_module_init(master, &pdev->dev); - - ret = request_irq(irq, &fsi_interrupt, IRQF_DISABLED, + ret = request_irq(irq, &fsi_interrupt, 0, id_entry->name, master); if (ret) { dev_err(&pdev->dev, "irq request err\n"); @@ -1290,8 +1327,6 @@ static int fsi_remove(struct platform_device *pdev) master = dev_get_drvdata(&pdev->dev); - fsi_module_kill(master, &pdev->dev); - free_irq(master->irq, master); pm_runtime_disable(&pdev->dev); @@ -1305,53 +1340,43 @@ static int fsi_remove(struct platform_device *pdev) } static void __fsi_suspend(struct fsi_priv *fsi, - struct device *dev, - set_rate_func set_rate) + int is_play, + struct device *dev) { - fsi->saved_do_fmt = fsi_reg_read(fsi, DO_FMT); - fsi->saved_di_fmt = fsi_reg_read(fsi, DI_FMT); - fsi->saved_ckg1 = fsi_reg_read(fsi, CKG1); - fsi->saved_ckg2 = fsi_reg_read(fsi, CKG2); - fsi->saved_out_sel = fsi_reg_read(fsi, OUT_SEL); + if (!fsi_stream_is_working(fsi, is_play)) + return; - if (fsi_is_clk_master(fsi)) - set_rate(dev, fsi_is_port_a(fsi), fsi->rate, 0); + fsi_port_stop(fsi, is_play); + fsi_hw_shutdown(fsi, is_play, dev); } static void __fsi_resume(struct fsi_priv *fsi, - struct device *dev, - set_rate_func set_rate) + int is_play, + struct device *dev) { - fsi_reg_write(fsi, DO_FMT, fsi->saved_do_fmt); - fsi_reg_write(fsi, DI_FMT, fsi->saved_di_fmt); - fsi_reg_write(fsi, CKG1, fsi->saved_ckg1); - fsi_reg_write(fsi, CKG2, fsi->saved_ckg2); - fsi_reg_write(fsi, OUT_SEL, fsi->saved_out_sel); + if (!fsi_stream_is_working(fsi, is_play)) + return; + + fsi_hw_startup(fsi, is_play, dev); + + if (fsi_is_clk_master(fsi) && fsi->rate) + fsi_set_master_clk(dev, fsi, fsi->rate, 1); + + fsi_port_start(fsi, is_play); - if (fsi_is_clk_master(fsi)) - set_rate(dev, fsi_is_port_a(fsi), fsi->rate, 1); } static int fsi_suspend(struct device *dev) { struct fsi_master *master = dev_get_drvdata(dev); - set_rate_func set_rate = fsi_get_info_set_rate(master); - - pm_runtime_get_sync(dev); - - __fsi_suspend(&master->fsia, dev, set_rate); - __fsi_suspend(&master->fsib, dev, set_rate); + struct fsi_priv *fsia = &master->fsia; + struct fsi_priv *fsib = &master->fsib; - master->saved_a_mclk = fsi_core_read(master, a_mclk); - master->saved_b_mclk = fsi_core_read(master, b_mclk); - master->saved_iemsk = fsi_core_read(master, iemsk); - master->saved_imsk = fsi_core_read(master, imsk); - master->saved_clk_rst = fsi_master_read(master, CLK_RST); - master->saved_soft_rst = fsi_master_read(master, SOFT_RST); + __fsi_suspend(fsia, 1, dev); + __fsi_suspend(fsia, 0, dev); - fsi_module_kill(master, dev); - - pm_runtime_put_sync(dev); + __fsi_suspend(fsib, 1, dev); + __fsi_suspend(fsib, 0, dev); return 0; } @@ -1359,23 +1384,14 @@ static int fsi_suspend(struct device *dev) static int fsi_resume(struct device *dev) { struct fsi_master *master = dev_get_drvdata(dev); - set_rate_func set_rate = fsi_get_info_set_rate(master); - - pm_runtime_get_sync(dev); - - fsi_module_init(master, dev); + struct fsi_priv *fsia = &master->fsia; + struct fsi_priv *fsib = &master->fsib; - fsi_master_mask_set(master, SOFT_RST, 0xffff, master->saved_soft_rst); - fsi_master_mask_set(master, CLK_RST, 0xffff, master->saved_clk_rst); - fsi_core_mask_set(master, a_mclk, 0xffff, master->saved_a_mclk); - fsi_core_mask_set(master, b_mclk, 0xffff, master->saved_b_mclk); - fsi_core_mask_set(master, iemsk, 0xffff, master->saved_iemsk); - fsi_core_mask_set(master, imsk, 0xffff, master->saved_imsk); + __fsi_resume(fsia, 1, dev); + __fsi_resume(fsia, 0, dev); - __fsi_resume(&master->fsia, dev, set_rate); - __fsi_resume(&master->fsib, dev, set_rate); - - pm_runtime_put_sync(dev); + __fsi_resume(fsib, 1, dev); + __fsi_resume(fsib, 0, dev); return 0; } diff --git a/sound/soc/sh/sh7760-ac97.c b/sound/soc/sh/sh7760-ac97.c index 917d3ce..c62ae68 100644 --- a/sound/soc/sh/sh7760-ac97.c +++ b/sound/soc/sh/sh7760-ac97.c @@ -20,12 +20,6 @@ extern struct snd_soc_dai_driver sh4_hac_dai[2]; extern struct snd_soc_platform_driver sh7760_soc_platform; -static int machine_init(struct snd_soc_pcm_runtime *rtd) -{ - snd_soc_dapm_sync(&rtd->codec->dapm); - return 0; -} - static struct snd_soc_dai_link sh7760_ac97_dai = { .name = "AC97", .stream_name = "AC97 HiFi", @@ -33,7 +27,6 @@ static struct snd_soc_dai_link sh7760_ac97_dai = { .codec_dai_name = "ac97-hifi", .platform_name = "sh7760-pcm-audio", .codec_name = "ac97-codec", - .init = machine_init, .ops = NULL, }; diff --git a/sound/soc/sh/siu_dai.c b/sound/soc/sh/siu_dai.c index 4973c29..edacfeb 100644 --- a/sound/soc/sh/siu_dai.c +++ b/sound/soc/sh/siu_dai.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/sound/soc/sh/siu_pcm.c b/sound/soc/sh/siu_pcm.c index a423bab..f8f6816 100644 --- a/sound/soc/sh/siu_pcm.c +++ b/sound/soc/sh/siu_pcm.c @@ -527,10 +527,11 @@ static snd_pcm_uframes_t siu_pcm_pointer_dma(struct snd_pcm_substream *ss) return bytes_to_frames(ss->runtime, ptr); } -static int siu_pcm_new(struct snd_card *card, struct snd_soc_dai *dai, - struct snd_pcm *pcm) +static int siu_pcm_new(struct snd_soc_pcm_runtime *rtd) { /* card->dev == socdev->dev, see snd_soc_new_pcms() */ + struct snd_card *card = rtd->card->snd_card; + struct snd_pcm *pcm = rtd->pcm; struct siu_info *info = siu_i2s_data; struct platform_device *pdev = to_platform_device(card->dev); int ret; diff --git a/sound/soc/sh/ssi.c b/sound/soc/sh/ssi.c index 05192d9..e0c621c 100644 --- a/sound/soc/sh/ssi.c +++ b/sound/soc/sh/ssi.c @@ -342,7 +342,7 @@ static struct snd_soc_dai_ops ssi_dai_ops = { .set_fmt = ssi_set_fmt, }; -struct snd_soc_dai_driver sh4_ssi_dai[] = { +static struct snd_soc_dai_driver sh4_ssi_dai[] = { { .name = "ssi-dai.0", .playback = { diff --git a/sound/soc/tegra/Kconfig b/sound/soc/tegra/Kconfig index 035d39a..c6af1fd 100644 --- a/sound/soc/tegra/Kconfig +++ b/sound/soc/tegra/Kconfig @@ -12,6 +12,15 @@ config SND_SOC_TEGRA_I2S Tegra I2S interface. You will also need to select the individual machine drivers to support below. +config SND_SOC_TEGRA_SPDIF + tristate + depends on SND_SOC_TEGRA + default m + help + Say Y or M if you want to add support for the SPDIF interface. + You will also need to select the individual machine drivers to support + below. + config MACH_HAS_SND_SOC_TEGRA_WM8903 bool help diff --git a/sound/soc/tegra/Makefile b/sound/soc/tegra/Makefile index fa6574d..4d943b3 100644 --- a/sound/soc/tegra/Makefile +++ b/sound/soc/tegra/Makefile @@ -2,12 +2,14 @@ snd-soc-tegra-das-objs := tegra_das.o snd-soc-tegra-pcm-objs := tegra_pcm.o snd-soc-tegra-i2s-objs := tegra_i2s.o +snd-soc-tegra-spdif-objs := tegra_spdif.o snd-soc-tegra-utils-objs += tegra_asoc_utils.o obj-$(CONFIG_SND_SOC_TEGRA) += snd-soc-tegra-utils.o obj-$(CONFIG_SND_SOC_TEGRA) += snd-soc-tegra-das.o obj-$(CONFIG_SND_SOC_TEGRA) += snd-soc-tegra-pcm.o obj-$(CONFIG_SND_SOC_TEGRA_I2S) += snd-soc-tegra-i2s.o +obj-$(CONFIG_SND_SOC_TEGRA_SPDIF) += snd-soc-tegra-spdif.o # Tegra machine Support snd-soc-tegra-wm8903-objs := tegra_wm8903.o diff --git a/sound/soc/tegra/tegra_asoc_utils.c b/sound/soc/tegra/tegra_asoc_utils.c index dfa85cb..f8428e4 100644 --- a/sound/soc/tegra/tegra_asoc_utils.c +++ b/sound/soc/tegra/tegra_asoc_utils.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "tegra_asoc_utils.h" diff --git a/sound/soc/tegra/tegra_das.c b/sound/soc/tegra/tegra_das.c index 9f24ef7..3b55a44 100644 --- a/sound/soc/tegra/tegra_das.c +++ b/sound/soc/tegra/tegra_das.c @@ -212,7 +212,7 @@ err_release: release_mem_region(res->start, resource_size(res)); err_free: kfree(das); - das = 0; + das = NULL; exit: return ret; } @@ -234,7 +234,7 @@ static int __devexit tegra_das_remove(struct platform_device *pdev) release_mem_region(res->start, resource_size(res)); kfree(das); - das = 0; + das = NULL; return 0; } diff --git a/sound/soc/tegra/tegra_i2s.c b/sound/soc/tegra/tegra_i2s.c index 95f03c1..6728fab 100644 --- a/sound/soc/tegra/tegra_i2s.c +++ b/sound/soc/tegra/tegra_i2s.c @@ -312,7 +312,7 @@ static struct snd_soc_dai_ops tegra_i2s_dai_ops = { .trigger = tegra_i2s_trigger, }; -struct snd_soc_dai_driver tegra_i2s_dai[] = { +static struct snd_soc_dai_driver tegra_i2s_dai[] = { { .name = DRV_NAME ".0", .probe = tegra_i2s_probe, @@ -354,7 +354,6 @@ struct snd_soc_dai_driver tegra_i2s_dai[] = { static __devinit int tegra_i2s_platform_probe(struct platform_device *pdev) { struct tegra_i2s * i2s; - char clk_name[12]; /* tegra-i2s.0 */ struct resource *mem, *memregion, *dmareq; int ret; @@ -389,8 +388,7 @@ static __devinit int tegra_i2s_platform_probe(struct platform_device *pdev) } dev_set_drvdata(&pdev->dev, i2s); - snprintf(clk_name, sizeof(clk_name), DRV_NAME ".%d", pdev->id); - i2s->clk_i2s = clk_get_sys(clk_name, NULL); + i2s->clk_i2s = clk_get(&pdev->dev, NULL); if (IS_ERR(i2s->clk_i2s)) { dev_err(&pdev->dev, "Can't retrieve i2s clock\n"); ret = PTR_ERR(i2s->clk_i2s); diff --git a/sound/soc/tegra/tegra_pcm.c b/sound/soc/tegra/tegra_pcm.c index 6201710..436def1 100644 --- a/sound/soc/tegra/tegra_pcm.c +++ b/sound/soc/tegra/tegra_pcm.c @@ -327,9 +327,11 @@ static void tegra_pcm_deallocate_dma_buffer(struct snd_pcm *pcm, int stream) static u64 tegra_dma_mask = DMA_BIT_MASK(32); -static int tegra_pcm_new(struct snd_card *card, - struct snd_soc_dai *dai, struct snd_pcm *pcm) +static int tegra_pcm_new(struct snd_soc_pcm_runtime *rtd) { + struct snd_card *card = rtd->card->snd_card; + struct snd_soc_dai *dai = rtd->cpu_dai; + struct snd_pcm *pcm = rtd->pcm; int ret = 0; if (!card->dev->dma_mask) @@ -365,7 +367,7 @@ static void tegra_pcm_free(struct snd_pcm *pcm) tegra_pcm_deallocate_dma_buffer(pcm, SNDRV_PCM_STREAM_PLAYBACK); } -struct snd_soc_platform_driver tegra_pcm_platform = { +static struct snd_soc_platform_driver tegra_pcm_platform = { .ops = &tegra_pcm_ops, .pcm_new = tegra_pcm_new, .pcm_free = tegra_pcm_free, diff --git a/sound/soc/tegra/tegra_wm8903.c b/sound/soc/tegra/tegra_wm8903.c index 7766478..a81cf39 100644 --- a/sound/soc/tegra/tegra_wm8903.c +++ b/sound/soc/tegra/tegra_wm8903.c @@ -268,7 +268,7 @@ static int tegra_wm8903_init(struct snd_soc_pcm_runtime *rtd) } machine->gpio_requested |= GPIO_HP_MUTE; - gpio_direction_output(pdata->gpio_hp_mute, 0); + gpio_direction_output(pdata->gpio_hp_mute, 1); } if (gpio_is_valid(pdata->gpio_int_mic_en)) { @@ -319,7 +319,7 @@ static int tegra_wm8903_init(struct snd_soc_pcm_runtime *rtd) snd_soc_dapm_force_enable_pin(dapm, "Mic Bias"); /* FIXME: Calculate automatically based on DAPM routes? */ - if (!machine_is_harmony() && !machine_is_ventana()) + if (!machine_is_harmony()) snd_soc_dapm_nc_pin(dapm, "IN1L"); if (!machine_is_seaboard() && !machine_is_aebl()) snd_soc_dapm_nc_pin(dapm, "IN1R"); @@ -339,8 +339,6 @@ static int tegra_wm8903_init(struct snd_soc_pcm_runtime *rtd) snd_soc_dapm_nc_pin(dapm, "LINEOUTL"); } - snd_soc_dapm_sync(dapm); - return 0; } @@ -395,7 +393,7 @@ static __devinit int tegra_wm8903_driver_probe(struct platform_device *pdev) platform_set_drvdata(pdev, card); snd_soc_card_set_drvdata(card, machine); - if (machine_is_harmony() || machine_is_ventana()) { + if (machine_is_harmony()) { card->dapm_routes = harmony_audio_map; card->num_dapm_routes = ARRAY_SIZE(harmony_audio_map); } else if (machine_is_seaboard()) { diff --git a/sound/soc/tegra/trimslice.c b/sound/soc/tegra/trimslice.c index 8fc07e9..b3a7efa 100644 --- a/sound/soc/tegra/trimslice.c +++ b/sound/soc/tegra/trimslice.c @@ -124,8 +124,6 @@ static int trimslice_asoc_init(struct snd_soc_pcm_runtime *rtd) snd_soc_dapm_nc_pin(dapm, "RHPOUT"); snd_soc_dapm_nc_pin(dapm, "MICIN"); - snd_soc_dapm_sync(dapm); - return 0; } diff --git a/sound/soc/txx9/txx9aclc-ac97.c b/sound/soc/txx9/txx9aclc-ac97.c index 743d07b..a4e3f55 100644 --- a/sound/soc/txx9/txx9aclc-ac97.c +++ b/sound/soc/txx9/txx9aclc-ac97.c @@ -201,7 +201,7 @@ static int __devinit txx9aclc_ac97_dev_probe(struct platform_device *pdev) if (!drvdata->base) return -EBUSY; err = devm_request_irq(&pdev->dev, irq, txx9aclc_ac97_irq, - IRQF_DISABLED, dev_name(&pdev->dev), drvdata); + 0, dev_name(&pdev->dev), drvdata); if (err < 0) return err; diff --git a/sound/soc/txx9/txx9aclc-generic.c b/sound/soc/txx9/txx9aclc-generic.c index 6770e71..9b5e283 100644 --- a/sound/soc/txx9/txx9aclc-generic.c +++ b/sound/soc/txx9/txx9aclc-generic.c @@ -62,7 +62,7 @@ static int __exit txx9aclc_generic_remove(struct platform_device *pdev) } static struct platform_driver txx9aclc_generic_driver = { - .remove = txx9aclc_generic_remove, + .remove = __exit_p(txx9aclc_generic_remove), .driver = { .name = "txx9aclc-generic", .owner = THIS_MODULE, diff --git a/sound/soc/txx9/txx9aclc.c b/sound/soc/txx9/txx9aclc.c index f4aa4e0..3de99af 100644 --- a/sound/soc/txx9/txx9aclc.c +++ b/sound/soc/txx9/txx9aclc.c @@ -288,9 +288,11 @@ static void txx9aclc_pcm_free_dma_buffers(struct snd_pcm *pcm) snd_pcm_lib_preallocate_free_for_all(pcm); } -static int txx9aclc_pcm_new(struct snd_card *card, struct snd_soc_dai *dai, - struct snd_pcm *pcm) +static int txx9aclc_pcm_new(struct snd_soc_pcm_runtime *rtd) { + struct snd_card *card = rtd->card->snd_card; + struct snd_soc_dai *dai = rtd->cpu_dai; + struct snd_pcm *pcm = rtd->pcm; struct platform_device *pdev = to_platform_device(dai->platform->dev); struct txx9aclc_soc_device *dev; struct resource *r; diff --git a/sound/sparc/amd7930.c b/sound/sparc/amd7930.c index ad7d4d7..f036776 100644 --- a/sound/sparc/amd7930.c +++ b/sound/sparc/amd7930.c @@ -962,7 +962,7 @@ static int __devinit snd_amd7930_create(struct snd_card *card, amd7930_idle(amd); if (request_irq(irq, snd_amd7930_interrupt, - IRQF_DISABLED | IRQF_SHARED, "amd7930", amd)) { + IRQF_SHARED, "amd7930", amd)) { snd_printk(KERN_ERR "amd7930-%d: Unable to grab IRQ %d\n", dev, irq); snd_amd7930_free(amd); diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c index 73f9cba..4a4f1d7 100644 --- a/sound/sparc/dbri.c +++ b/sound/sparc/dbri.c @@ -69,7 +69,8 @@ #include #include -#include +#include +#include MODULE_AUTHOR("Rudolf Koenig, Brent Baccala and Martin Habets"); MODULE_DESCRIPTION("Sun DBRI"); diff --git a/sound/synth/emux/emux.c b/sound/synth/emux/emux.c index f16a3fc..9352207 100644 --- a/sound/synth/emux/emux.c +++ b/sound/synth/emux/emux.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "emux_voice.h" MODULE_AUTHOR("Takashi Iwai"); diff --git a/sound/synth/emux/emux_oss.c b/sound/synth/emux/emux_oss.c index 87e4220..daf61ab 100644 --- a/sound/synth/emux/emux_oss.c +++ b/sound/synth/emux/emux_oss.c @@ -25,6 +25,7 @@ #ifdef CONFIG_SND_SEQUENCER_OSS +#include #include #include #include "emux_voice.h" @@ -117,12 +118,8 @@ snd_emux_open_seq_oss(struct snd_seq_oss_arg *arg, void *closure) if (snd_BUG_ON(!arg || !emu)) return -ENXIO; - mutex_lock(&emu->register_mutex); - - if (!snd_emux_inc_count(emu)) { - mutex_unlock(&emu->register_mutex); + if (!snd_emux_inc_count(emu)) return -EFAULT; - } memset(&callback, 0, sizeof(callback)); callback.owner = THIS_MODULE; @@ -134,7 +131,6 @@ snd_emux_open_seq_oss(struct snd_seq_oss_arg *arg, void *closure) if (p == NULL) { snd_printk(KERN_ERR "can't create port\n"); snd_emux_dec_count(emu); - mutex_unlock(&emu->register_mutex); return -ENOMEM; } @@ -147,8 +143,6 @@ snd_emux_open_seq_oss(struct snd_seq_oss_arg *arg, void *closure) reset_port_mode(p, arg->seq_mode); snd_emux_reset_port(p); - - mutex_unlock(&emu->register_mutex); return 0; } @@ -194,13 +188,11 @@ snd_emux_close_seq_oss(struct snd_seq_oss_arg *arg) if (snd_BUG_ON(!emu)) return -ENXIO; - mutex_lock(&emu->register_mutex); snd_emux_sounds_off_all(p); snd_soundfont_close_check(emu->sflist, SF_CLIENT_NO(p->chset.port)); snd_seq_event_port_detach(p->chset.client, p->chset.port); snd_emux_dec_count(emu); - mutex_unlock(&emu->register_mutex); return 0; } diff --git a/sound/synth/emux/emux_seq.c b/sound/synth/emux/emux_seq.c index ca5f7ef..a020920 100644 --- a/sound/synth/emux/emux_seq.c +++ b/sound/synth/emux/emux_seq.c @@ -21,7 +21,7 @@ #include "emux_voice.h" #include - +#include /* Prototypes for static functions */ static void free_port(void *private); @@ -124,12 +124,10 @@ snd_emux_detach_seq(struct snd_emux *emu) if (emu->voices) snd_emux_terminate_all(emu); - mutex_lock(&emu->register_mutex); if (emu->client >= 0) { snd_seq_delete_kernel_client(emu->client); emu->client = -1; } - mutex_unlock(&emu->register_mutex); } @@ -269,8 +267,8 @@ snd_emux_event_input(struct snd_seq_event *ev, int direct, void *private_data, /* * increment usage count */ -int -snd_emux_inc_count(struct snd_emux *emu) +static int +__snd_emux_inc_count(struct snd_emux *emu) { emu->used++; if (!try_module_get(emu->ops.owner)) @@ -284,12 +282,21 @@ snd_emux_inc_count(struct snd_emux *emu) return 1; } +int snd_emux_inc_count(struct snd_emux *emu) +{ + int ret; + + mutex_lock(&emu->register_mutex); + ret = __snd_emux_inc_count(emu); + mutex_unlock(&emu->register_mutex); + return ret; +} /* * decrease usage count */ -void -snd_emux_dec_count(struct snd_emux *emu) +static void +__snd_emux_dec_count(struct snd_emux *emu) { module_put(emu->card->module); emu->used--; @@ -298,6 +305,12 @@ snd_emux_dec_count(struct snd_emux *emu) module_put(emu->ops.owner); } +void snd_emux_dec_count(struct snd_emux *emu) +{ + mutex_lock(&emu->register_mutex); + __snd_emux_dec_count(emu); + mutex_unlock(&emu->register_mutex); +} /* * Routine that is called upon a first use of a particular port @@ -317,7 +330,7 @@ snd_emux_use(void *private_data, struct snd_seq_port_subscribe *info) mutex_lock(&emu->register_mutex); snd_emux_init_port(p); - snd_emux_inc_count(emu); + __snd_emux_inc_count(emu); mutex_unlock(&emu->register_mutex); return 0; } @@ -340,7 +353,7 @@ snd_emux_unuse(void *private_data, struct snd_seq_port_subscribe *info) mutex_lock(&emu->register_mutex); snd_emux_sounds_off_all(p); - snd_emux_dec_count(emu); + __snd_emux_dec_count(emu); mutex_unlock(&emu->register_mutex); return 0; } diff --git a/sound/synth/emux/emux_synth.c b/sound/synth/emux/emux_synth.c index 3e921b3..9a38de4 100644 --- a/sound/synth/emux/emux_synth.c +++ b/sound/synth/emux/emux_synth.c @@ -22,6 +22,7 @@ * */ +#include #include "emux_voice.h" #include diff --git a/sound/synth/emux/soundfont.c b/sound/synth/emux/soundfont.c index 67c9123..1137b85 100644 --- a/sound/synth/emux/soundfont.c +++ b/sound/synth/emux/soundfont.c @@ -27,6 +27,7 @@ */ #include #include +#include #include #include #include diff --git a/sound/synth/util_mem.c b/sound/synth/util_mem.c index c85522e..8e34bc4 100644 --- a/sound/synth/util_mem.c +++ b/sound/synth/util_mem.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/sound/usb/6fire/chip.c b/sound/usb/6fire/chip.c index c7dca7b..46a2816 100644 --- a/sound/usb/6fire/chip.c +++ b/sound/usb/6fire/chip.c @@ -102,7 +102,7 @@ static int __devinit usb6fire_chip_probe(struct usb_interface *intf, usb_set_intfdata(intf, chips[i]); mutex_unlock(®ister_mutex); return 0; - } else if (regidx < 0) + } else if (!devices[i] && regidx < 0) regidx = i; } if (regidx < 0) { diff --git a/sound/usb/6fire/comm.c b/sound/usb/6fire/comm.c index c994daa..af6ec8d 100644 --- a/sound/usb/6fire/comm.c +++ b/sound/usb/6fire/comm.c @@ -111,19 +111,37 @@ static int usb6fire_comm_send_buffer(u8 *buffer, struct usb_device *dev) static int usb6fire_comm_write8(struct comm_runtime *rt, u8 request, u8 reg, u8 value) { - u8 buffer[13]; /* 13: maximum length of message */ + u8 *buffer; + int ret; + + /* 13: maximum length of message */ + buffer = kmalloc(13, GFP_KERNEL); + if (!buffer) + return -ENOMEM; usb6fire_comm_init_buffer(buffer, 0x00, request, reg, value, 0x00); - return usb6fire_comm_send_buffer(buffer, rt->chip->dev); + ret = usb6fire_comm_send_buffer(buffer, rt->chip->dev); + + kfree(buffer); + return ret; } static int usb6fire_comm_write16(struct comm_runtime *rt, u8 request, u8 reg, u8 vl, u8 vh) { - u8 buffer[13]; /* 13: maximum length of message */ + u8 *buffer; + int ret; + + /* 13: maximum length of message */ + buffer = kmalloc(13, GFP_KERNEL); + if (!buffer) + return -ENOMEM; usb6fire_comm_init_buffer(buffer, 0x00, request, reg, vl, vh); - return usb6fire_comm_send_buffer(buffer, rt->chip->dev); + ret = usb6fire_comm_send_buffer(buffer, rt->chip->dev); + + kfree(buffer); + return ret; } int __devinit usb6fire_comm_init(struct sfire_chip *chip) @@ -136,6 +154,12 @@ int __devinit usb6fire_comm_init(struct sfire_chip *chip) if (!rt) return -ENOMEM; + rt->receiver_buffer = kzalloc(COMM_RECEIVER_BUFSIZE, GFP_KERNEL); + if (!rt->receiver_buffer) { + kfree(rt); + return -ENOMEM; + } + rt->serial = 1; rt->chip = chip; usb_init_urb(urb); @@ -153,6 +177,7 @@ int __devinit usb6fire_comm_init(struct sfire_chip *chip) urb->interval = 1; ret = usb_submit_urb(urb, GFP_KERNEL); if (ret < 0) { + kfree(rt->receiver_buffer); kfree(rt); snd_printk(KERN_ERR PREFIX "cannot create comm data receiver."); return ret; @@ -171,6 +196,9 @@ void usb6fire_comm_abort(struct sfire_chip *chip) void usb6fire_comm_destroy(struct sfire_chip *chip) { - kfree(chip->comm); + struct comm_runtime *rt = chip->comm; + + kfree(rt->receiver_buffer); + kfree(rt); chip->comm = NULL; } diff --git a/sound/usb/6fire/comm.h b/sound/usb/6fire/comm.h index edc5dc8..19e2f92 100644 --- a/sound/usb/6fire/comm.h +++ b/sound/usb/6fire/comm.h @@ -25,7 +25,7 @@ struct comm_runtime { struct sfire_chip *chip; struct urb receiver; - u8 receiver_buffer[COMM_RECEIVER_BUFSIZE]; + u8 *receiver_buffer; u8 serial; /* urb serial */ diff --git a/sound/usb/6fire/firmware.c b/sound/usb/6fire/firmware.c index 1e3ae33..3b5f517 100644 --- a/sound/usb/6fire/firmware.c +++ b/sound/usb/6fire/firmware.c @@ -15,7 +15,9 @@ */ #include +#include #include +#include #include "firmware.h" #include "chip.h" @@ -59,21 +61,19 @@ struct ihex_record { unsigned int txt_offset; /* current position in txt_data */ }; -static u8 usb6fire_fw_ihex_nibble(const u8 n) -{ - if (n >= '0' && n <= '9') - return n - '0'; - else if (n >= 'A' && n <= 'F') - return n - ('A' - 10); - else if (n >= 'a' && n <= 'f') - return n - ('a' - 10); - return 0; -} - static u8 usb6fire_fw_ihex_hex(const u8 *data, u8 *crc) { - u8 val = (usb6fire_fw_ihex_nibble(data[0]) << 4) | - usb6fire_fw_ihex_nibble(data[1]); + u8 val = 0; + int hval; + + hval = hex_to_bin(data[0]); + if (hval >= 0) + val |= (hval << 4); + + hval = hex_to_bin(data[1]); + if (hval >= 0) + val |= hval; + *crc += val; return val; } diff --git a/sound/usb/6fire/midi.c b/sound/usb/6fire/midi.c index 13f4509..8283c5d 100644 --- a/sound/usb/6fire/midi.c +++ b/sound/usb/6fire/midi.c @@ -20,6 +20,10 @@ #include "chip.h" #include "comm.h" +enum { + MIDI_BUFSIZE = 64 +}; + static void usb6fire_midi_out_handler(struct urb *urb) { struct midi_runtime *rt = urb->context; @@ -157,6 +161,12 @@ int __devinit usb6fire_midi_init(struct sfire_chip *chip) if (!rt) return -ENOMEM; + rt->out_buffer = kzalloc(MIDI_BUFSIZE, GFP_KERNEL); + if (!rt->out_buffer) { + kfree(rt); + return -ENOMEM; + } + rt->chip = chip; rt->in_received = usb6fire_midi_in_received; rt->out_buffer[0] = 0x80; /* 'send midi' command */ @@ -170,6 +180,7 @@ int __devinit usb6fire_midi_init(struct sfire_chip *chip) ret = snd_rawmidi_new(chip->card, "6FireUSB", 0, 1, 1, &rt->instance); if (ret < 0) { + kfree(rt->out_buffer); kfree(rt); snd_printk(KERN_ERR PREFIX "unable to create midi.\n"); return ret; @@ -198,6 +209,9 @@ void usb6fire_midi_abort(struct sfire_chip *chip) void usb6fire_midi_destroy(struct sfire_chip *chip) { - kfree(chip->midi); + struct midi_runtime *rt = chip->midi; + + kfree(rt->out_buffer); + kfree(rt); chip->midi = NULL; } diff --git a/sound/usb/6fire/midi.h b/sound/usb/6fire/midi.h index 97a7bf6..7f8f448 100644 --- a/sound/usb/6fire/midi.h +++ b/sound/usb/6fire/midi.h @@ -17,10 +17,6 @@ #include "common.h" -enum { - MIDI_BUFSIZE = 64 -}; - struct midi_runtime { struct sfire_chip *chip; struct snd_rawmidi *instance; @@ -33,7 +29,7 @@ struct midi_runtime { struct snd_rawmidi_substream *out; struct urb out_urb; u8 out_serial; /* serial number of out packet */ - u8 out_buffer[MIDI_BUFSIZE]; + u8 *out_buffer; int buffer_offset; void (*in_received)(struct midi_runtime *rt, u8 *data, int length); diff --git a/sound/usb/6fire/pcm.c b/sound/usb/6fire/pcm.c index d2fb012..8609c74 100644 --- a/sound/usb/6fire/pcm.c +++ b/sound/usb/6fire/pcm.c @@ -579,6 +579,33 @@ static void __devinit usb6fire_pcm_init_urb(struct pcm_urb *urb, urb->instance.number_of_packets = PCM_N_PACKETS_PER_URB; } +static int usb6fire_pcm_buffers_init(struct pcm_runtime *rt) +{ + int i; + + for (i = 0; i < PCM_N_URBS; i++) { + rt->out_urbs[i].buffer = kzalloc(PCM_N_PACKETS_PER_URB + * PCM_MAX_PACKET_SIZE, GFP_KERNEL); + if (!rt->out_urbs[i].buffer) + return -ENOMEM; + rt->in_urbs[i].buffer = kzalloc(PCM_N_PACKETS_PER_URB + * PCM_MAX_PACKET_SIZE, GFP_KERNEL); + if (!rt->in_urbs[i].buffer) + return -ENOMEM; + } + return 0; +} + +static void usb6fire_pcm_buffers_destroy(struct pcm_runtime *rt) +{ + int i; + + for (i = 0; i < PCM_N_URBS; i++) { + kfree(rt->out_urbs[i].buffer); + kfree(rt->in_urbs[i].buffer); + } +} + int __devinit usb6fire_pcm_init(struct sfire_chip *chip) { int i; @@ -590,6 +617,13 @@ int __devinit usb6fire_pcm_init(struct sfire_chip *chip) if (!rt) return -ENOMEM; + ret = usb6fire_pcm_buffers_init(rt); + if (ret) { + usb6fire_pcm_buffers_destroy(rt); + kfree(rt); + return ret; + } + rt->chip = chip; rt->stream_state = STREAM_DISABLED; rt->rate = ARRAY_SIZE(rates); @@ -611,6 +645,7 @@ int __devinit usb6fire_pcm_init(struct sfire_chip *chip) ret = snd_pcm_new(chip->card, "DMX6FireUSB", 0, 1, 1, &pcm); if (ret < 0) { + usb6fire_pcm_buffers_destroy(rt); kfree(rt); snd_printk(KERN_ERR PREFIX "cannot create pcm instance.\n"); return ret; @@ -626,6 +661,7 @@ int __devinit usb6fire_pcm_init(struct sfire_chip *chip) snd_dma_continuous_data(GFP_KERNEL), MAX_BUFSIZE, MAX_BUFSIZE); if (ret) { + usb6fire_pcm_buffers_destroy(rt); kfree(rt); snd_printk(KERN_ERR PREFIX "error preallocating pcm buffers.\n"); @@ -640,17 +676,25 @@ int __devinit usb6fire_pcm_init(struct sfire_chip *chip) void usb6fire_pcm_abort(struct sfire_chip *chip) { struct pcm_runtime *rt = chip->pcm; + unsigned long flags; int i; if (rt) { rt->panic = true; - if (rt->playback.instance) + if (rt->playback.instance) { + snd_pcm_stream_lock_irqsave(rt->playback.instance, flags); snd_pcm_stop(rt->playback.instance, SNDRV_PCM_STATE_XRUN); - if (rt->capture.instance) + snd_pcm_stream_unlock_irqrestore(rt->playback.instance, flags); + } + + if (rt->capture.instance) { + snd_pcm_stream_lock_irqsave(rt->capture.instance, flags); snd_pcm_stop(rt->capture.instance, SNDRV_PCM_STATE_XRUN); + snd_pcm_stream_unlock_irqrestore(rt->capture.instance, flags); + } for (i = 0; i < PCM_N_URBS; i++) { usb_poison_urb(&rt->in_urbs[i].instance); @@ -662,6 +706,9 @@ void usb6fire_pcm_abort(struct sfire_chip *chip) void usb6fire_pcm_destroy(struct sfire_chip *chip) { - kfree(chip->pcm); + struct pcm_runtime *rt = chip->pcm; + + usb6fire_pcm_buffers_destroy(rt); + kfree(rt); chip->pcm = NULL; } diff --git a/sound/usb/6fire/pcm.h b/sound/usb/6fire/pcm.h index 2bee813..a8e8899 100644 --- a/sound/usb/6fire/pcm.h +++ b/sound/usb/6fire/pcm.h @@ -33,7 +33,7 @@ struct pcm_urb { struct urb instance; struct usb_iso_packet_descriptor packets[PCM_N_PACKETS_PER_URB]; /* END DO NOT SEPARATE */ - u8 buffer[PCM_N_PACKETS_PER_URB * PCM_MAX_PACKET_SIZE]; + u8 *buffer; struct pcm_urb *peer; }; diff --git a/sound/usb/caiaq/device.c b/sound/usb/caiaq/device.c index 45bc4a2..3eb605b 100644 --- a/sound/usb/caiaq/device.c +++ b/sound/usb/caiaq/device.c @@ -50,7 +50,8 @@ MODULE_SUPPORTED_DEVICE("{{Native Instruments, RigKontrol2}," "{Native Instruments, Session I/O}," "{Native Instruments, GuitarRig mobile}" "{Native Instruments, Traktor Kontrol X1}" - "{Native Instruments, Traktor Kontrol S4}"); + "{Native Instruments, Traktor Kontrol S4}" + "{Native Instruments, Maschine Controller}"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-max */ static char* id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* Id for this card */ @@ -146,6 +147,11 @@ static struct usb_device_id snd_usb_id_table[] = { .idVendor = USB_VID_NATIVEINSTRUMENTS, .idProduct = USB_PID_TRAKTORAUDIO2 }, + { + .match_flags = USB_DEVICE_ID_MATCH_DEVICE, + .idVendor = USB_VID_NATIVEINSTRUMENTS, + .idProduct = USB_PID_MASCHINECONTROLLER + }, { /* terminator */ } }; diff --git a/sound/usb/caiaq/device.h b/sound/usb/caiaq/device.h index 3f9c633..562b0bf 100644 --- a/sound/usb/caiaq/device.h +++ b/sound/usb/caiaq/device.h @@ -18,6 +18,7 @@ #define USB_PID_TRAKTORKONTROLX1 0x2305 #define USB_PID_TRAKTORKONTROLS4 0xbaff #define USB_PID_TRAKTORAUDIO2 0x041d +#define USB_PID_MASCHINECONTROLLER 0x0808 #define EP1_BUFSIZE 64 #define EP4_BUFSIZE 512 diff --git a/sound/usb/caiaq/input.c b/sound/usb/caiaq/input.c index a213813..26a121b 100644 --- a/sound/usb/caiaq/input.c +++ b/sound/usb/caiaq/input.c @@ -67,6 +67,61 @@ static unsigned short keycode_kore[] = { KEY_BRL_DOT5 }; +#define MASCHINE_BUTTONS (42) +#define MASCHINE_BUTTON(X) ((X) + BTN_MISC) +#define MASCHINE_PADS (16) +#define MASCHINE_PAD(X) ((X) + ABS_PRESSURE) + +static unsigned short keycode_maschine[] = { + MASCHINE_BUTTON(40), /* mute */ + MASCHINE_BUTTON(39), /* solo */ + MASCHINE_BUTTON(38), /* select */ + MASCHINE_BUTTON(37), /* duplicate */ + MASCHINE_BUTTON(36), /* navigate */ + MASCHINE_BUTTON(35), /* pad mode */ + MASCHINE_BUTTON(34), /* pattern */ + MASCHINE_BUTTON(33), /* scene */ + KEY_RESERVED, /* spacer */ + + MASCHINE_BUTTON(30), /* rec */ + MASCHINE_BUTTON(31), /* erase */ + MASCHINE_BUTTON(32), /* shift */ + MASCHINE_BUTTON(28), /* grid */ + MASCHINE_BUTTON(27), /* > */ + MASCHINE_BUTTON(26), /* < */ + MASCHINE_BUTTON(25), /* restart */ + + MASCHINE_BUTTON(21), /* E */ + MASCHINE_BUTTON(22), /* F */ + MASCHINE_BUTTON(23), /* G */ + MASCHINE_BUTTON(24), /* H */ + MASCHINE_BUTTON(20), /* D */ + MASCHINE_BUTTON(19), /* C */ + MASCHINE_BUTTON(18), /* B */ + MASCHINE_BUTTON(17), /* A */ + + MASCHINE_BUTTON(0), /* control */ + MASCHINE_BUTTON(2), /* browse */ + MASCHINE_BUTTON(4), /* < */ + MASCHINE_BUTTON(6), /* snap */ + MASCHINE_BUTTON(7), /* autowrite */ + MASCHINE_BUTTON(5), /* > */ + MASCHINE_BUTTON(3), /* sampling */ + MASCHINE_BUTTON(1), /* step */ + + MASCHINE_BUTTON(15), /* 8 softkeys */ + MASCHINE_BUTTON(14), + MASCHINE_BUTTON(13), + MASCHINE_BUTTON(12), + MASCHINE_BUTTON(11), + MASCHINE_BUTTON(10), + MASCHINE_BUTTON(9), + MASCHINE_BUTTON(8), + + MASCHINE_BUTTON(16), /* note repeat */ + MASCHINE_BUTTON(29) /* play */ +}; + #define KONTROLX1_INPUTS (40) #define KONTROLS4_BUTTONS (12 * 8) #define KONTROLS4_AXIS (46) @@ -218,6 +273,29 @@ static void snd_caiaq_input_read_erp(struct snd_usb_caiaqdev *dev, input_report_abs(input_dev, ABS_HAT3Y, i); input_sync(input_dev); break; + + case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_MASCHINECONTROLLER): + /* 4 under the left screen */ + input_report_abs(input_dev, ABS_HAT0X, decode_erp(buf[21], buf[20])); + input_report_abs(input_dev, ABS_HAT0Y, decode_erp(buf[15], buf[14])); + input_report_abs(input_dev, ABS_HAT1X, decode_erp(buf[9], buf[8])); + input_report_abs(input_dev, ABS_HAT1Y, decode_erp(buf[3], buf[2])); + + /* 4 under the right screen */ + input_report_abs(input_dev, ABS_HAT2X, decode_erp(buf[19], buf[18])); + input_report_abs(input_dev, ABS_HAT2Y, decode_erp(buf[13], buf[12])); + input_report_abs(input_dev, ABS_HAT3X, decode_erp(buf[7], buf[6])); + input_report_abs(input_dev, ABS_HAT3Y, decode_erp(buf[1], buf[0])); + + /* volume */ + input_report_abs(input_dev, ABS_RX, decode_erp(buf[17], buf[16])); + /* tempo */ + input_report_abs(input_dev, ABS_RY, decode_erp(buf[11], buf[10])); + /* swing */ + input_report_abs(input_dev, ABS_RZ, decode_erp(buf[5], buf[4])); + + input_sync(input_dev); + break; } } @@ -400,6 +478,25 @@ static void snd_usb_caiaq_tks4_dispatch(struct snd_usb_caiaqdev *dev, input_sync(dev->input_dev); } +#define MASCHINE_MSGBLOCK_SIZE 2 + +static void snd_usb_caiaq_maschine_dispatch(struct snd_usb_caiaqdev *dev, + const unsigned char *buf, + unsigned int len) +{ + unsigned int i, pad_id; + uint16_t pressure; + + for (i = 0; i < MASCHINE_PADS; i++) { + pressure = be16_to_cpu(buf[i * 2] << 8 | buf[(i * 2) + 1]); + pad_id = pressure >> 12; + + input_report_abs(dev->input_dev, MASCHINE_PAD(pad_id), pressure & 0xfff); + } + + input_sync(dev->input_dev); +} + static void snd_usb_caiaq_ep4_reply_dispatch(struct urb *urb) { struct snd_usb_caiaqdev *dev = urb->context; @@ -425,6 +522,13 @@ static void snd_usb_caiaq_ep4_reply_dispatch(struct urb *urb) case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_TRAKTORKONTROLS4): snd_usb_caiaq_tks4_dispatch(dev, buf, urb->actual_length); break; + + case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_MASCHINECONTROLLER): + if (urb->actual_length < (MASCHINE_PADS * MASCHINE_MSGBLOCK_SIZE)) + goto requeue; + + snd_usb_caiaq_maschine_dispatch(dev, buf, urb->actual_length); + break; } requeue: @@ -444,6 +548,7 @@ static int snd_usb_caiaq_input_open(struct input_dev *idev) switch (dev->chip.usb_id) { case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_TRAKTORKONTROLX1): case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_TRAKTORKONTROLS4): + case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_MASCHINECONTROLLER): if (usb_submit_urb(dev->ep4_in_urb, GFP_KERNEL) != 0) return -EIO; break; @@ -462,6 +567,7 @@ static void snd_usb_caiaq_input_close(struct input_dev *idev) switch (dev->chip.usb_id) { case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_TRAKTORKONTROLX1): case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_TRAKTORKONTROLS4): + case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_MASCHINECONTROLLER): usb_kill_urb(dev->ep4_in_urb); break; } @@ -652,6 +758,50 @@ int snd_usb_caiaq_input_init(struct snd_usb_caiaqdev *dev) break; + case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_MASCHINECONTROLLER): + input->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); + input->absbit[0] = BIT_MASK(ABS_HAT0X) | BIT_MASK(ABS_HAT0Y) | + BIT_MASK(ABS_HAT1X) | BIT_MASK(ABS_HAT1Y) | + BIT_MASK(ABS_HAT2X) | BIT_MASK(ABS_HAT2Y) | + BIT_MASK(ABS_HAT3X) | BIT_MASK(ABS_HAT3Y) | + BIT_MASK(ABS_RX) | BIT_MASK(ABS_RY) | + BIT_MASK(ABS_RZ); + + BUILD_BUG_ON(sizeof(dev->keycode) < sizeof(keycode_maschine)); + memcpy(dev->keycode, keycode_maschine, sizeof(keycode_maschine)); + input->keycodemax = ARRAY_SIZE(keycode_maschine); + + for (i = 0; i < MASCHINE_PADS; i++) { + input->absbit[0] |= MASCHINE_PAD(i); + input_set_abs_params(input, MASCHINE_PAD(i), 0, 0xfff, 5, 10); + } + + input_set_abs_params(input, ABS_HAT0X, 0, 999, 0, 10); + input_set_abs_params(input, ABS_HAT0Y, 0, 999, 0, 10); + input_set_abs_params(input, ABS_HAT1X, 0, 999, 0, 10); + input_set_abs_params(input, ABS_HAT1Y, 0, 999, 0, 10); + input_set_abs_params(input, ABS_HAT2X, 0, 999, 0, 10); + input_set_abs_params(input, ABS_HAT2Y, 0, 999, 0, 10); + input_set_abs_params(input, ABS_HAT3X, 0, 999, 0, 10); + input_set_abs_params(input, ABS_HAT3Y, 0, 999, 0, 10); + input_set_abs_params(input, ABS_RX, 0, 999, 0, 10); + input_set_abs_params(input, ABS_RY, 0, 999, 0, 10); + input_set_abs_params(input, ABS_RZ, 0, 999, 0, 10); + + dev->ep4_in_urb = usb_alloc_urb(0, GFP_KERNEL); + if (!dev->ep4_in_urb) { + ret = -ENOMEM; + goto exit_free_idev; + } + + usb_fill_bulk_urb(dev->ep4_in_urb, usb_dev, + usb_rcvbulkpipe(usb_dev, 0x4), + dev->ep4_in_buf, EP4_BUFSIZE, + snd_usb_caiaq_ep4_reply_dispatch, dev); + + snd_usb_caiaq_set_auto_msg(dev, 1, 10, 5); + break; + default: /* no input methods supported on this device */ goto exit_free_idev; @@ -664,15 +814,17 @@ int snd_usb_caiaq_input_init(struct snd_usb_caiaqdev *dev) for (i = 0; i < input->keycodemax; i++) __set_bit(dev->keycode[i], input->keybit); + dev->input_dev = input; + ret = input_register_device(input); if (ret < 0) goto exit_free_idev; - dev->input_dev = input; return 0; exit_free_idev: input_free_device(input); + dev->input_dev = NULL; return ret; } @@ -688,4 +840,3 @@ void snd_usb_caiaq_input_free(struct snd_usb_caiaqdev *dev) input_unregister_device(dev->input_dev); dev->input_dev = NULL; } - -- cgit v1.1