diff options
Diffstat (limited to 'arch/arm/mvp/mvpkm/mksck_kernel.c')
-rw-r--r-- | arch/arm/mvp/mvpkm/mksck_kernel.c | 2589 |
1 files changed, 2589 insertions, 0 deletions
diff --git a/arch/arm/mvp/mvpkm/mksck_kernel.c b/arch/arm/mvp/mvpkm/mksck_kernel.c new file mode 100644 index 0000000..6811a68 --- /dev/null +++ b/arch/arm/mvp/mvpkm/mksck_kernel.c @@ -0,0 +1,2589 @@ +/* + * Linux 2.6.32 and later Kernel module for VMware MVP Hypervisor Support + * + * Copyright (C) 2010-2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; see the file COPYING. If not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ +#line 5 + +/** + * @file + * + * @brief The monitor/kernel socket interface kernel extension. 
 */

#define __KERNEL_SYSCALLS__
#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/proc_fs.h>
#include <linux/fcntl.h>
#include <linux/syscalls.h>
#include <linux/kmod.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/miscdevice.h>
#include <linux/poll.h>
#include <linux/rcupdate.h>
#include <linux/smp.h>
#include <linux/spinlock.h>

#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/file.h>
#include <linux/vmalloc.h>

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#include <net/sock.h>

#include <asm/memory.h>
#include <asm/system.h>
#include <asm/uaccess.h>

#include "mvp.h"
#include "actions.h"
#include "mvpkm_kernel.h"
#include "mksck_kernel.h"
#include "mksck_sockaddr.h"
#include "mutex_kernel.h"

/**
 * @brief Print the details of a fatal error and terminate the calling thread.
 *
 * The arguments are handed unchanged to FATALERROR_COMMON together with
 * printk/vprintk as the output routines; a stack dump follows.
 *
 * @param file   source file name reported with the error
 * @param line   source line reported with the error
 * @param feCode fatal error code
 * @param bugno  bug number associated with the error
 * @param fmt    printf-style format string, followed by its arguments
 *
 * Does not return: the function ends in do_exit(1).
 */
void NORETURN FatalError(char const *file,
                         int line,
                         FECode feCode,
                         int bugno,
                         char const *fmt,
                         ...)
{
   /* Lock around printing the error details so that the messages from multiple
    * threads are not interleaved. */
   static DEFINE_MUTEX(fatalErrorMutex);
   mutex_lock(&fatalErrorMutex);

   FATALERROR_COMMON(printk, vprintk, file, line, feCode, bugno, fmt);

   dump_stack();

   /* done printing */
   mutex_unlock(&fatalErrorMutex);

   /* do_exit below exits the current thread but does not crash the kernel.
    * Hence the stack dump will actually be readable from other user threads.
    */
   do_exit(1);
}


/*
 * The project uses a new address family: AF_MKSCK. Optimally this address
 * family were accepted with the Linux community and a permanent number
 * were assigned. This, however, is a dream only, not even the x86 team
 * has been able to pull it off.
 *
 * Instead we ASSUME that DECnet is dead and re-use its address family number.
+ * This is what the x86 world is moving too in the latest versions. + */ + +static struct proto mksckProto = { + .name = "AF_MKSCK", + .owner = THIS_MODULE, + .obj_size = sizeof (struct sock), +}; + +static int MksckCreate(struct net *net, + struct socket *sock, + int protocol, + int kern); + +static struct net_proto_family mksckFamilyOps = { + .family = AF_MKSCK, + .owner = THIS_MODULE, + .create = MksckCreate, +}; + +static int MksckFault(struct vm_area_struct *vma, struct vm_fault *vmf); + + +/** + * @brief Linux vma operations for receive windows established via Mksck + * mmap. + */ +static struct vm_operations_struct mksckVMOps = { + .fault = MksckFault +}; + +/* + * List of hosts and guests we know about. + */ +static spinlock_t mksckPageListLock; +static MksckPage *mksckPages[MKSCK_MAX_SHARES]; + +/* + * The following functions form the AF_MKSCK DGRAM operations. + */ +static int MksckRelease(struct socket *sock); +static int MksckBacklogRcv(struct sock *sk, struct sk_buff *skb); +static void MksckSkDestruct(struct sock *sk); +static int MksckBind(struct socket *sock, + struct sockaddr *addr, + int addrLen); +static int MksckBindGeneric(struct sock *sk, + Mksck_Address addr); +static int MksckDgramRecvMsg(struct kiocb *kiocb, + struct socket *sock, + struct msghdr *msg, + size_t len, + int flags); +static int MksckDgramSendMsg(struct kiocb *kiocb, + struct socket *sock, + struct msghdr *msg, + size_t len); +static int MksckGetName(struct socket *sock, + struct sockaddr *addr, + int *addrLen, + int peer); +static unsigned int MksckPoll(struct file *filp, + struct socket *sock, + poll_table *wait); +static int MksckDgramConnect(struct socket *sock, + struct sockaddr *addr, + int addrLen, + int flags); +static int MksckMMap(struct file *file, + struct socket *sock, + struct vm_area_struct *vma); + +static void MksckPageRelease(MksckPage *mksckPage); + +static struct proto_ops mksckDgramOps = { + .family = AF_MKSCK, + .owner = THIS_MODULE, + .release = 
MksckRelease, + .bind = MksckBind, + .connect = MksckDgramConnect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = MksckGetName, + .poll = MksckPoll, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, /* MksckShutdown, */ + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = MksckDgramSendMsg, + .recvmsg = MksckDgramRecvMsg, + .mmap = MksckMMap, + .sendpage = sock_no_sendpage, +}; + + +/** + * @brief Initialize the MKSCK protocol + * + * @return 0 on success, -errno on failure + */ +int +Mksck_Init(void) +{ + int err; + + spin_lock_init(&mksckPageListLock); + + /* + * Create a slab to allocate socket structs from. + */ + err = proto_register(&mksckProto, 1); + if (err != 0) { + printk(KERN_INFO + "Mksck_Init: Cannot register MKSCK protocol, errno = %d.\n", err); + return err; + } + + /* + * Register the socket family + */ + err = sock_register(&mksckFamilyOps); + if (err < 0) { + printk(KERN_INFO + "Mksck_Init: Could not register address family AF_MKSCK" + " (errno = %d).\n", err); + return err; + } + + return 0; +} + + +/** + * @brief De-register the MKSCK protocol + */ +void +Mksck_Exit(void) +{ + sock_unregister(mksckFamilyOps.family); + proto_unregister(&mksckProto); +} + + +/** + * @brief Create a new MKSCK socket + * + * @param net network namespace (2.6.24 or above) + * @param sock user socket structure + * @param protocol protocol to be used + * @param kern called from kernel mode + * + * @return 0 on success, -errno on failure + */ +static int +MksckCreate(struct net *net, + struct socket *sock, + int protocol, + int kern) +{ + struct sock *sk; + uid_t currentUid = current_euid(); + + if (!(currentUid == 0 || + currentUid == Mvpkm_vmwareUid)) { + printk(KERN_WARNING + "MksckCreate: rejected from process %s tgid=%d, pid=%d euid:%d.\n", + current->comm, + task_tgid_vnr(current), + task_pid_vnr(current), + currentUid); + return -EPERM; + } + + if (!sock) { + return 
-EINVAL; + } + + if (protocol) { + return -EPROTONOSUPPORT; + } + + switch (sock->type) { + case SOCK_DGRAM: { + sock->ops = &mksckDgramOps; + break; + } + default: { + return -ESOCKTNOSUPPORT; + } + } + + sock->state = SS_UNCONNECTED; + + /* + * Most recently (in 2.6.24), sk_alloc() was changed to expect the + * network namespace, and the option to zero the sock was dropped. + */ + sk = sk_alloc(net, mksckFamilyOps.family, GFP_KERNEL, &mksckProto); + + if (!sk) { + return -ENOMEM; + } + + sock_init_data(sock, sk); + + sk->sk_type = SOCK_DGRAM; + sk->sk_destruct = MksckSkDestruct; + sk->sk_backlog_rcv = MksckBacklogRcv; + + /* + * On socket lock... + * + * A bound socket will have an associated private area, the Mksck + * structure part of MksckPage. That area is pointed to by + * sk->sk_protinfo. In addition, a connected socket will have the + * peer field in its associated area set to point to the associated + * private area of the peer socket. A mechanism is needed to ensure + * that these private areas area not freed while they are being + * accessed within the scope of a function. A simple lock would not + * suffice as the interface functions (like MksckDgramRecvMsg()) + * may block. Hence a reference count mechanism is employed. When + * the mentioned references (sk->sk_protinfo and mksck->peer) to + * the respective private areas are set a refcount is incremented, + * and decremented when the references are deleted. + * + * The refcounts of areas pointed to by sk->sk_protinfo and + * mksck->peer will be decremented under the lock of the socket. + * Hence these private areas cannot disappear as long as the socket + * lock is held. 
+ * + * The interface functions will have one of the following + * structures: + * + * simpleFn(sk) + * { + * lock_sock(sk); + * if ((mksck = sk->sk_protinfo)) { + * <non-blocking use of mksck> + * } + * release_sock(sk); + * } + * + * complexFn(sk) + * { + * lock_sock(sk); + * if ((mksck = sk->sk_protinfo)) { + * IncRefc(mksck); + * } + * release_sock(sk); + * + * if (mksck) { + * <use of mksck in a potentially blocking manner> + * DecRefc(mksck); + * } + * } + */ + sk->sk_protinfo = NULL; + sock_reset_flag(sk, SOCK_DONE); + + return 0; +} + + +/** + * @brief Delete a MKSCK socket + * + * @param sock user socket structure + * + * @return 0 on success, -errno on failure + */ +static int +MksckRelease(struct socket *sock) +{ + struct sock *sk = sock->sk; + + if (sk) { + lock_sock(sk); + sock_orphan(sk); + release_sock(sk); + sock_put(sk); + } + + sock->sk = NULL; + sock->state = SS_FREE; + + return 0; +} + + +static int +MksckBacklogRcv(struct sock *sk, struct sk_buff *skb) +{ + /* + * We should never get these as we never queue an skb. + */ + printk("MksckBacklogRcv: should never get here\n"); + return -EIO; +} + + +/** + * @brief Callback at socket destruction + * + * @param sk pointer to kernel socket structure + */ +static void +MksckSkDestruct(struct sock *sk) +{ + Mksck *mksck; + + lock_sock(sk); + mksck = sk->sk_protinfo; + + if (mksck != NULL) { + sk->sk_protinfo = NULL; + Mksck_CloseCommon(mksck); + } + + if (sk->sk_user_data != NULL) { + sock_kfree_s(sk, sk->sk_user_data, sizeof(int)); + sk->sk_user_data = NULL; + } + + release_sock(sk); +} + + +/** + * @brief Set the local address of a MKSCK socket + * + * @param sk kernel socket structure + * @param addr the new address of the socket + * + * @return 0 on success, -errno on failure + * + * If addr.port is undefined a new random port is assigned. + * If addr.vmId is undefined then the vmId computed from the tgid is used. + * Hence the vmId of a socket does not determine the host all the time. 
+ * + * Assumed that the socket is locked. + * This function is called by explicit set (MksckBind) and implicit (Send). + */ +static int +MksckBindGeneric(struct sock *sk, + Mksck_Address addr) +{ + int err; + Mksck *mksck; + MksckPage *mksckPage; + + if (sk->sk_protinfo != NULL) { + return -EISCONN; + } + + /* + * Locate the page for the given host and increment its reference + * count so it can't get freed off while we are working on it. + */ + if (addr.vmId == MKSCK_VMID_UNDEF) { + mksckPage = MksckPage_GetFromTgidIncRefc(); + } else { + printk(KERN_WARNING "MksckBind: host bind called on vmid 0x%X\n", addr.vmId); + mksckPage = MksckPage_GetFromVmIdIncRefc(addr.vmId); + } + + if (mksckPage == NULL) { + printk(KERN_INFO "MksckBind: no mksckPage for vm 0x%X\n", addr.vmId); + return -ENETUNREACH; + } + addr.vmId = mksckPage->vmId; + + /* + * Before we can find an unused socket port on the page we have to + * lock the page for exclusive access so another thread can't + * allocate the same port. + */ + err = Mutex_Lock(&mksckPage->mutex, MutexModeEX); + if (err < 0) { + goto outDec; + } + + addr.port = MksckPage_GetFreePort(mksckPage, addr.port); + if (addr.port == MKSCK_PORT_UNDEF) { + err = -EINVAL; + goto outUnlockDec; + } + + /* + * At this point we have the mksckPage locked for exclusive access + * and its reference count incremented. Also, addr is completely + * filled in with vmId and port that we want to bind. + * + * Find an available mksck struct on the shared page and initialize + * it. + */ + mksck = MksckPage_AllocSocket(mksckPage, addr); + if (mksck == NULL) { + err = -EMFILE; + goto outUnlockDec; + } + + /* + * Stable, release mutex. Leave mksckPage->refCount incremented so + * mksckPage can't be freed until socket is closed. + */ + Mutex_Unlock(&mksckPage->mutex, MutexModeEX); + + /* + * This is why we start mksck->refCount at 1. When sk_protinfo gets + * cleared, we decrement mksck->refCount. 
+ */ + sk->sk_protinfo = mksck; + + PRINTK(KERN_DEBUG "MksckBind: socket bound to %08X\n", mksck->addr.addr); + + return 0; + +outUnlockDec: + Mutex_Unlock(&mksckPage->mutex, MutexModeEX); +outDec: + MksckPage_DecRefc(mksckPage); + return err; +} + + +/** + * @brief Test if the socket is already bound to a local address and, + * if not, bind it to an unused address. + * + * @param sk kernel socket structure + * @return 0 on success, -errno on failure + * + * Assumed that the socket is locked. + */ +static inline int +MksckTryBind(struct sock *sk) +{ + int err = 0; + + if (!sk->sk_protinfo) { + static const Mksck_Address addr = { .addr = MKSCK_ADDR_UNDEF }; + err = MksckBindGeneric(sk, addr); + } + return err; +} + + + +/** + * @brief Set the address of a MKSCK socket (user call) + * + * @param sock user socket structure + * @param addr the new address of the socket + * @param addrLen length of the address + * + * @return 0 on success, -errno on failure + */ +static int +MksckBind(struct socket *sock, + struct sockaddr *addr, + int addrLen) +{ + int err; + struct sock *sk = sock->sk; + struct sockaddr_mk *addrMk = (struct sockaddr_mk *)addr; + + if (addrLen != sizeof *addrMk) { + return -EINVAL; + } + if (addrMk->mk_family != AF_MKSCK) { + return -EAFNOSUPPORT; + } + + /* + * Obtain the socket lock and call the generic Bind function. 
+ */ + lock_sock(sk); + err = MksckBindGeneric(sk, addrMk->mk_addr); + release_sock(sk); + + return err; +} + +/** + * @brief Lock the peer socket by locating it, incrementing its refc + * @param addr the address of the peer socket + * @param[out] peerMksckR set to the locked peer socket pointer + * upon successful lookup + * @return 0 on success, -errno on failure + */ +static int +LockPeer(Mksck_Address addr, Mksck **peerMksckR) +{ + int err = 0; + MksckPage *peerMksckPage = MksckPage_GetFromVmIdIncRefc(addr.vmId); + Mksck *peerMksck; + + /* + * Find corresponding destination shared page and increment its + * reference count so it can't be freed while we are sending to the + * socket. Make sure that the address is indeed an address of a + * monitor/guest socket. + */ + if (peerMksckPage == NULL) { + printk(KERN_INFO "LockPeer: vmId %x is not in use!\n", addr.vmId); + return -ENETUNREACH; + } + if (!peerMksckPage->isGuest) { + MksckPage_DecRefc(peerMksckPage); + printk(KERN_INFO "LockPeer: vmId %x does not belong to a guest!\n", + addr.vmId); + return -ENETUNREACH; + } + + + err = Mutex_Lock(&peerMksckPage->mutex, MutexModeSH); + if (err < 0) { + MksckPage_DecRefc(peerMksckPage); + return err; + } + + /* + * Find corresponding destination socket on that shared page and + * increment its reference count so it can't be freed while we are + * trying to send to it. 
+ */ + peerMksck = MksckPage_GetFromAddr(peerMksckPage, addr); + + if (peerMksck) { + ATOMIC_ADDV(peerMksck->refCount, 1); + *peerMksckR = peerMksck; + } else { + printk(KERN_INFO "LockPeer: addr %x is not a defined socket!\n", + addr.addr); + err = -ENETUNREACH; + } + + Mutex_Unlock(&peerMksckPage->mutex, MutexModeSH); + MksckPage_DecRefc(peerMksckPage); + return err; +} + +/** + * @brief Set the peer address of a MKSCK socket + * + * @param sock user socket structure + * @param addr the new address of the socket + * @param addrLen length of the address + * @param flags flags + * + * @return 0 on success, -errno on failure + */ +static int +MksckDgramConnect(struct socket *sock, + struct sockaddr *addr, + int addrLen, + int flags) +{ + struct sock *sk = sock->sk; + Mksck *mksck; + struct sockaddr_mk *peerAddrMk = (struct sockaddr_mk *)addr; + int err = 0; + + if (addrLen != sizeof *peerAddrMk) { + printk(KERN_INFO "MksckConnect: wrong address length!\n"); + return -EINVAL; + } + if (peerAddrMk->mk_family != AF_MKSCK) { + printk(KERN_INFO "MksckConnect: wrong address family!\n"); + return -EAFNOSUPPORT; + } + + lock_sock(sk); + + if ((err = MksckTryBind(sk))) { + goto releaseSock; + } + mksck = sk->sk_protinfo; + + /* + * First severe any past peer connections + */ + Mksck_DisconnectPeer(mksck); + sock->state = SS_UNCONNECTED; + + /* + * Then build new connections ... 
+ */ + if (peerAddrMk->mk_addr.addr != MKSCK_ADDR_UNDEF) { + sock->state = SS_CONNECTED; + mksck->peerAddr = peerAddrMk->mk_addr; + err = LockPeer(mksck->peerAddr, &mksck->peer); + PRINTK(KERN_DEBUG "MksckConnect: socket %x is connected to %x!\n", + mksck->addr.addr, mksck->peerAddr.addr); + } + +releaseSock: + release_sock(sk); + + return err; +} + + +/** + * @brief returns the address of a MKSCK socket/peer address + * + * @param sock user socket structure + * @param addr the new address of the socket + * @param addrLen length of the address + * @param peer 1 if the peer address is sought + * + * @return 0 on success, -errno on failure + */ +static int +MksckGetName(struct socket *sock, + struct sockaddr *addr, + int *addrLen, + int peer) +{ + int err; + Mksck *mksck; + struct sock *sk = sock->sk; + + // MAX_SOCK_ADDR is size of *addr, Linux doesn't export it! + // ASSERT_ON_COMPILE(sizeof (struct sockaddr_mk) <= MAX_SOCK_ADDR); + + lock_sock(sk); + mksck = sk->sk_protinfo; + + if (mksck == NULL) { + if (peer) { + err = -ENOTCONN; + } else { + ((struct sockaddr_mk *)addr)->mk_family = AF_MKSCK; + ((struct sockaddr_mk *)addr)->mk_addr.addr = MKSCK_ADDR_UNDEF; + *addrLen = sizeof (struct sockaddr_mk); + err = 0; + } + } else if (!peer) { + ((struct sockaddr_mk *)addr)->mk_family = AF_MKSCK; + ((struct sockaddr_mk *)addr)->mk_addr = mksck->addr; + *addrLen = sizeof (struct sockaddr_mk); + err = 0; + } else if (mksck->peerAddr.addr == MKSCK_ADDR_UNDEF) { + err = -ENOTCONN; + } else { + ((struct sockaddr_mk *)addr)->mk_family = AF_MKSCK; + ((struct sockaddr_mk *)addr)->mk_addr = mksck->peerAddr; + *addrLen = sizeof (struct sockaddr_mk); + err = 0; + } + + release_sock(sk); + + return err; +} + + +/** + * @brief VMX polling a receipted packet from VMM. + * + * @param filp kernel file pointer to poll for + * @param sock user socket structure + * @param wait kernel polling table where to poll if not null + * + * @return poll mask state given from socket state. 
+ */ +static unsigned int MksckPoll(struct file *filp, + struct socket *sock, + poll_table *wait) +{ + struct sock *sk = sock->sk; + unsigned int mask = 0; + Mksck *mksck = NULL; + uint32 read; + int err; + + lock_sock(sk); + if ((err = MksckTryBind(sk))) { + release_sock(sk); + return err; + } + mksck = sk->sk_protinfo; + + /* + * To avoid mksck disappearing right after the release_sock the + * refcount needs to be incremented. For more details read the + * block comment on locking in MksckCreate. + */ + ATOMIC_ADDV(mksck->refCount, 1); + release_sock(sk); + + /* + * Wait to make sure this is the only thread trying to access socket. + */ + if ((err = Mutex_Lock(&mksck->mutex, MutexModeEX)) < 0) { + /* we might get in this situation if we are signaled + (select() may handle this, so leave) */ + PRINTK(KERN_INFO "MksckPoll: try to abort\n"); + return mask; + } + + /* + * See if packet in ring. + */ + read = mksck->read; + if (read != mksck->write) { + mask |= POLLIN | POLLRDNORM; /* readable, socket is unlocked */ + /* Note that if we are implementing support for POLLOUT, we SHOULD + change this Mutex_Unlock by Mutex_UnlPoll, because there is no + obvious knowledge about the sleepy reason that is intended by user */ + Mutex_Unlock(&mksck->mutex, MutexModeEX); + } else { + Mutex_UnlPoll(&mksck->mutex, MutexModeEX, MKSCK_CVAR_FILL, filp, wait); + } + + /* + * Note that locking rules differ a little inside MksckPoll, since we are + * not only given a pointer to the struct socket but also a pointer to a + * struct file. This means that during the whole operation of this function + * and during any pending wait (registered with poll_wait()), the file itself + * is reference counted up, and we should rely on that 'upper' reference + * counting to prevent from tearing the Mksck down. That holds true since one + * never re-bind sockets ! + */ + Mksck_DecRefc(mksck); + return mask; +} + +/** + * @brief Manage a set of Mksck_PageDesc from a message or a stored array. 
+ * + * @param pd set of Mksck_PageDesc + * @param pages Mksck_PageDesc pages count for this management operation + * @param incr ternary used to indicate if we want to reference (+1), or + * dereference (-1), or count (0) 4k pages + * + * @return length of bytes processed. + */ +static size_t +MksckPageDescManage(Mksck_PageDesc *pd, + uint32 pages, + int incr) +{ + size_t payloadLen = 0; + uint32 i; + + for (i = 0; i < pages && pd[i].mpn != INVALID_MPN; ++i) { + uint32 j; + + for (j = 0; j < 1 << pd[i].order; ++j) { + struct page *page; + MPN currMPN = pd[i].mpn + j; + + /* + * The monitor tried to send an invalid MPN, bad. + */ + if (!pfn_valid(currMPN)) { + printk("MksckPageDescManage: Invalid MPN %x\n", currMPN); + } else { + page = pfn_to_page(currMPN); + + if (incr == +1) { + get_page(page); + } + if (incr == -1) { + put_page(page); + } + } + + payloadLen += PAGE_SIZE; + } + } + + return payloadLen; +} + +/** + * @brief Management values to be used as third parameter of MksckPageDescManage + */ +#define MANAGE_INCREMENT +1 +#define MANAGE_DECREMENT -1 +#define MANAGE_COUNT 0 + + +/** + * @brief Map a set of Mksck_PageDesc from a message or a stored array. + * + * @param pd set of Mksck_PageDesc + * @param pages pages count for this mapping + * @param iov vectored user virtual addresses of the recv commands + * @param iovCount size for iov parameter + * @param vma virtual memory area used for the mapping, note that + * this is mandatorily required MksckPageDescMap is used + * on an indirect PageDesc context (i.e whenever iov is + * not computed by the kernel but by ourselves). + * + * Since find_vma() and vm_insert_page() are used, this function must + * be called with current's mmap_sem locked, or inside an MMap operation. + * + * @return length of bytes mapped. 
+ */ +static size_t +MksckPageDescMap(Mksck_PageDesc *pd, + uint32 pages, + struct iovec *iov, + int iovCount, + struct vm_area_struct *vma) +{ + size_t payloadLen = 0; + uint32 i; + + for (i = 0; i < pages && pd[i].mpn != INVALID_MPN; ++i) { + uint32 j; + + for (j = 0; j < 1 << pd[i].order; ++j) { + HUVA huva = 0; + struct page *page; + MPN currMPN = pd[i].mpn + j; + + while (iovCount > 0 && iov->iov_len == 0) { + iovCount--; + iov++; + } + + if (iovCount == 0) { + printk("MksckPageDescMap: Invalid iov length\n"); + goto map_done; + } + + huva = (HUVA)iov->iov_base; + + /* + * iovecs for receiving the typed component of the message should + * have page aligned base and size sufficient for page descriptor's + * mappings. + */ + if (huva & (PAGE_SIZE - 1) || iov->iov_len < PAGE_SIZE) { + printk("MksckPageDescMap: Invalid huva %x or iov_len %d\n", + huva, + iov->iov_len); + goto map_done; + } + + /* + * Might be in a new vma... + */ + if (vma == NULL || huva < vma->vm_start || huva >= vma->vm_end) { + vma = find_vma(current->mm, huva); + + /* + * Couldn't find a matching vma for huva. + */ + if (vma == NULL || + huva < vma->vm_start || + vma->vm_ops != &mksckVMOps) { + printk("MksckPageDescMap: Invalid vma\n"); + goto map_done; + } + } + + /* + * The monitor tried to send an invalid MPN, bad. + */ + if (!pfn_valid(currMPN)) { + printk("MksckPageDescMap: Invalid MPN %x\n", currMPN); + } else { + int rc; + + page = pfn_to_page(currMPN); + + /* + * Map into the receive window. + */ + rc = vm_insert_page(vma, huva, page); + if (rc) { + printk("MksckPageDescMap: Failed to insert %x at %x, error %d\n", + currMPN, + huva, + rc); + goto map_done; + } + + ASSERT(iov->iov_len >= PAGE_SIZE); + iov->iov_base += PAGE_SIZE; + iov->iov_len -= PAGE_SIZE; + } + + payloadLen += PAGE_SIZE; + } + } + +map_done: + return payloadLen; +} + + +/** + * @brief Check if the provided MsgHdr has still room for a receive operation. 
+ * + * @param msg user buffer + * @return 1 if MsgHdr has IO space room in order to receive a mapping, 0 otherwise. + */ +static int +MsgHdrHasAvailableRoom(struct msghdr *msg) +{ + struct iovec *vec = msg->msg_iov; + uint32 count = msg->msg_iovlen; + + while (count > 0 && vec->iov_len == 0) { + count--; + vec++; + } + + return (count != 0); +} + + +/** + * Whenever a typed message is received from the monitor, we may choose to store + * all the page descriptor content in a linked state of descriptors, through the + * following information context + */ +typedef struct MksckPageDescInfo { + struct MksckPageDescInfo *next; + uint32 flags; + uint32 pages; + uint32 mapCounts; + Mksck_PageDesc descs[0]; +} MksckPageDescInfo; + +static void MksckPageDescSkDestruct(struct sock *sk); +static int MksckPageDescMMap(struct file *file, + struct socket *sock, + struct vm_area_struct *vma); +static int MksckPageDescIoctl(struct socket *sock, + unsigned int cmd, + unsigned long arg); + +/** + * @brief Delete a page descriptor container socket + * + * @param sock user socket structure + * @return 0 on success, -errno on failure + */ +static int +MksckPageDescRelease(struct socket *sock) +{ + /* This is generic socket release */ + struct sock *sk = sock->sk; + + if (sk) { + lock_sock(sk); + sock_orphan(sk); + release_sock(sk); + sock_put(sk); + } + + sock->sk = NULL; + sock->state = SS_FREE; + + return 0; +} + + +/** + * Whenever a typed message is received from the monitor, we may choose to store + * all the page descriptor content for a future mapping. One shall put a context + * usable by host userland, that means trough a file descriptor, and as a secure + * implementation we choose to define a strict set of operations that are used + * only for that purpose. This set of operation is reduced to leaving the + * default "PageDesc(s) accumulating" mode (inside ioctl), mapping the context, + * and generic socket destruction. 
+ */ +static struct proto_ops mksckPageDescOps = { + .family = AF_MKSCK, + .owner = THIS_MODULE, + .release = MksckPageDescRelease, + .bind = sock_no_bind, + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = sock_no_getname, + .poll = sock_no_poll, + .ioctl = MksckPageDescIoctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = sock_no_sendmsg, + .recvmsg = sock_no_recvmsg, + .mmap = MksckPageDescMMap, + .sendpage = sock_no_sendpage, +}; + + +/** + * @brief Create or accumulate to a PageDesc context, backed as a descriptor. + * + * @param sock user socket structure + * @param msg user buffer to receive the file descriptor as ancillary data + * @param pd source descriptor part of a message + * @param pages pages count for this mapping + * + * @return error if negative, 0 otherwise + * + */ +static int +MksckPageDescToFd(struct socket *sock, + struct msghdr *msg, + Mksck_PageDesc *pd, + uint32 pages) +{ + int retval; + int newfd; + struct socket *newsock; + struct sock *newsk; + struct sock *sk = sock->sk; + MksckPageDescInfo **pmpdi, *mpdi; + lock_sock(sk); + + /* + * Relation between any mk socket and the PageDesc context is as follow: + * + * From the mk socket to the PageDesc context: + * - sk->sk_user_data is a WEAK LINK, containing only a file descriptor + * numerical value such that accumulating is keyed on it. + * + * From the PageDesc context to the mk socket: + * - sk->sk_protinfo contains a MksckPageDescInfo struct. + * - sk->sk_user_data is a pointer REF-COUNTED sock_hold() LINK, also it is + * rarely dereferenced but usually used to check that the + * right socket pair is used. Full dereferencing is used + * only to break the described links. 
+ */ + if (sk->sk_user_data) { + MksckPageDescInfo *mpdi2; + + /* continue any previous on-going mapping, i.e accumulate */ + newfd = *((int *)sk->sk_user_data); + newsock = sockfd_lookup(newfd, &retval); // promote the weak link + if (!newsock) { + retval = -EINVAL; + goto endProcessingReleaseSock; + } + + newsk = newsock->sk; + lock_sock(newsk); + sockfd_put(newsock); + + if (((struct sock *)newsk->sk_user_data) != sk) { + /* One way of going into this situation would be for userland to dup + the file descriptor just received, close the original number, and + open a new mk socket in the very same spot. The userland code have + a lot of way of interacting with the kernel without this driver + code to be notified. */ + retval = -EINVAL; + release_sock(newsk); + goto endProcessingReleaseSock; + } + + mpdi = sock_kmalloc(newsk, sizeof(MksckPageDescInfo) + + pages*sizeof(Mksck_PageDesc), GFP_KERNEL); + if (IS_ERR(mpdi)) { + retval = PTR_ERR(mpdi); + release_sock(newsk); + goto endProcessingReleaseSock; + } + + /* There is no mandatory needs for us to notify userland from + the progress in "appending" to the file descriptor, but it + would feel strange if the userland would have no mean to + tell if the received message was just not thrown away. So, in + order to be consistent one fill the ancillary message while + "creating" and "appending to" file descriptors. */ + retval = put_cmsg(msg, SOL_DECNET, 0, sizeof(int), &newfd); + if (retval < 0) { + goto endProcessingKFreeReleaseSock; + } + + release_sock(sk); + + mpdi2 = (MksckPageDescInfo *)newsk->sk_protinfo; + while (mpdi2->next) { + mpdi2 = mpdi2->next; + } + pmpdi = &(mpdi2->next); + + } else { + /* Create a new socket, new context and a new file descriptor. 
*/ + retval = sock_create(sk->sk_family, sock->type, 0, &newsock); + if (retval < 0) { + goto endProcessingReleaseSock; + } + + newsk = newsock->sk; + lock_sock(newsk); + newsk->sk_destruct = &MksckPageDescSkDestruct; + newsk->sk_user_data = sk; + sock_hold(sk); // keeps a reference to parent mk socket + newsock->ops = &mksckPageDescOps; + + mpdi = sock_kmalloc(newsk, sizeof(MksckPageDescInfo) + + pages*sizeof(Mksck_PageDesc), GFP_KERNEL); + if (IS_ERR(mpdi)) { + retval = PTR_ERR(mpdi); + goto endProcessingFreeNewSock; + } + + sk->sk_user_data = sock_kmalloc(sk, sizeof(int), GFP_KERNEL); + if (IS_ERR(sk->sk_user_data)) { + retval = PTR_ERR(sk->sk_user_data); + sk->sk_user_data = NULL; + goto endProcessingKFreeAndNewSock; + } + + /* mapping to a file descriptor may fail if a thread is closing + in parallel of sock_map_fd/sock_alloc_fd, or kernel memory is full */ + newfd = sock_map_fd(newsock, O_CLOEXEC); + if (newfd < 0) { + retval = newfd; + sock_kfree_s(sk, sk->sk_user_data, sizeof(int)); + sk->sk_user_data = NULL; + goto endProcessingKFreeAndNewSock; + } + + /* notify userland from a new file descriptor, alike AF_UNIX ancillary */ + retval = put_cmsg(msg, SOL_DECNET, 0, sizeof(int), &newfd); + if (retval < 0) { + sock_kfree_s(sk, sk->sk_user_data, sizeof(int)); + sk->sk_user_data = NULL; + sock_kfree_s(newsk, mpdi, sizeof(MksckPageDescInfo) + + mpdi->pages*sizeof(Mksck_PageDesc)); + release_sock(newsk); + sockfd_put(newsock); + sock_release(newsock); + put_unused_fd(newfd); + goto endProcessingReleaseSock; + } + + *(int*)sk->sk_user_data = newfd; + release_sock(sk); + pmpdi = (MksckPageDescInfo **)(&(newsk->sk_protinfo)); + } + + mpdi->next = NULL; + mpdi->flags = 0; + mpdi->mapCounts = 0; + mpdi->pages = pages; + memcpy(mpdi->descs, pd, pages*sizeof(Mksck_PageDesc)); + + *pmpdi = mpdi; // link + release_sock(newsk); + + /* increment all reference counters for the pages */ + MksckPageDescManage(pd, pages, MANAGE_INCREMENT); + return 0; + 
+endProcessingKFreeAndNewSock: + sock_kfree_s(newsk, mpdi, sizeof(MksckPageDescInfo) + + mpdi->pages*sizeof(Mksck_PageDesc)); +endProcessingFreeNewSock: + release_sock(newsk); + sock_release(newsock); + release_sock(sk); + return retval; + +endProcessingKFreeReleaseSock: + sock_kfree_s(newsk, mpdi, sizeof(MksckPageDescInfo) + + mpdi->pages*sizeof(Mksck_PageDesc)); + release_sock(newsk); +endProcessingReleaseSock: + release_sock(sk); + return retval; +} + +/** + * @brief Callback at socket destruction + * + * @param sk pointer to kernel socket structure + */ +static void +MksckPageDescSkDestruct(struct sock *sk) +{ + struct sock *mkSk = NULL; + MksckPageDescInfo *mpdi; + lock_sock(sk); + mpdi = sk->sk_protinfo; + while (mpdi) { + MksckPageDescInfo *next = mpdi->next; + MksckPageDescManage(mpdi->descs, mpdi->pages, + MANAGE_DECREMENT); + sock_kfree_s(sk, mpdi, sizeof(MksckPageDescInfo) + + mpdi->pages*sizeof(Mksck_PageDesc)); + mpdi = next; + } + if (sk->sk_user_data) { + mkSk = (struct sock *)sk->sk_user_data; + sk->sk_user_data = NULL; + } + sk->sk_protinfo = NULL; + release_sock(sk); + /* clean the monki socket that we are holding */ + if (mkSk) { + lock_sock(mkSk); + sock_kfree_s(mkSk, mkSk->sk_user_data, sizeof(int)); + mkSk->sk_user_data = NULL; + release_sock(mkSk); + sock_put(mkSk); // revert of sock_hold() + } +} + +/** + * @brief The mmap operation of the PageDesc context file descriptor. + * + * The mmap command is used to mmap any detached (i.e. no more accumulating) + * PageDesc context, full of the content from its parent communication mk + * socket. Mapping may be done a specified number of times, so that the + * PageDesc context could become useless (as a security restriction). + * + * Also note that mapping from an offset different from zero is considered + * as a userland invalid operation. 
+ * + * @param file user file structure + * @param sock user socket structure + * @param vma virtual memory area structure + * + * @return error code, 0 on success + */ +static int +MksckPageDescMMap(struct file *file, + struct socket *sock, + struct vm_area_struct *vma) +{ + struct sock *sk = sock->sk; + MksckPageDescInfo *mpdi; + struct iovec iov; + unsigned long vm_flags; + int freed = 0; + + iov.iov_base = (void*)vma->vm_start; + iov.iov_len = vma->vm_end - vma->vm_start; + + lock_sock(sk); + mpdi = sk->sk_protinfo; + + // vma->vm_pgoff is checked, since offsetting the map is not supported + if (!mpdi || sk->sk_user_data || vma->vm_pgoff) { + release_sock(sk); + printk(KERN_INFO "MMAP failed for virt %lx size %lx\n", + vma->vm_start, vma->vm_end - vma->vm_start); + return -EINVAL; + } + + vm_flags = mpdi->flags; + if ((vma->vm_flags & ~vm_flags) & (VM_READ|VM_WRITE)) { + release_sock(sk); + return -EACCES; + } + + while (mpdi) { + MksckPageDescInfo *next = mpdi->next; + MksckPageDescMap(mpdi->descs, mpdi->pages, &iov, 1, vma); + if (mpdi->mapCounts && !--mpdi->mapCounts) { + MksckPageDescManage(mpdi->descs, mpdi->pages, + MANAGE_DECREMENT); + sock_kfree_s(sk, mpdi, sizeof(MksckPageDescInfo) + + mpdi->pages*sizeof(Mksck_PageDesc)); + freed = 1; + } + mpdi = next; + } + + if (freed) { + sk->sk_protinfo = NULL; + } + vma->vm_ops = &mksckVMOps; + release_sock(sk); + return 0; +} + +/** + * @brief The ioctl operation of the PageDesc context file descriptor. + * + * The ioctl MKSCK_DETACH command is used to detach the PageDesc context + * from its parent communication mk socket. Once done, the context + * is able to remap the transferred PageDesc(s) of typed messages accumulated + * into the context. 
+ * + * @param sock user socket structure + * @param cmd select which cmd function needs to be performed + * @param arg argument for command + * + * @return error code, 0 on success + */ +static int +MksckPageDescIoctl(struct socket *sock, + unsigned int cmd, + unsigned long arg) +{ + struct sock *monkiSk = NULL; + struct sock *sk = sock->sk; + MksckPageDescInfo *mpdi; + int retval = 0; + + switch (cmd) { + /** + * ioctl MKSCK_DETACH (in and out): + * Detach, compute size and define allowed protection access rights + * + * [in]: unsigned long flags, similar to prot argument of mmap() + * unsigned long number of available further mappings + * with 0 meaning unlimited number of mappings + * [out]: unsigned long size of the available mappable area + */ + case MKSCK_DETACH: { + unsigned long ul[2]; + lock_sock(sk); + mpdi = sk->sk_protinfo; + // read unsigned long argument that contains the mmap alike flags + if (copy_from_user(ul, (void *)arg, sizeof ul)) { + retval = -EFAULT; + // check that the file descriptor has a parent and some context there + } else if (!mpdi || !sk->sk_user_data) { + retval = -EINVAL; + } else { + /* compute mapping protection bits from argument and size of the + * mapping, that is also given back to userland as unsigned long. 
+ */ + uint32 flags = calc_vm_prot_bits(ul[0]); + ul[0] = 0; + while (mpdi) { + MksckPageDescInfo *next = mpdi->next; + ul[0] += MksckPageDescManage(mpdi->descs, mpdi->pages, + MANAGE_COUNT); + mpdi->mapCounts = ul[1]; + mpdi = next; + } + if (copy_to_user((void *)arg, ul, sizeof(ul[0]))) { + retval = -EFAULT; + } else { + mpdi = sk->sk_protinfo; + mpdi->flags = flags; + monkiSk = (struct sock *)sk->sk_user_data; + sk->sk_user_data = NULL; + } + } + release_sock(sk); + // clean the monki socket that we are holding + if ((sk = monkiSk)) { + lock_sock(sk); + sock_kfree_s(sk, sk->sk_user_data, sizeof(int)); + sk->sk_user_data = NULL; + release_sock(sk); + sock_put(sk); + } + break; + } + default: { + retval = -EINVAL; + break; + } + } + return retval; +} + + +/** + * @brief VMX receiving a packet from VMM. + * + * @param kiocb kernel io control block (unused) + * @param sock user socket structure + * @param msg user buffer to receive the packet + * @param len size of the user buffer + * @param flags flags + * + * @return -errno on failure, else length of untyped portion + total number + * of bytes mapped for typed portion. + */ +static int +MksckDgramRecvMsg(struct kiocb *kiocb, + struct socket *sock, + struct msghdr *msg, + size_t len, + int flags) +{ + int err = 0; + struct sock *sk = sock->sk; + Mksck *mksck; + Mksck_Datagram *dg; + struct sockaddr_mk *fromAddr; + uint32 read; + struct iovec *iov; + size_t payloadLen, untypedLen; + uint32 iovCount; + + if (flags & MSG_OOB || flags & MSG_ERRQUEUE) { + return -EOPNOTSUPP; + } + + if ((msg->msg_name != NULL) && (msg->msg_namelen < sizeof *fromAddr)) { + return -EINVAL; + } + + lock_sock(sk); + if ((err = MksckTryBind(sk))) { + release_sock(sk); + return err; + } + mksck = sk->sk_protinfo; + + /* + * To avoid mksck disappearing right after the release_sock the + * refcount needs to be incremented. For more details read the + * block comment on locking in MksckCreate. 
+ */ + ATOMIC_ADDV(mksck->refCount, 1); + release_sock(sk); + + /* + * Get pointer to next packet in ring to be dequeued. + */ + while (1) { + + /* + * Wait to make sure this is the only thread trying to access socket. + */ + if ((err = Mutex_Lock(&mksck->mutex, MutexModeEX)) < 0) { + goto decRefc; + } + + /* + * See if packet in ring. + */ + read = mksck->read; + if (read != mksck->write) { + break; + } + + /* + * Nothing there, if user wants us not to block then just return EAGAIN. + */ + if (flags & MSG_DONTWAIT) { + Mutex_Unlock(&mksck->mutex, MutexModeEX); + err = -EAGAIN; + goto decRefc; + } + + /* + * Nothing there, unlock socket and wait for data. + */ + mksck->foundEmpty ++; + err = Mutex_UnlSleep(&mksck->mutex, MutexModeEX, MKSCK_CVAR_FILL); + if (err < 0) { + PRINTK(KERN_INFO "MksckDgramRecvMsg: aborted\n"); + goto decRefc; + } + } + + /* + * Point to packet in ring. + */ + dg = (void *)&mksck->buff[read]; + + /* + * Provide the address of the sender. + */ + if (msg->msg_name != NULL) { + fromAddr = (void *)msg->msg_name; + fromAddr->mk_addr = dg->fromAddr; + fromAddr->mk_family = AF_MKSCK; + msg->msg_namelen = sizeof *fromAddr; + } else { + msg->msg_namelen = 0; + } + + /* + * Copy data from ring buffer to caller's buffer and remove packet from + * ring buffer. + */ + iov = msg->msg_iov; + iovCount = msg->msg_iovlen; + payloadLen = untypedLen = + dg->len - dg->pages * sizeof(Mksck_PageDesc) - dg->pad; + + /* + * Handle the untyped portion of the message. + */ + if (untypedLen <= len) { + err = memcpy_toiovec(iov, + dg->data, + untypedLen); + if (err < 0) { + printk("MksckDgramRecvMsg: Failed to memcpy_to_iovec untyped message component " + "(buf len %d datagram len %d (untyped %d))\n", + len, + dg->len, + untypedLen); + } + } else { + err = -EINVAL; + } + + /* + * Map in the typed descriptor. 
+ */ + if (err >= 0 && dg->pages > 0) { + Mksck_PageDesc *pd = (Mksck_PageDesc *)(dg->data + untypedLen + dg->pad); + + /* + * There are 3 ways of receiving typed messages from the monitor. + * - The typed message is mapped directly into a VMA. To indicate this the + * userland sets msg_controllen == 0. + * - The typed message is mapped directly into a VMA and a file descriptor + * created for further mappings on the host (in same userland address + * space or an alternate userland address space). In this case + * msg_controllen should be set to sizeof(fd). + * - The typed message is not mapped directly into a VMA, but a file + * descriptor is created for later mapping on the host. In this case + * msg_controllen should be set to sizeof(fd) and the supplied iovec + * shall not specify a receive window. + * + * The conjuncts below decide on which of these 3 cases we've encountered. + */ + + if ((msg->msg_controllen <= 0) || + ((err = MksckPageDescToFd(sock, msg, pd, dg->pages)) != 0) || + (MsgHdrHasAvailableRoom(msg) != 0)) { + + down_write(¤t->mm->mmap_sem); // lock for a change of mapping + payloadLen += MksckPageDescMap(pd, dg->pages, iov, iovCount, NULL); + up_write(¤t->mm->mmap_sem); + } + } + + /* + * Now that packet is removed, it is safe to unlock socket so another thread + * can do a recv(). We also want to wake someone waiting for room to insert + * a new packet. + */ + if ((err >= 0) && Mksck_IncReadIndex(mksck, read, dg)) { + Mutex_UnlWake(&mksck->mutex, MutexModeEX, MKSCK_CVAR_ROOM, true); + } else { + Mutex_Unlock(&mksck->mutex, MutexModeEX); + } + + /* + * If memcpy error, return error status. + * Otherwise, return number of bytes copied. + */ + if (err >= 0) { + err = payloadLen; + } + +decRefc: + Mksck_DecRefc(mksck); + return err; +} + + +/** + * @brief VMX sending a packet to VMM. 
+ * + * @param kiocb kernel io control block + * @param sock user socket structure + * @param msg packet to be transmitted + * @param len length of the packet + * + * @return length of the sent msg on success, -errno on failure + */ +static int +MksckDgramSendMsg(struct kiocb *kiocb, + struct socket *sock, + struct msghdr *msg, + size_t len) +{ + int err = 0; + struct sock *sk = sock->sk; + Mksck *peerMksck; + Mksck_Datagram *dg; + uint32 needed; + uint32 write; + Mksck_Address fromAddr; + + if (msg->msg_flags & MSG_OOB) { + return -EOPNOTSUPP; + } + + if (len > MKSCK_XFER_MAX) { + return -EMSGSIZE; + } + + /* + * In the next locked section peerMksck pointer needs to be set and + * its refcount needs to be incremented. + */ + lock_sock(sk); + do { + Mksck *mksck; + Mksck_Address peerAddr = + { .addr = (msg->msg_name ? + ((struct sockaddr_mk *)msg->msg_name)->mk_addr.addr : + MKSCK_ADDR_UNDEF) }; + + if ((err = MksckTryBind(sk))) { + break; + } + mksck = sk->sk_protinfo; + fromAddr = mksck->addr; + + /* + * If the socket is connected, use that address (no sendto for + * connected sockets). Otherwise, use the provided address if any. + */ + if ((peerMksck = mksck->peer)) { + if (peerAddr.addr != MKSCK_ADDR_UNDEF && + peerAddr.addr != mksck->peerAddr.addr) { + err = -EISCONN; + break; + } + /* + * To avoid mksckPeer disappearing right after the + * release_sock the refcount needs to be incremented. For + * more details read the block comment on locking in + * MksckCreate. + */ + ATOMIC_ADDV(peerMksck->refCount, 1); + } else if (peerAddr.addr == MKSCK_ADDR_UNDEF) { + err = -ENOTCONN; + } else { + /* + * LockPeer also increments the refc on the peer. + */ + err = LockPeer(peerAddr, &peerMksck); + } + } while(0); + release_sock(sk); + + if (err) { + return err; + } + + /* + * Get pointer to sufficient empty space in ring buffer. + */ + needed = MKSCK_DGSIZE(len); + while (1) { + /* + * Wait to make sure this is the only thread trying to write to ring. 
+ */ + if ((err = Mutex_Lock(&peerMksck->mutex, MutexModeEX)) < 0) { + goto decRefc; + } + + /* + * Check if socket can receive data. + */ + if (peerMksck->shutDown & MKSCK_SHUT_RD) { + err = -ENOTCONN; + goto unlockDecRefc; + } + + /* + * See if there is room for the packet. + */ + write = Mksck_FindSendRoom(peerMksck, needed); + if (write != MKSCK_FINDSENDROOM_FULL) { + break; + } + + /* + * No room, unlock socket and maybe wait for room. + */ + if (msg->msg_flags & MSG_DONTWAIT) { + err = -EAGAIN; + goto unlockDecRefc; + } + + peerMksck->foundFull ++; + err = Mutex_UnlSleep(&peerMksck->mutex, + MutexModeEX, + MKSCK_CVAR_ROOM); + if (err < 0) { + PRINTK(KERN_INFO "MksckDgramSendMsg: aborted\n"); + goto decRefc; + } + } + + /* + * Point to room in ring and fill in message. + */ + dg = (void *)&peerMksck->buff[write]; + + dg->fromAddr = fromAddr; + dg->len = len; + + if ((err = memcpy_fromiovec(dg->data, msg->msg_iov, len)) != 0) { + goto unlockDecRefc; + } + + /* + * Increment past message. + */ + Mksck_IncWriteIndex(peerMksck, write, needed); + + /* + * Unlock socket and wake someone trying to receive, ie, we filled + * in a message. + */ + Mutex_UnlWake(&peerMksck->mutex, MutexModeEX, MKSCK_CVAR_FILL, false); + + /* + * Maybe guest is in a general 'wait for interrupt' wait or + * grinding away executing guest instructions. + * + * If it has a receive callback armed for the socket and is + * waiting a message, just wake it up. Else send an IPI to the CPU + * running the guest so it will interrupt whatever it is doing and + * read the message. + * + * Holding the mksckPage->mutex prevents mksckPage->vmHKVA from + * clearing on us. 
+ */ + if (peerMksck->rcvCBEntryMVA != 0) { + MksckPage *peerMksckPage = Mksck_ToSharedPage(peerMksck); + + if ((err = Mutex_Lock(&peerMksckPage->mutex, MutexModeSH)) == 0) { + uint32 sockIdx = peerMksck->index; + MvpkmVM *vm = (MvpkmVM *) peerMksckPage->vmHKVA; + + /* + * The destruction of vm and wsp is blocked by the + * mksckPage->mutex. + */ + if (vm) { + WorldSwitchPage *wsp = vm->wsp; + + ASSERT(sockIdx < 8 * sizeof peerMksckPage->wakeVMMRecv); + ATOMIC_ORV(peerMksckPage->wakeVMMRecv, 1U << sockIdx); + + if (wsp) { + Mvpkm_WakeGuest(vm, ACTION_MKSCK); + } + } + Mutex_Unlock(&peerMksckPage->mutex, MutexModeSH); + } + } + + /* + * If all are happy tell the caller the number of transferred bytes. + */ + if (!err) { + err = len; + } + + /* + * Now that we are done with target socket, allow it to be freed. + */ +decRefc: + Mksck_DecRefc(peerMksck); + return err; + +unlockDecRefc: + Mutex_Unlock(&peerMksck->mutex, MutexModeEX); + goto decRefc; +} + + +/** + * @brief Page fault handler for receive windows. Since the host process + * should not be faulting in this region and only be accessing + * memory that has been established via a typed message transfer, + * we always signal the fault back to the process. + */ +static int +MksckFault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + +/** + * @brief Establish a region in the host process suitable for use as a + * receive window. + * + * @param file file reference (ignored). + * @param sock user socket structure. + * @param vma Linux virtual memory area defining the region. + * + * @return 0 on success, otherwise error code. + */ +static int +MksckMMap(struct file *file, struct socket *sock, struct vm_area_struct *vma) +{ + /* + * All the hard work is done in MksckDgramRecvMsg. Here we simply mark the + * vma as belonging to Mksck. + */ + vma->vm_ops = &mksckVMOps; + + return 0; +} + +/** + * @brief This gets called after returning from the monitor. 
+ * Since the monitor doesn't directly wake VMX threads when it sends + * something to VMX (for efficiency), this routine checks for the + * omitted wakes and does them. + * @param mksckPage some shared page that the monitor writes packets to, ie + * an host shared page + */ +void +Mksck_WakeBlockedSockets(MksckPage *mksckPage) +{ + Mksck *mksck; + uint32 i, wakeHostRecv; + + wakeHostRecv = mksckPage->wakeHostRecv; + if (wakeHostRecv != 0) { + mksckPage->wakeHostRecv = 0; + for (i = 0; wakeHostRecv != 0; i ++) { + if (wakeHostRecv & 1) { + mksck = &mksckPage->sockets[i]; + Mutex_CondSig(&mksck->mutex, MKSCK_CVAR_FILL, true); + } + wakeHostRecv >>= 1; + } + } +} + +/** + * @brief allocate and initialize a shared page. + * @return pointer to shared page.<br> + * NULL on error + */ +MksckPage * +MksckPageAlloc(void) +{ + uint32 jj; + /* + * Ask for pages in the virtual kernel space. There is no + * requirement to be physically contiguous. + */ + MksckPage *mksckPage = vmalloc(MKSCKPAGE_SIZE); + + if (mksckPage) { + + /* + * Initialize its contents. Start refCount at 1 and decrement it + * when the worldswitch or VM page gets freed. + */ + memset(mksckPage, 0, MKSCKPAGE_SIZE); + ATOMIC_SETV(mksckPage->refCount, 1); + mksckPage->portStore = MKSCK_PORT_HIGH; + + Mutex_Init(&mksckPage->mutex); + for (jj = 0; jj<MKSCK_SOCKETS_PER_PAGE; jj++) { + Mutex_Init(&mksckPage->sockets[jj].mutex); + } + } + + return mksckPage; +} + +/** + * @brief Release the allocated pages. + * @param mksckPage the address of the mksckPage to be released + */ +static void +MksckPageRelease(MksckPage *mksckPage) +{ + int ii; + + for (ii = 0; ii<MKSCK_SOCKETS_PER_PAGE; ii++) { + Mutex_Destroy(&mksckPage->sockets[ii].mutex); + } + Mutex_Destroy(&mksckPage->mutex); + + vfree(mksckPage); +} + +/** + * @brief Using the tgid locate the vmid of this process. 
+ * Assumed that mksckPageListLock is held + * @return the vmId if page is already allocated, + * the first vacant vmid if not yet allocated.<br> + * MKSCK_PORT_UNDEF if no slot is vacant + */ +static inline Mksck_VmId +GetHostVmId(void) +{ + uint32 jj; + Mksck_VmId vmId, vmIdFirstVacant = MKSCK_VMID_UNDEF; + MksckPage *mksckPage; + uint32 tgid = task_tgid_vnr(current); + /* + * Assign an unique vmId to the shared page. Start the search from + * the vmId that is the result of hashing tgid to 15 bits. As a + * used page with a given vmId can occupy only a given slot in the + * mksckPages array, it is enough to search through the + * MKSCK_MAX_SHARES slots for a vacancy. + */ + for (jj = 0, vmId = MKSCK_TGID2VMID(tgid); + jj < MKSCK_MAX_SHARES; + jj++, vmId++) { + if (vmId > MKSCK_VMID_HIGH) { + vmId = 0; + } + mksckPage = mksckPages[MKSCK_VMID2IDX(vmId)]; + + if (mksckPage) { + if (mksckPage->tgid == tgid && + !mksckPage->isGuest) { + return mksckPage->vmId; + } + + } else if (vmIdFirstVacant == MKSCK_VMID_UNDEF) { + vmIdFirstVacant = vmId; + } + } + return vmIdFirstVacant; +} + + +/** + * @brief Locate the first empty slot + * Assumed that mksckPageListLock is held + * @return the first vacant vmid.<br> + * MKSCK_PORT_UNDEF if no slot is vacant + */ +static inline Mksck_VmId +GetNewGuestVmId(void) +{ + Mksck_VmId vmId; + + for (vmId = 0; vmId < MKSCK_MAX_SHARES; vmId++) { + if (!mksckPages[MKSCK_VMID2IDX(vmId)]) { + return vmId; + } + } + return MKSCK_VMID_UNDEF; +} + + +/** + * @brief Find shared page for a given idx. The page referred to be the + * idx should exist and be locked by the caller. 
+ * @param idx index of the page in the array + * @return pointer to shared page + */ +MksckPage * +MksckPage_GetFromIdx(uint32 idx) +{ + MksckPage *mksckPage = mksckPages[idx]; + ASSERT(mksckPage); + ASSERT(idx<MKSCK_MAX_SHARES); + ASSERT(ATOMIC_GETO(mksckPage->refCount)); + return mksckPage; +} + +/** + * @brief find shared page for a given vmId + * The vmid should exist and be locked by the caller. + * @param vmId vmId to look for, either an host vmId or a guest vmId + * @return pointer to shared page + */ +MksckPage * +MksckPage_GetFromVmId(Mksck_VmId vmId) +{ + MksckPage *mksckPage = mksckPages[MKSCK_VMID2IDX(vmId)]; + ASSERT(mksckPage); + ASSERT(mksckPage->vmId == vmId); + ASSERT(ATOMIC_GETO(mksckPage->refCount)); + return mksckPage; +} + + +/** + * @brief find shared page for a given vmId + * @param vmId vmId to look for, either an host vmId or a guest vmId + * @return NULL: no such shared page exists<br> + * else: pointer to shared page. + * Call Mksck_DecRefc() when done with pointer + */ +MksckPage * +MksckPage_GetFromVmIdIncRefc(Mksck_VmId vmId) +{ + MksckPage *mksckPage; + + spin_lock(&mksckPageListLock); + mksckPage = mksckPages[MKSCK_VMID2IDX(vmId)]; + + if (!mksckPage || (mksckPage->vmId != vmId)) { + printk(KERN_INFO "MksckPage_GetFromVmIdIncRefc: vmId %04X not found\n", + vmId); + mksckPage = NULL; + } else { + ATOMIC_ADDV(mksckPage->refCount, 1); + } + spin_unlock(&mksckPageListLock); + return mksckPage; +} + + +/** + * @brief find or allocate shared page using tgid + * @return NULL: no such shared page exists<br> + * else: pointer to shared page. + * Call Mksck_DecRefc() when done with pointer + */ +MksckPage * +MksckPage_GetFromTgidIncRefc(void) +{ + MksckPage *mksckPage; + Mksck_VmId vmId; + + while (1) { + spin_lock(&mksckPageListLock); + vmId = GetHostVmId(); + + if (vmId == MKSCK_VMID_UNDEF) { + /* + * No vmId has been allocated yet and there is no free slot. 
+ */ + spin_unlock(&mksckPageListLock); + return NULL; + } + + mksckPage = mksckPages[MKSCK_VMID2IDX(vmId)]; + if (mksckPage != NULL) { + /* + * There is a vmid already allocated, increment the refc on it. + */ + ATOMIC_ADDV(mksckPage->refCount, 1); + spin_unlock(&mksckPageListLock); + return mksckPage; + } + + /* + * Have to release spinlock to allocate a new page. + */ + spin_unlock(&mksckPageListLock); + mksckPage = MksckPageAlloc(); + if (mksckPage == NULL) { + return NULL; + } + + /* + * Re-lock and make sure no one else allocated while unlocked. + * If someone else did allocate, free ours off and use theirs. + */ + spin_lock(&mksckPageListLock); + vmId = GetHostVmId(); + if ((vmId != MKSCK_VMID_UNDEF) && + (mksckPages[MKSCK_VMID2IDX(vmId)] == NULL)) { + break; + } + spin_unlock(&mksckPageListLock); + MksckPageRelease(mksckPage); + } + + /* + * This is a successful new allocation. insert it into the table + * and initialize the fields. + */ + mksckPages[MKSCK_VMID2IDX(vmId)] = mksckPage; + mksckPage->vmId = vmId; + mksckPage->isGuest = false; + mksckPage->vmHKVA = 0; + mksckPage->tgid = task_tgid_vnr(current); + printk(KERN_DEBUG "New host mksck page is allocated: idx %x, vmId %x, tgid %d\n", + MKSCK_VMID2IDX(vmId), vmId, mksckPage->tgid); + + spin_unlock(&mksckPageListLock); + return mksckPage; +} + +/** + * @brief Initialize the VMX provided wsp. Allocate communication page. 
+ * @param vm which virtual machine we're running + * @return 0 if all OK, error value otherwise + */ +int +Mksck_WspInitialize(MvpkmVM *vm) +{ + WorldSwitchPage *wsp = vm->wsp; + int err; + Mksck_VmId vmId; + MksckPage *mksckPage; + + if (wsp->guestId) { + err = -EBUSY; + } else if (!(mksckPage = MksckPageAlloc())) { + err = -ENOMEM; + } else { + spin_lock(&mksckPageListLock); + + if ((vmId = GetNewGuestVmId()) == MKSCK_VMID_UNDEF) { + + err = -EMFILE; + MksckPageRelease(mksckPage); + + printk(KERN_INFO "Mksck_WspInitialize: Cannot allocate vmId\n"); + + } else { + /* + * Now that the mksckPage is all initialized, let others see it. + */ + mksckPages[MKSCK_VMID2IDX(vmId)] = mksckPage; + mksckPage->vmId = vmId; + mksckPage->isGuest = true; + mksckPage->vmHKVA = (HKVA)vm; + /* mksckPage->tgid is undefined when isGuest is true */ + + wsp->guestId = vmId; + + printk(KERN_DEBUG "New guest mksck page is allocated: idx %x, vmId %x\n", + MKSCK_VMID2IDX(vmId), vmId); + + err = 0; + } + + /* + * All stable, ie, mksckPages[] written, ok to unlock now. + */ + spin_unlock(&mksckPageListLock); + } + + return err; +} + +/** + * @brief Release the wsp. Clean up after the monitor. Free the + * associated communication page. + * @param wsp which worldswitch page (VCPU) + */ +void +Mksck_WspRelease(WorldSwitchPage *wsp) +{ + int ii; + int err; + MksckPage *mksckPage = MksckPage_GetFromVmId(wsp->guestId); + + /* + * The worldswitch page for a particular VCPU is about to be freed + * off, so we know the monitor will never execute again. But the + * monitor most likely left some sockets open. Those may have + * outbound connections to host sockets that we must close. + * + * Loop through all possibly open sockets. 
+ */ + uint32 isOpened = wsp->isOpened; + Mksck *mksck = mksckPage->sockets; + while (isOpened) { + if (isOpened & 1) { + ASSERT(ATOMIC_GETO(mksck->refCount) != 0); + /* + * The socket may be connected to a peer (host) socket, so we + * have to decrement that target socket's reference + * count. Unfortunately, Mksck_DisconnectPeer(mksck) cannot + * be called as mksck->peer is an mva not an hkva. Translate + * the address first. + */ + if (mksck->peer) { + MksckPage *mksckPagePeer = MksckPage_GetFromVmId(mksck->peerAddr.vmId); + ASSERT(mksckPagePeer); + mksck->peer = MksckPage_GetFromAddr(mksckPagePeer, mksck->peerAddr); + ASSERT(mksck->peer); + /* mksck->peer is now a hkva */ + } + + Mksck_CloseCommon(mksck); + } + isOpened >>= 1; + mksck++; + } + + /* + * A host socket may be in the process of sending to the guest. It + * will attempt to wake up the guest using mksckPage->vmHKVA and + * mksckPage->vmHKVA->wsp. To assure that the vm and wsp structures + * are not disappearing from under the sending thread we lock the + * page here. + */ + err = Mutex_Lock(&mksckPage->mutex, MutexModeEX); + ASSERT(!err); + mksckPage->vmHKVA = 0; + Mutex_Unlock(&mksckPage->mutex, MutexModeEX); + /* + * Decrement refcount set by MksckPageAlloc() call in + * Mksck_WspInitialize(). + */ + MksckPage_DecRefc(mksckPage); + + /* + * Decrement refcount set by VMM:Mksck_Init() referring to the local + * variable guestMksckPage. + */ + if (wsp->guestPageMapped) { + wsp->guestPageMapped = false; + MksckPage_DecRefc(mksckPage); + } + + /* + * Another task is to decrement the reference count on the mksck + * pages the monitor accessed. Those pages are listed in the + * wsp->isPageMapped list. 
They were locked by the monitor + * calling WSCALL_GET_PAGE_FROM_VMID + */ + for (ii = 0; ii < MKSCK_MAX_SHARES; ii++) { + if (wsp->isPageMapped[ii]) { + MksckPage *mksckPageOther = MksckPage_GetFromIdx(ii); + + wsp->isPageMapped[ii] = false; + MksckPage_DecRefc(mksckPageOther); + } + } +} + +/** + * @brief disconnect from peer by decrementing + * peer socket's reference count and clearing the pointer. + * @param mksck local socket to check for connection + */ +void +Mksck_DisconnectPeer(Mksck *mksck) +{ + Mksck *peerMksck = mksck->peer; + if (peerMksck != NULL) { + mksck->peer = NULL; + mksck->peerAddr.addr = MKSCK_ADDR_UNDEF; + Mksck_DecRefc(peerMksck); + } +} + + +/** + * @brief decrement shared page reference count, free page if it goes zero. + * also do a dmb first to make sure all activity on the struct is + * finished before decrementing the ref count. + * @param mksckPage shared page + */ +void +MksckPage_DecRefc(MksckPage *mksckPage) +{ + uint32 oldRefc; + + DMB(); + do { + while ((oldRefc = ATOMIC_GETO(mksckPage->refCount)) == 1) { + + /* + * Find corresponding entry in list of known shared pages and + * clear it so we can't open any new sockets on this shared + * page, thus preventing its refCount from being incremented. + */ + spin_lock(&mksckPageListLock); + if (ATOMIC_SETIF(mksckPage->refCount, 0, 1)) { + uint32 ii = MKSCK_VMID2IDX(mksckPage->vmId); + ASSERT(ii < MKSCK_MAX_SHARES); + ASSERT(mksckPages[ii] == mksckPage); + mksckPages[ii] = NULL; + spin_unlock(&mksckPageListLock); + printk(KERN_DEBUG "%s mksck page is released: idx %x, vmId %x, tgid %d\n", + mksckPage->isGuest?"Guest":"Host", + ii, mksckPage->vmId, mksckPage->tgid); + MksckPageRelease(mksckPage); + return; + } + spin_unlock(&mksckPageListLock); + } + ASSERT(oldRefc != 0); + } while (!ATOMIC_SETIF(mksckPage->refCount, oldRefc - 1, oldRefc)); +} + +/** + * @brief Lookup if the provided mpn belongs to one of the Mksck pages. Map if found. 
+ * @return 0 if all OK, error value otherwise + */ +int +MksckPage_LookupAndInsertPage(struct vm_area_struct *vma, + unsigned long address, + MPN mpn) +{ + int ii, jj; + MksckPage **mksckPagePtr = mksckPages; + + spin_lock(&mksckPageListLock); + for (jj = MKSCK_MAX_SHARES; jj--; mksckPagePtr++) { + if (*mksckPagePtr) { + for (ii = 0; ii < MKSCKPAGE_TOTAL; ii++) { + if (vmalloc_to_pfn((void*)(((HKVA)*mksckPagePtr) + ii*PAGE_SIZE)) == mpn && + vm_insert_page(vma, address, pfn_to_page(mpn)) == 0) { + spin_unlock(&mksckPageListLock); + return 0; + } + } + } + } + spin_unlock(&mksckPageListLock); + return -1; +} + + +/** + * @brief Print information on the allocated shared pages + * + * This function reports (among many other things) on the use of locks + * on the mksck page (page lock and individual socket locks). To avoid + * the Hiesenberg effect it avoids using locks unless there is a + * danger of dereferencing freed memory. In particular, holding + * mksckPageListLock ensures that the mksck page is not freed while it + * is read. But under very rare conditions this function may report + * inconsistent or garbage data. + */ +static int +MksckPageInfoShow(struct seq_file *m, void *private) +{ + int ii, jj; + uint32 isPageMapped = 0; + int err; + MvpkmVM *vm; + + /* + * Lock is needed to atomize the test and dereference of + * mksckPages[ii] + */ + spin_lock(&mksckPageListLock); + for (ii = 0; ii < MKSCK_MAX_SHARES; ii++) { + MksckPage *mksckPage = mksckPages[ii]; + if (mksckPage != NULL && mksckPage->isGuest) { + /* + * After the refcount is incremented mksckPage will not be + * freed and it can continued to be dereferenced after the + * unlock of mksckPageListLock. + */ + ATOMIC_ADDV(mksckPage->refCount, 1); + spin_unlock(&mksckPageListLock); + + /* + * To dereference mksckPage->vmHKVA, we need to have the page + * lock. 
+ */ + err = Mutex_Lock(&mksckPage->mutex, MutexModeEX); + vm = (MvpkmVM *) mksckPage->vmHKVA; + + if (err == 0 && vm && vm->wsp) { + for (jj = 0; jj < MKSCK_MAX_SHARES; jj++) { + if (vm->wsp->isPageMapped[jj]) isPageMapped |= 1<<jj; + } + } + Mutex_Unlock(&mksckPage->mutex, MutexModeEX); + /* + * Decrement the page refcount and relock the + * mksckPageListLock for the next for loop. + */ + MksckPage_DecRefc(mksckPage); + spin_lock(&mksckPageListLock); + break; + } + } + + /* mksckPageListLock is still locked, mksckPages[ii] can be dereferenced */ + for (ii = 0; ii < MKSCK_MAX_SHARES; ii++) { + MksckPage *mksckPage = mksckPages[ii]; + if (mksckPage != NULL) { + uint32 lState = ATOMIC_GETO(mksckPage->mutex.state); + uint32 isOpened = 0; /* guest has an implicit ref */ + + seq_printf(m, "MksckPage[%02d]: { vmId = %4x(%c), refC = %2d%s", + ii, mksckPage->vmId, + mksckPage->isGuest?'G':'H', + ATOMIC_GETO(mksckPage->refCount), + (isPageMapped&(1<<ii) ? "*" : "")); + + if (lState) { + seq_printf(m, ", lock=%x locked by line %d, unlocked by %d", + lState, mksckPage->mutex.line, mksckPage->mutex.lineUnl); + } + + + if (!mksckPage->isGuest) { + struct task_struct *target; + seq_printf(m, ", tgid = %d", mksckPage->tgid); + + rcu_read_lock(); + + target = pid_task(find_vpid(mksckPage->tgid), PIDTYPE_PID); + seq_printf(m, "(%s)", target ? target->comm : "no such process"); + + rcu_read_unlock(); + } else { + ATOMIC_ADDV(mksckPage->refCount, 1); + spin_unlock(&mksckPageListLock); + + err = Mutex_Lock(&mksckPage->mutex, MutexModeEX); + vm = (MvpkmVM *) mksckPage->vmHKVA; + + if (err == 0 && vm && vm->wsp) { + isOpened = vm->wsp->isOpened; + } + Mutex_Unlock(&mksckPage->mutex, MutexModeEX); + MksckPage_DecRefc(mksckPage); + spin_lock(&mksckPageListLock); + /* + * As the mksckPageListLock was unlocked, nothing + * prevented the MksckPage_DecRefc from actually freeing + * the page. Lets verify that the page is still there. 
+ */ + if (mksckPage != mksckPages[ii]) { + seq_printf(m, " released }\n"); + continue; + } + } + seq_printf(m, ", sockets[] = {"); + + for (jj = 0; jj < mksckPage->numAllocSocks; jj++, isOpened >>= 1) { + Mksck *mksck = mksckPage->sockets + jj; + + if (ATOMIC_GETO(mksck->refCount)) { + uint32 blocked; + lState = ATOMIC_GETO(mksck->mutex.state); + seq_printf(m, "\n { addr = %8x, refC = %2d%s%s%s", + mksck->addr.addr, + ATOMIC_GETO(mksck->refCount), + (isOpened & 1 ? "*" : ""), + (mksck->shutDown & MKSCK_SHUT_RD ? " SHUTD_RD":""), + (mksck->shutDown & MKSCK_SHUT_WR ? " SHUTD_WR":"")); + + if (mksck->peer) { + seq_printf(m, ", peerAddr = %8x", + mksck->peerAddr.addr); + } + + if (lState) { + seq_printf(m, ", lock=%x locked by line %d, unlocked by %d", + lState, mksck->mutex.line, mksck->mutex.lineUnl); + } + + if ((blocked = ATOMIC_GETO(mksck->mutex.blocked))) { + seq_printf(m, ", blocked=%d", blocked); + } + + seq_printf(m, " }"); + } + } + seq_printf(m, " } }\n"); + } + } + spin_unlock(&mksckPageListLock); + + return 0; +} + + +static int +MksckPageInfoOpen(struct inode *inode, struct file *file) +{ + return single_open(file, MksckPageInfoShow, inode->i_private); +} + +static const struct file_operations mksckPageInfoFops = { + .open = MksckPageInfoOpen, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct dentry *mksckPageDentry = NULL; + +void +MksckPageInfo_Init(void) +{ + mksckPageDentry = debugfs_create_file("mksckPage", + S_IROTH, + NULL, + NULL, + &mksckPageInfoFops); +} + +void +MksckPageInfo_Exit(void) +{ + if (mksckPageDentry) { + debugfs_remove(mksckPageDentry); + } +} |