Diffstat (limited to 'arch/arm/mvp/mvpkm/mksck_kernel.c')
-rw-r--r--  arch/arm/mvp/mvpkm/mksck_kernel.c  2589
1 file changed, 2589 insertions, 0 deletions
diff --git a/arch/arm/mvp/mvpkm/mksck_kernel.c b/arch/arm/mvp/mvpkm/mksck_kernel.c
new file mode 100644
index 0000000..6811a68
--- /dev/null
+++ b/arch/arm/mvp/mvpkm/mksck_kernel.c
@@ -0,0 +1,2589 @@
+/*
+ * Linux 2.6.32 and later Kernel module for VMware MVP Hypervisor Support
+ *
+ * Copyright (C) 2010-2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; see the file COPYING. If not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#line 5
+
+/**
+ * @file
+ *
+ * @brief The monitor/kernel socket interface kernel extension.
+ */
+
+#define __KERNEL_SYSCALLS__
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/proc_fs.h>
+#include <linux/fcntl.h>
+#include <linux/syscalls.h>
+#include <linux/kmod.h>
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/miscdevice.h>
+#include <linux/poll.h>
+#include <linux/rcupdate.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/file.h>
+#include <linux/vmalloc.h>
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <net/sock.h>
+
+#include <asm/memory.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include "mvp.h"
+#include "actions.h"
+#include "mvpkm_kernel.h"
+#include "mksck_kernel.h"
+#include "mksck_sockaddr.h"
+#include "mutex_kernel.h"
+
+void NORETURN FatalError(char const *file,
+ int line,
+ FECode feCode,
+ int bugno,
+ char const *fmt,
+ ...)
+{
+ /* Lock around printing the error details so that the messages from multiple
+ * threads are not interleaved. */
+ static DEFINE_MUTEX(fatalErrorMutex);
+ mutex_lock(&fatalErrorMutex);
+
+ FATALERROR_COMMON(printk, vprintk, file, line, feCode, bugno, fmt);
+
+ dump_stack();
+
+ /* done printing */
+ mutex_unlock(&fatalErrorMutex);
+
+ /* do_exit below exits the current thread but does not crash the kernel.
+ * Hence the stack dump will actually be readable from other user threads.
+ */
+ do_exit(1);
+}
+
+
+/*
+ * The project uses a new address family: AF_MKSCK. Ideally this address
+ * family would be accepted by the Linux community and a permanent number
+ * assigned. This, however, is only a dream; not even the x86 team
+ * has been able to pull it off.
+ *
+ * Instead we ASSUME that DECnet is dead and re-use its address family number.
+ * This is what the x86 world is moving to in the latest versions.
+ */
+
+static struct proto mksckProto = {
+ .name = "AF_MKSCK",
+ .owner = THIS_MODULE,
+ .obj_size = sizeof (struct sock),
+};
+
+static int MksckCreate(struct net *net,
+ struct socket *sock,
+ int protocol,
+ int kern);
+
+static struct net_proto_family mksckFamilyOps = {
+ .family = AF_MKSCK,
+ .owner = THIS_MODULE,
+ .create = MksckCreate,
+};
+
+static int MksckFault(struct vm_area_struct *vma, struct vm_fault *vmf);
+
+
+/**
+ * @brief Linux vma operations for receive windows established via Mksck
+ * mmap.
+ */
+static struct vm_operations_struct mksckVMOps = {
+ .fault = MksckFault
+};
+
+/*
+ * List of hosts and guests we know about.
+ */
+static spinlock_t mksckPageListLock;
+static MksckPage *mksckPages[MKSCK_MAX_SHARES];
+
+/*
+ * The following functions form the AF_MKSCK DGRAM operations.
+ */
+static int MksckRelease(struct socket *sock);
+static int MksckBacklogRcv(struct sock *sk, struct sk_buff *skb);
+static void MksckSkDestruct(struct sock *sk);
+static int MksckBind(struct socket *sock,
+ struct sockaddr *addr,
+ int addrLen);
+static int MksckBindGeneric(struct sock *sk,
+ Mksck_Address addr);
+static int MksckDgramRecvMsg(struct kiocb *kiocb,
+ struct socket *sock,
+ struct msghdr *msg,
+ size_t len,
+ int flags);
+static int MksckDgramSendMsg(struct kiocb *kiocb,
+ struct socket *sock,
+ struct msghdr *msg,
+ size_t len);
+static int MksckGetName(struct socket *sock,
+ struct sockaddr *addr,
+ int *addrLen,
+ int peer);
+static unsigned int MksckPoll(struct file *filp,
+ struct socket *sock,
+ poll_table *wait);
+static int MksckDgramConnect(struct socket *sock,
+ struct sockaddr *addr,
+ int addrLen,
+ int flags);
+static int MksckMMap(struct file *file,
+ struct socket *sock,
+ struct vm_area_struct *vma);
+
+static void MksckPageRelease(MksckPage *mksckPage);
+
+static struct proto_ops mksckDgramOps = {
+ .family = AF_MKSCK,
+ .owner = THIS_MODULE,
+ .release = MksckRelease,
+ .bind = MksckBind,
+ .connect = MksckDgramConnect,
+ .socketpair = sock_no_socketpair,
+ .accept = sock_no_accept,
+ .getname = MksckGetName,
+ .poll = MksckPoll,
+ .ioctl = sock_no_ioctl,
+ .listen = sock_no_listen,
+ .shutdown = sock_no_shutdown, /* MksckShutdown, */
+ .setsockopt = sock_no_setsockopt,
+ .getsockopt = sock_no_getsockopt,
+ .sendmsg = MksckDgramSendMsg,
+ .recvmsg = MksckDgramRecvMsg,
+ .mmap = MksckMMap,
+ .sendpage = sock_no_sendpage,
+};
+
+
+/**
+ * @brief Initialize the MKSCK protocol
+ *
+ * @return 0 on success, -errno on failure
+ */
+int
+Mksck_Init(void)
+{
+ int err;
+
+ spin_lock_init(&mksckPageListLock);
+
+ /*
+ * Create a slab to allocate socket structs from.
+ */
+ err = proto_register(&mksckProto, 1);
+ if (err != 0) {
+ printk(KERN_INFO
+ "Mksck_Init: Cannot register MKSCK protocol, errno = %d.\n", err);
+ return err;
+ }
+
+ /*
+ * Register the socket family
+ */
+ err = sock_register(&mksckFamilyOps);
+ if (err < 0) {
+ printk(KERN_INFO
+ "Mksck_Init: Could not register address family AF_MKSCK"
+ " (errno = %d).\n", err);
+ return err;
+ }
+
+ return 0;
+}
+
+
+/**
+ * @brief De-register the MKSCK protocol
+ */
+void
+Mksck_Exit(void)
+{
+ sock_unregister(mksckFamilyOps.family);
+ proto_unregister(&mksckProto);
+}
+
+
+/**
+ * @brief Create a new MKSCK socket
+ *
+ * @param net network namespace (2.6.24 or above)
+ * @param sock user socket structure
+ * @param protocol protocol to be used
+ * @param kern called from kernel mode
+ *
+ * @return 0 on success, -errno on failure
+ */
+static int
+MksckCreate(struct net *net,
+ struct socket *sock,
+ int protocol,
+ int kern)
+{
+ struct sock *sk;
+ uid_t currentUid = current_euid();
+
+ if (!(currentUid == 0 ||
+ currentUid == Mvpkm_vmwareUid)) {
+ printk(KERN_WARNING
+ "MksckCreate: rejected from process %s tgid=%d, pid=%d euid:%d.\n",
+ current->comm,
+ task_tgid_vnr(current),
+ task_pid_vnr(current),
+ currentUid);
+ return -EPERM;
+ }
+
+ if (!sock) {
+ return -EINVAL;
+ }
+
+ if (protocol) {
+ return -EPROTONOSUPPORT;
+ }
+
+ switch (sock->type) {
+ case SOCK_DGRAM: {
+ sock->ops = &mksckDgramOps;
+ break;
+ }
+ default: {
+ return -ESOCKTNOSUPPORT;
+ }
+ }
+
+ sock->state = SS_UNCONNECTED;
+
+ /*
+ * Most recently (in 2.6.24), sk_alloc() was changed to expect the
+ * network namespace, and the option to zero the sock was dropped.
+ */
+ sk = sk_alloc(net, mksckFamilyOps.family, GFP_KERNEL, &mksckProto);
+
+ if (!sk) {
+ return -ENOMEM;
+ }
+
+ sock_init_data(sock, sk);
+
+ sk->sk_type = SOCK_DGRAM;
+ sk->sk_destruct = MksckSkDestruct;
+ sk->sk_backlog_rcv = MksckBacklogRcv;
+
+ /*
+ * On socket lock...
+ *
+ * A bound socket will have an associated private area, the Mksck
+ * structure part of MksckPage. That area is pointed to by
+ * sk->sk_protinfo. In addition, a connected socket will have the
+ * peer field in its associated area set to point to the associated
+ * private area of the peer socket. A mechanism is needed to ensure
+ * that these private areas are not freed while they are being
+ * accessed within the scope of a function. A simple lock would not
+ * suffice as the interface functions (like MksckDgramRecvMsg())
+ * may block. Hence a reference count mechanism is employed. When
+ * the mentioned references (sk->sk_protinfo and mksck->peer) to
+ * the respective private areas are set a refcount is incremented,
+ * and decremented when the references are deleted.
+ *
+ * The refcounts of areas pointed to by sk->sk_protinfo and
+ * mksck->peer will be decremented under the lock of the socket.
+ * Hence these private areas cannot disappear as long as the socket
+ * lock is held.
+ *
+ * The interface functions will have one of the following
+ * structures:
+ *
+ * simpleFn(sk)
+ * {
+ * lock_sock(sk);
+ * if ((mksck = sk->sk_protinfo)) {
+ * <non-blocking use of mksck>
+ * }
+ * release_sock(sk);
+ * }
+ *
+ * complexFn(sk)
+ * {
+ * lock_sock(sk);
+ * if ((mksck = sk->sk_protinfo)) {
+ * IncRefc(mksck);
+ * }
+ * release_sock(sk);
+ *
+ * if (mksck) {
+ * <use of mksck in a potentially blocking manner>
+ * DecRefc(mksck);
+ * }
+ * }
+ */
+ sk->sk_protinfo = NULL;
+ sock_reset_flag(sk, SOCK_DONE);
+
+ return 0;
+}
+
+
+/**
+ * @brief Delete a MKSCK socket
+ *
+ * @param sock user socket structure
+ *
+ * @return 0 on success, -errno on failure
+ */
+static int
+MksckRelease(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+
+ if (sk) {
+ lock_sock(sk);
+ sock_orphan(sk);
+ release_sock(sk);
+ sock_put(sk);
+ }
+
+ sock->sk = NULL;
+ sock->state = SS_FREE;
+
+ return 0;
+}
+
+
+static int
+MksckBacklogRcv(struct sock *sk, struct sk_buff *skb)
+{
+ /*
+ * We should never get these as we never queue an skb.
+ */
+ printk("MksckBacklogRcv: should never get here\n");
+ return -EIO;
+}
+
+
+/**
+ * @brief Callback at socket destruction
+ *
+ * @param sk pointer to kernel socket structure
+ */
+static void
+MksckSkDestruct(struct sock *sk)
+{
+ Mksck *mksck;
+
+ lock_sock(sk);
+ mksck = sk->sk_protinfo;
+
+ if (mksck != NULL) {
+ sk->sk_protinfo = NULL;
+ Mksck_CloseCommon(mksck);
+ }
+
+ if (sk->sk_user_data != NULL) {
+ sock_kfree_s(sk, sk->sk_user_data, sizeof(int));
+ sk->sk_user_data = NULL;
+ }
+
+ release_sock(sk);
+}
+
+
+/**
+ * @brief Set the local address of a MKSCK socket
+ *
+ * @param sk kernel socket structure
+ * @param addr the new address of the socket
+ *
+ * @return 0 on success, -errno on failure
+ *
+ * If addr.port is undefined a new random port is assigned.
+ * If addr.vmId is undefined then the vmId computed from the tgid is used.
+ * Hence the vmId of a socket is not always the one derived from the calling
+ * process.
+ *
+ * Assumed that the socket is locked.
+ * This function is called on explicit binds (MksckBind) and on implicit ones
+ * (at send time).
+ */
+static int
+MksckBindGeneric(struct sock *sk,
+ Mksck_Address addr)
+{
+ int err;
+ Mksck *mksck;
+ MksckPage *mksckPage;
+
+ if (sk->sk_protinfo != NULL) {
+ return -EISCONN;
+ }
+
+ /*
+ * Locate the page for the given host and increment its reference
+ * count so it can't get freed off while we are working on it.
+ */
+ if (addr.vmId == MKSCK_VMID_UNDEF) {
+ mksckPage = MksckPage_GetFromTgidIncRefc();
+ } else {
+ printk(KERN_WARNING "MksckBind: host bind called on vmid 0x%X\n", addr.vmId);
+ mksckPage = MksckPage_GetFromVmIdIncRefc(addr.vmId);
+ }
+
+ if (mksckPage == NULL) {
+ printk(KERN_INFO "MksckBind: no mksckPage for vm 0x%X\n", addr.vmId);
+ return -ENETUNREACH;
+ }
+ addr.vmId = mksckPage->vmId;
+
+ /*
+ * Before we can find an unused socket port on the page we have to
+ * lock the page for exclusive access so another thread can't
+ * allocate the same port.
+ */
+ err = Mutex_Lock(&mksckPage->mutex, MutexModeEX);
+ if (err < 0) {
+ goto outDec;
+ }
+
+ addr.port = MksckPage_GetFreePort(mksckPage, addr.port);
+ if (addr.port == MKSCK_PORT_UNDEF) {
+ err = -EINVAL;
+ goto outUnlockDec;
+ }
+
+ /*
+ * At this point we have the mksckPage locked for exclusive access
+ * and its reference count incremented. Also, addr is completely
+ * filled in with vmId and port that we want to bind.
+ *
+ * Find an available mksck struct on the shared page and initialize
+ * it.
+ */
+ mksck = MksckPage_AllocSocket(mksckPage, addr);
+ if (mksck == NULL) {
+ err = -EMFILE;
+ goto outUnlockDec;
+ }
+
+ /*
+ * Stable, release mutex. Leave mksckPage->refCount incremented so
+ * mksckPage can't be freed until socket is closed.
+ */
+ Mutex_Unlock(&mksckPage->mutex, MutexModeEX);
+
+ /*
+ * This is why we start mksck->refCount at 1. When sk_protinfo gets
+ * cleared, we decrement mksck->refCount.
+ */
+ sk->sk_protinfo = mksck;
+
+ PRINTK(KERN_DEBUG "MksckBind: socket bound to %08X\n", mksck->addr.addr);
+
+ return 0;
+
+outUnlockDec:
+ Mutex_Unlock(&mksckPage->mutex, MutexModeEX);
+outDec:
+ MksckPage_DecRefc(mksckPage);
+ return err;
+}
+
+
+/**
+ * @brief Test if the socket is already bound to a local address and,
+ * if not, bind it to an unused address.
+ *
+ * @param sk kernel socket structure
+ * @return 0 on success, -errno on failure
+ *
+ * Assumed that the socket is locked.
+ */
+static inline int
+MksckTryBind(struct sock *sk)
+{
+ int err = 0;
+
+ if (!sk->sk_protinfo) {
+ static const Mksck_Address addr = { .addr = MKSCK_ADDR_UNDEF };
+ err = MksckBindGeneric(sk, addr);
+ }
+ return err;
+}
+
+
+
+/**
+ * @brief Set the address of a MKSCK socket (user call)
+ *
+ * @param sock user socket structure
+ * @param addr the new address of the socket
+ * @param addrLen length of the address
+ *
+ * @return 0 on success, -errno on failure
+ */
+static int
+MksckBind(struct socket *sock,
+ struct sockaddr *addr,
+ int addrLen)
+{
+ int err;
+ struct sock *sk = sock->sk;
+ struct sockaddr_mk *addrMk = (struct sockaddr_mk *)addr;
+
+ if (addrLen != sizeof *addrMk) {
+ return -EINVAL;
+ }
+ if (addrMk->mk_family != AF_MKSCK) {
+ return -EAFNOSUPPORT;
+ }
+
+ /*
+ * Obtain the socket lock and call the generic Bind function.
+ */
+ lock_sock(sk);
+ err = MksckBindGeneric(sk, addrMk->mk_addr);
+ release_sock(sk);
+
+ return err;
+}
+
+/**
+ * @brief Lock the peer socket by locating it, incrementing its refc
+ * @param addr the address of the peer socket
+ * @param[out] peerMksckR set to the locked peer socket pointer
+ * upon successful lookup
+ * @return 0 on success, -errno on failure
+ */
+static int
+LockPeer(Mksck_Address addr, Mksck **peerMksckR)
+{
+ int err = 0;
+ MksckPage *peerMksckPage = MksckPage_GetFromVmIdIncRefc(addr.vmId);
+ Mksck *peerMksck;
+
+ /*
+ * Find corresponding destination shared page and increment its
+ * reference count so it can't be freed while we are sending to the
+ * socket. Make sure that the address is indeed an address of a
+ * monitor/guest socket.
+ */
+ if (peerMksckPage == NULL) {
+ printk(KERN_INFO "LockPeer: vmId %x is not in use!\n", addr.vmId);
+ return -ENETUNREACH;
+ }
+ if (!peerMksckPage->isGuest) {
+ MksckPage_DecRefc(peerMksckPage);
+ printk(KERN_INFO "LockPeer: vmId %x does not belong to a guest!\n",
+ addr.vmId);
+ return -ENETUNREACH;
+ }
+
+
+ err = Mutex_Lock(&peerMksckPage->mutex, MutexModeSH);
+ if (err < 0) {
+ MksckPage_DecRefc(peerMksckPage);
+ return err;
+ }
+
+ /*
+ * Find corresponding destination socket on that shared page and
+ * increment its reference count so it can't be freed while we are
+ * trying to send to it.
+ */
+ peerMksck = MksckPage_GetFromAddr(peerMksckPage, addr);
+
+ if (peerMksck) {
+ ATOMIC_ADDV(peerMksck->refCount, 1);
+ *peerMksckR = peerMksck;
+ } else {
+ printk(KERN_INFO "LockPeer: addr %x is not a defined socket!\n",
+ addr.addr);
+ err = -ENETUNREACH;
+ }
+
+ Mutex_Unlock(&peerMksckPage->mutex, MutexModeSH);
+ MksckPage_DecRefc(peerMksckPage);
+ return err;
+}
+
+/**
+ * @brief Set the peer address of a MKSCK socket
+ *
+ * @param sock user socket structure
+ * @param addr the address of the peer socket
+ * @param addrLen length of the address
+ * @param flags flags
+ *
+ * @return 0 on success, -errno on failure
+ */
+static int
+MksckDgramConnect(struct socket *sock,
+ struct sockaddr *addr,
+ int addrLen,
+ int flags)
+{
+ struct sock *sk = sock->sk;
+ Mksck *mksck;
+ struct sockaddr_mk *peerAddrMk = (struct sockaddr_mk *)addr;
+ int err = 0;
+
+ if (addrLen != sizeof *peerAddrMk) {
+ printk(KERN_INFO "MksckConnect: wrong address length!\n");
+ return -EINVAL;
+ }
+ if (peerAddrMk->mk_family != AF_MKSCK) {
+ printk(KERN_INFO "MksckConnect: wrong address family!\n");
+ return -EAFNOSUPPORT;
+ }
+
+ lock_sock(sk);
+
+ if ((err = MksckTryBind(sk))) {
+ goto releaseSock;
+ }
+ mksck = sk->sk_protinfo;
+
+ /*
+ * First sever any past peer connections
+ */
+ Mksck_DisconnectPeer(mksck);
+ sock->state = SS_UNCONNECTED;
+
+ /*
+ * Then build new connections ...
+ */
+ if (peerAddrMk->mk_addr.addr != MKSCK_ADDR_UNDEF) {
+ sock->state = SS_CONNECTED;
+ mksck->peerAddr = peerAddrMk->mk_addr;
+ err = LockPeer(mksck->peerAddr, &mksck->peer);
+ PRINTK(KERN_DEBUG "MksckConnect: socket %x is connected to %x!\n",
+ mksck->addr.addr, mksck->peerAddr.addr);
+ }
+
+releaseSock:
+ release_sock(sk);
+
+ return err;
+}
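+
+/*
+ * For reference, a minimal host-side usage sketch of the bind/connect path
+ * above. This is hypothetical userland code, not part of this module; the
+ * sockaddr_mk fields and MKSCK_* constants are assumed to come from
+ * mksck_sockaddr.h, and error handling is omitted:
+ *
+ *   int fd = socket(AF_MKSCK, SOCK_DGRAM, 0);
+ *
+ *   struct sockaddr_mk me = { .mk_family = AF_MKSCK };
+ *   me.mk_addr.vmId = MKSCK_VMID_UNDEF;  // use the vmId derived from our tgid
+ *   me.mk_addr.port = MKSCK_PORT_UNDEF;  // let MksckBindGeneric pick a port
+ *   bind(fd, (struct sockaddr *)&me, sizeof me);
+ *
+ *   struct sockaddr_mk peer = { .mk_family = AF_MKSCK };
+ *   peer.mk_addr = monitorAddr;          // address of a guest/monitor socket
+ *   connect(fd, (struct sockaddr *)&peer, sizeof peer);
+ *   send(fd, buf, len, 0);               // or sendto() on an unconnected socket
+ */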
+
+
+/**
+ * @brief Return the local or peer address of a MKSCK socket
+ *
+ * @param sock user socket structure
+ * @param addr buffer where the requested address is returned
+ * @param addrLen length of the returned address
+ * @param peer 1 if the peer address is sought
+ *
+ * @return 0 on success, -errno on failure
+ */
+static int
+MksckGetName(struct socket *sock,
+ struct sockaddr *addr,
+ int *addrLen,
+ int peer)
+{
+ int err;
+ Mksck *mksck;
+ struct sock *sk = sock->sk;
+
+ // MAX_SOCK_ADDR is size of *addr, Linux doesn't export it!
+ // ASSERT_ON_COMPILE(sizeof (struct sockaddr_mk) <= MAX_SOCK_ADDR);
+
+ lock_sock(sk);
+ mksck = sk->sk_protinfo;
+
+ if (mksck == NULL) {
+ if (peer) {
+ err = -ENOTCONN;
+ } else {
+ ((struct sockaddr_mk *)addr)->mk_family = AF_MKSCK;
+ ((struct sockaddr_mk *)addr)->mk_addr.addr = MKSCK_ADDR_UNDEF;
+ *addrLen = sizeof (struct sockaddr_mk);
+ err = 0;
+ }
+ } else if (!peer) {
+ ((struct sockaddr_mk *)addr)->mk_family = AF_MKSCK;
+ ((struct sockaddr_mk *)addr)->mk_addr = mksck->addr;
+ *addrLen = sizeof (struct sockaddr_mk);
+ err = 0;
+ } else if (mksck->peerAddr.addr == MKSCK_ADDR_UNDEF) {
+ err = -ENOTCONN;
+ } else {
+ ((struct sockaddr_mk *)addr)->mk_family = AF_MKSCK;
+ ((struct sockaddr_mk *)addr)->mk_addr = mksck->peerAddr;
+ *addrLen = sizeof (struct sockaddr_mk);
+ err = 0;
+ }
+
+ release_sock(sk);
+
+ return err;
+}
+
+
+/**
+ * @brief VMX polling for a received packet from the VMM.
+ *
+ * @param filp kernel file pointer to poll on
+ * @param sock user socket structure
+ * @param wait kernel polling table to register with, if not null
+ *
+ * @return poll mask derived from the socket state.
+ */
+static unsigned int MksckPoll(struct file *filp,
+ struct socket *sock,
+ poll_table *wait)
+{
+ struct sock *sk = sock->sk;
+ unsigned int mask = 0;
+ Mksck *mksck = NULL;
+ uint32 read;
+ int err;
+
+ lock_sock(sk);
+ if ((err = MksckTryBind(sk))) {
+ release_sock(sk);
+ return err;
+ }
+ mksck = sk->sk_protinfo;
+
+ /*
+ * To avoid mksck disappearing right after the release_sock the
+ * refcount needs to be incremented. For more details read the
+ * block comment on locking in MksckCreate.
+ */
+ ATOMIC_ADDV(mksck->refCount, 1);
+ release_sock(sk);
+
+ /*
+ * Wait to make sure this is the only thread trying to access socket.
+ */
+ if ((err = Mutex_Lock(&mksck->mutex, MutexModeEX)) < 0) {
+ /* we might get in this situation if we are signaled
+ (select() may handle this, so leave) */
+ PRINTK(KERN_INFO "MksckPoll: try to abort\n");
+ return mask;
+ }
+
+ /*
+ * See if packet in ring.
+ */
+ read = mksck->read;
+ if (read != mksck->write) {
+ mask |= POLLIN | POLLRDNORM; /* readable, socket is unlocked */
+ /* Note that if we ever implement support for POLLOUT, we SHOULD
+ replace this Mutex_Unlock with Mutex_UnlPoll, because there is no
+ way of knowing which event the user actually intends to sleep on */
+ Mutex_Unlock(&mksck->mutex, MutexModeEX);
+ } else {
+ Mutex_UnlPoll(&mksck->mutex, MutexModeEX, MKSCK_CVAR_FILL, filp, wait);
+ }
+
+ /*
+ * Note that locking rules differ a little inside MksckPoll, since we are
+ * not only given a pointer to the struct socket but also a pointer to a
+ * struct file. This means that during the whole operation of this function
+ * and during any pending wait (registered with poll_wait()), the file itself
+ * is reference counted, and we can rely on that 'upper' reference
+ * counting to prevent the Mksck from being torn down. That holds true since
+ * sockets are never re-bound!
+ */
+ Mksck_DecRefc(mksck);
+ return mask;
+}
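+
+/*
+ * Illustrative userland sketch: waiting for an inbound datagram with poll().
+ * Hypothetical host code, not part of this module; error handling omitted:
+ *
+ *   struct pollfd pfd = { .fd = fd, .events = POLLIN };
+ *   if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN)) {
+ *       // a datagram is queued, recvmsg() will not block
+ *   }
+ */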
+
+/**
+ * @brief Manage a set of Mksck_PageDesc from a message or a stored array.
+ *
+ * @param pd set of Mksck_PageDesc
+ * @param pages Mksck_PageDesc pages count for this management operation
+ * @param incr ternary used to indicate if we want to reference (+1), or
+ * dereference (-1), or count (0) 4k pages
+ *
+ * @return length of bytes processed.
+ */
+static size_t
+MksckPageDescManage(Mksck_PageDesc *pd,
+ uint32 pages,
+ int incr)
+{
+ size_t payloadLen = 0;
+ uint32 i;
+
+ for (i = 0; i < pages && pd[i].mpn != INVALID_MPN; ++i) {
+ uint32 j;
+
+ for (j = 0; j < 1 << pd[i].order; ++j) {
+ struct page *page;
+ MPN currMPN = pd[i].mpn + j;
+
+ /*
+ * The monitor tried to send an invalid MPN, bad.
+ */
+ if (!pfn_valid(currMPN)) {
+ printk("MksckPageDescManage: Invalid MPN %x\n", currMPN);
+ } else {
+ page = pfn_to_page(currMPN);
+
+ if (incr == +1) {
+ get_page(page);
+ }
+ if (incr == -1) {
+ put_page(page);
+ }
+ }
+
+ payloadLen += PAGE_SIZE;
+ }
+ }
+
+ return payloadLen;
+}
+
+/**
+ * @brief Management values to be used as third parameter of MksckPageDescManage
+ */
+#define MANAGE_INCREMENT +1
+#define MANAGE_DECREMENT -1
+#define MANAGE_COUNT 0
+
+
+/**
+ * @brief Map a set of Mksck_PageDesc from a message or a stored array.
+ *
+ * @param pd set of Mksck_PageDesc
+ * @param pages pages count for this mapping
+ * @param iov vectored user virtual addresses of the recv commands
+ * @param iovCount size for iov parameter
+ * @param vma virtual memory area used for the mapping; note that
+ * this is required whenever MksckPageDescMap is used
+ * on an indirect PageDesc context (i.e. whenever iov is
+ * not computed by the kernel but by ourselves).
+ *
+ * Since find_vma() and vm_insert_page() are used, this function must
+ * be called with current's mmap_sem locked, or inside an MMap operation.
+ *
+ * @return length of bytes mapped.
+ */
+static size_t
+MksckPageDescMap(Mksck_PageDesc *pd,
+ uint32 pages,
+ struct iovec *iov,
+ int iovCount,
+ struct vm_area_struct *vma)
+{
+ size_t payloadLen = 0;
+ uint32 i;
+
+ for (i = 0; i < pages && pd[i].mpn != INVALID_MPN; ++i) {
+ uint32 j;
+
+ for (j = 0; j < 1 << pd[i].order; ++j) {
+ HUVA huva = 0;
+ struct page *page;
+ MPN currMPN = pd[i].mpn + j;
+
+ while (iovCount > 0 && iov->iov_len == 0) {
+ iovCount--;
+ iov++;
+ }
+
+ if (iovCount == 0) {
+ printk("MksckPageDescMap: Invalid iov length\n");
+ goto map_done;
+ }
+
+ huva = (HUVA)iov->iov_base;
+
+ /*
+ * iovecs for receiving the typed component of the message should
+ * have page aligned base and size sufficient for page descriptor's
+ * mappings.
+ */
+ if (huva & (PAGE_SIZE - 1) || iov->iov_len < PAGE_SIZE) {
+ printk("MksckPageDescMap: Invalid huva %x or iov_len %d\n",
+ huva,
+ iov->iov_len);
+ goto map_done;
+ }
+
+ /*
+ * Might be in a new vma...
+ */
+ if (vma == NULL || huva < vma->vm_start || huva >= vma->vm_end) {
+ vma = find_vma(current->mm, huva);
+
+ /*
+ * Couldn't find a matching vma for huva.
+ */
+ if (vma == NULL ||
+ huva < vma->vm_start ||
+ vma->vm_ops != &mksckVMOps) {
+ printk("MksckPageDescMap: Invalid vma\n");
+ goto map_done;
+ }
+ }
+
+ /*
+ * The monitor tried to send an invalid MPN, bad.
+ */
+ if (!pfn_valid(currMPN)) {
+ printk("MksckPageDescMap: Invalid MPN %x\n", currMPN);
+ } else {
+ int rc;
+
+ page = pfn_to_page(currMPN);
+
+ /*
+ * Map into the receive window.
+ */
+ rc = vm_insert_page(vma, huva, page);
+ if (rc) {
+ printk("MksckPageDescMap: Failed to insert %x at %x, error %d\n",
+ currMPN,
+ huva,
+ rc);
+ goto map_done;
+ }
+
+ ASSERT(iov->iov_len >= PAGE_SIZE);
+ iov->iov_base += PAGE_SIZE;
+ iov->iov_len -= PAGE_SIZE;
+ }
+
+ payloadLen += PAGE_SIZE;
+ }
+ }
+
+map_done:
+ return payloadLen;
+}
+
+
+/**
+ * @brief Check if the provided MsgHdr still has room for a receive operation.
+ *
+ * @param msg user buffer
+ * @return 1 if the MsgHdr has IO space available to receive a mapping, 0 otherwise.
+ */
+static int
+MsgHdrHasAvailableRoom(struct msghdr *msg)
+{
+ struct iovec *vec = msg->msg_iov;
+ uint32 count = msg->msg_iovlen;
+
+ while (count > 0 && vec->iov_len == 0) {
+ count--;
+ vec++;
+ }
+
+ return (count != 0);
+}
+
+
+/**
+ * Whenever a typed message is received from the monitor, we may choose to store
+ * all the page descriptor content in a linked list of descriptors, using the
+ * following context structure.
+ */
+typedef struct MksckPageDescInfo {
+ struct MksckPageDescInfo *next;
+ uint32 flags;
+ uint32 pages;
+ uint32 mapCounts;
+ Mksck_PageDesc descs[0];
+} MksckPageDescInfo;
+
+static void MksckPageDescSkDestruct(struct sock *sk);
+static int MksckPageDescMMap(struct file *file,
+ struct socket *sock,
+ struct vm_area_struct *vma);
+static int MksckPageDescIoctl(struct socket *sock,
+ unsigned int cmd,
+ unsigned long arg);
+
+/**
+ * @brief Delete a page descriptor container socket
+ *
+ * @param sock user socket structure
+ * @return 0 on success, -errno on failure
+ */
+static int
+MksckPageDescRelease(struct socket *sock)
+{
+ /* This is generic socket release */
+ struct sock *sk = sock->sk;
+
+ if (sk) {
+ lock_sock(sk);
+ sock_orphan(sk);
+ release_sock(sk);
+ sock_put(sk);
+ }
+
+ sock->sk = NULL;
+ sock->state = SS_FREE;
+
+ return 0;
+}
+
+
+/**
+ * Whenever a typed message is received from the monitor, we may choose to store
+ * all the page descriptor content for a future mapping. The context must be
+ * usable by host userland, which means it is exposed through a file descriptor,
+ * and as a security measure we define a strict set of operations that are used
+ * only for that purpose. This set of operations is reduced to leaving the
+ * default "PageDesc(s) accumulating" mode (via ioctl), mapping the context,
+ * and generic socket destruction.
+ */
+static struct proto_ops mksckPageDescOps = {
+ .family = AF_MKSCK,
+ .owner = THIS_MODULE,
+ .release = MksckPageDescRelease,
+ .bind = sock_no_bind,
+ .connect = sock_no_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = sock_no_accept,
+ .getname = sock_no_getname,
+ .poll = sock_no_poll,
+ .ioctl = MksckPageDescIoctl,
+ .listen = sock_no_listen,
+ .shutdown = sock_no_shutdown,
+ .setsockopt = sock_no_setsockopt,
+ .getsockopt = sock_no_getsockopt,
+ .sendmsg = sock_no_sendmsg,
+ .recvmsg = sock_no_recvmsg,
+ .mmap = MksckPageDescMMap,
+ .sendpage = sock_no_sendpage,
+};
+
+
+/**
+ * @brief Create a PageDesc context, backed by a file descriptor, or accumulate to an existing one.
+ *
+ * @param sock user socket structure
+ * @param msg user buffer to receive the file descriptor as ancillary data
+ * @param pd source descriptor part of a message
+ * @param pages pages count for this mapping
+ *
+ * @return error if negative, 0 otherwise
+ *
+ */
+static int
+MksckPageDescToFd(struct socket *sock,
+ struct msghdr *msg,
+ Mksck_PageDesc *pd,
+ uint32 pages)
+{
+ int retval;
+ int newfd;
+ struct socket *newsock;
+ struct sock *newsk;
+ struct sock *sk = sock->sk;
+ MksckPageDescInfo **pmpdi, *mpdi;
+ lock_sock(sk);
+
+ /*
+ * Relation between any mk socket and the PageDesc context is as follows:
+ *
+ * From the mk socket to the PageDesc context:
+ * - sk->sk_user_data is a WEAK LINK, containing only a file descriptor
+ * numerical value such that accumulating is keyed on it.
+ *
+ * From the PageDesc context to the mk socket:
+ * - sk->sk_protinfo contains a MksckPageDescInfo struct.
+ * - sk->sk_user_data is a pointer REF-COUNTED sock_hold() LINK, also it is
+ * rarely dereferenced but usually used to check that the
+ * right socket pair is used. Full dereferencing is used
+ * only to break the described links.
+ */
+ if (sk->sk_user_data) {
+ MksckPageDescInfo *mpdi2;
+
+ /* continue any previous on-going mapping, i.e accumulate */
+ newfd = *((int *)sk->sk_user_data);
+ newsock = sockfd_lookup(newfd, &retval); // promote the weak link
+ if (!newsock) {
+ retval = -EINVAL;
+ goto endProcessingReleaseSock;
+ }
+
+ newsk = newsock->sk;
+ lock_sock(newsk);
+ sockfd_put(newsock);
+
+ if (((struct sock *)newsk->sk_user_data) != sk) {
+ /* One way of getting into this situation would be for userland to dup
+ the file descriptor just received, close the original number, and
+ open a new mk socket in the very same spot. The userland code has
+ many ways of interacting with the kernel without this driver
+ code being notified. */
+ retval = -EINVAL;
+ release_sock(newsk);
+ goto endProcessingReleaseSock;
+ }
+
+ mpdi = sock_kmalloc(newsk, sizeof(MksckPageDescInfo) +
+ pages*sizeof(Mksck_PageDesc), GFP_KERNEL);
+ if (IS_ERR(mpdi)) {
+ retval = PTR_ERR(mpdi);
+ release_sock(newsk);
+ goto endProcessingReleaseSock;
+ }
+
+ /* There is no mandatory need for us to notify userland of the
+ progress in "appending" to the file descriptor, but it
+ would feel strange if userland had no means of
+ telling whether the received message was simply thrown away. So, in
+ order to be consistent we fill in the ancillary message both when
+ "creating" and when "appending to" file descriptors. */
+ retval = put_cmsg(msg, SOL_DECNET, 0, sizeof(int), &newfd);
+ if (retval < 0) {
+ goto endProcessingKFreeReleaseSock;
+ }
+
+ release_sock(sk);
+
+ mpdi2 = (MksckPageDescInfo *)newsk->sk_protinfo;
+ while (mpdi2->next) {
+ mpdi2 = mpdi2->next;
+ }
+ pmpdi = &(mpdi2->next);
+
+ } else {
+ /* Create a new socket, new context and a new file descriptor. */
+ retval = sock_create(sk->sk_family, sock->type, 0, &newsock);
+ if (retval < 0) {
+ goto endProcessingReleaseSock;
+ }
+
+ newsk = newsock->sk;
+ lock_sock(newsk);
+ newsk->sk_destruct = &MksckPageDescSkDestruct;
+ newsk->sk_user_data = sk;
+ sock_hold(sk); // keeps a reference to parent mk socket
+ newsock->ops = &mksckPageDescOps;
+
+ mpdi = sock_kmalloc(newsk, sizeof(MksckPageDescInfo) +
+ pages*sizeof(Mksck_PageDesc), GFP_KERNEL);
+ if (IS_ERR(mpdi)) {
+ retval = PTR_ERR(mpdi);
+ goto endProcessingFreeNewSock;
+ }
+
+ sk->sk_user_data = sock_kmalloc(sk, sizeof(int), GFP_KERNEL);
+ if (IS_ERR(sk->sk_user_data)) {
+ retval = PTR_ERR(sk->sk_user_data);
+ sk->sk_user_data = NULL;
+ goto endProcessingKFreeAndNewSock;
+ }
+
+ /* mapping to a file descriptor may fail if another thread is closing files
+ in parallel with sock_map_fd/sock_alloc_fd, or if kernel memory is exhausted */
+ newfd = sock_map_fd(newsock, O_CLOEXEC);
+ if (newfd < 0) {
+ retval = newfd;
+ sock_kfree_s(sk, sk->sk_user_data, sizeof(int));
+ sk->sk_user_data = NULL;
+ goto endProcessingKFreeAndNewSock;
+ }
+
+ /* notify userland of the new file descriptor, like AF_UNIX ancillary data */
+ retval = put_cmsg(msg, SOL_DECNET, 0, sizeof(int), &newfd);
+ if (retval < 0) {
+ sock_kfree_s(sk, sk->sk_user_data, sizeof(int));
+ sk->sk_user_data = NULL;
+ sock_kfree_s(newsk, mpdi, sizeof(MksckPageDescInfo) +
+ mpdi->pages*sizeof(Mksck_PageDesc));
+ release_sock(newsk);
+ sockfd_put(newsock);
+ sock_release(newsock);
+ put_unused_fd(newfd);
+ goto endProcessingReleaseSock;
+ }
+
+ *(int*)sk->sk_user_data = newfd;
+ release_sock(sk);
+ pmpdi = (MksckPageDescInfo **)(&(newsk->sk_protinfo));
+ }
+
+ mpdi->next = NULL;
+ mpdi->flags = 0;
+ mpdi->mapCounts = 0;
+ mpdi->pages = pages;
+ memcpy(mpdi->descs, pd, pages*sizeof(Mksck_PageDesc));
+
+ *pmpdi = mpdi; // link
+ release_sock(newsk);
+
+ /* increment all reference counters for the pages */
+ MksckPageDescManage(pd, pages, MANAGE_INCREMENT);
+ return 0;
+
+endProcessingKFreeAndNewSock:
+ sock_kfree_s(newsk, mpdi, sizeof(MksckPageDescInfo) +
+ mpdi->pages*sizeof(Mksck_PageDesc));
+endProcessingFreeNewSock:
+ release_sock(newsk);
+ sock_release(newsock);
+ release_sock(sk);
+ return retval;
+
+endProcessingKFreeReleaseSock:
+ sock_kfree_s(newsk, mpdi, sizeof(MksckPageDescInfo) +
+ mpdi->pages*sizeof(Mksck_PageDesc));
+ release_sock(newsk);
+endProcessingReleaseSock:
+ release_sock(sk);
+ return retval;
+}
+
+/**
+ * @brief Callback at socket destruction
+ *
+ * @param sk pointer to kernel socket structure
+ */
+static void
+MksckPageDescSkDestruct(struct sock *sk)
+{
+ struct sock *mkSk = NULL;
+ MksckPageDescInfo *mpdi;
+ lock_sock(sk);
+ mpdi = sk->sk_protinfo;
+ while (mpdi) {
+ MksckPageDescInfo *next = mpdi->next;
+ MksckPageDescManage(mpdi->descs, mpdi->pages,
+ MANAGE_DECREMENT);
+ sock_kfree_s(sk, mpdi, sizeof(MksckPageDescInfo) +
+ mpdi->pages*sizeof(Mksck_PageDesc));
+ mpdi = next;
+ }
+ if (sk->sk_user_data) {
+ mkSk = (struct sock *)sk->sk_user_data;
+ sk->sk_user_data = NULL;
+ }
+ sk->sk_protinfo = NULL;
+ release_sock(sk);
+ /* clean the monki socket that we are holding */
+ if (mkSk) {
+ lock_sock(mkSk);
+ sock_kfree_s(mkSk, mkSk->sk_user_data, sizeof(int));
+ mkSk->sk_user_data = NULL;
+ release_sock(mkSk);
+ sock_put(mkSk); // revert of sock_hold()
+ }
+}
+
+/**
+ * @brief The mmap operation of the PageDesc context file descriptor.
+ *
+ * The mmap command is used to map any detached (i.e. no longer accumulating)
+ * PageDesc context, filled with the content from its parent communication mk
+ * socket. Mapping may only be done a specified number of times, after which
+ * the PageDesc context becomes unusable (as a security restriction).
+ *
+ * Also note that mapping from an offset different from zero is considered
+ * an invalid userland operation.
+ *
+ * @param file user file structure
+ * @param sock user socket structure
+ * @param vma virtual memory area structure
+ *
+ * @return error code, 0 on success
+ */
+static int
+MksckPageDescMMap(struct file *file,
+ struct socket *sock,
+ struct vm_area_struct *vma)
+{
+ struct sock *sk = sock->sk;
+ MksckPageDescInfo *mpdi;
+ struct iovec iov;
+ unsigned long vm_flags;
+ int freed = 0;
+
+ iov.iov_base = (void*)vma->vm_start;
+ iov.iov_len = vma->vm_end - vma->vm_start;
+
+ lock_sock(sk);
+ mpdi = sk->sk_protinfo;
+
+ // vma->vm_pgoff is checked, since offsetting the map is not supported
+ if (!mpdi || sk->sk_user_data || vma->vm_pgoff) {
+ release_sock(sk);
+ printk(KERN_INFO "MMAP failed for virt %lx size %lx\n",
+ vma->vm_start, vma->vm_end - vma->vm_start);
+ return -EINVAL;
+ }
+
+ vm_flags = mpdi->flags;
+ if ((vma->vm_flags & ~vm_flags) & (VM_READ|VM_WRITE)) {
+ release_sock(sk);
+ return -EACCES;
+ }
+
+ while (mpdi) {
+ MksckPageDescInfo *next = mpdi->next;
+ MksckPageDescMap(mpdi->descs, mpdi->pages, &iov, 1, vma);
+ if (mpdi->mapCounts && !--mpdi->mapCounts) {
+ MksckPageDescManage(mpdi->descs, mpdi->pages,
+ MANAGE_DECREMENT);
+ sock_kfree_s(sk, mpdi, sizeof(MksckPageDescInfo) +
+ mpdi->pages*sizeof(Mksck_PageDesc));
+ freed = 1;
+ }
+ mpdi = next;
+ }
+
+ if (freed) {
+ sk->sk_protinfo = NULL;
+ }
+ vma->vm_ops = &mksckVMOps;
+ release_sock(sk);
+ return 0;
+}
+
+/**
+ * @brief The ioctl operation of the PageDesc context file descriptor.
+ *
+ * The ioctl MKSCK_DETACH command is used to detach the PageDesc context
+ * from its parent communication mk socket. Once detached, the context
+ * can be used to map the transferred PageDesc(s) of the typed messages
+ * accumulated into it.
+ *
+ * @param sock user socket structure
+ * @param cmd select which cmd function needs to be performed
+ * @param arg argument for command
+ *
+ * @return error code, 0 on success
+ */
+static int
+MksckPageDescIoctl(struct socket *sock,
+ unsigned int cmd,
+ unsigned long arg)
+{
+ struct sock *monkiSk = NULL;
+ struct sock *sk = sock->sk;
+ MksckPageDescInfo *mpdi;
+ int retval = 0;
+
+ switch (cmd) {
+ /**
+ * ioctl MKSCK_DETACH (in and out):
+ * Detach, compute size and define allowed protection access rights
+ *
+ * [in]: unsigned long flags, similar to prot argument of mmap()
+ * unsigned long number of available further mappings
+ * with 0 meaning unlimited number of mappings
+ * [out]: unsigned long size of the available mappable area
+ */
+ case MKSCK_DETACH: {
+ unsigned long ul[2];
+ lock_sock(sk);
+ mpdi = sk->sk_protinfo;
+ // read unsigned long argument that contains the mmap alike flags
+ if (copy_from_user(ul, (void *)arg, sizeof ul)) {
+ retval = -EFAULT;
+ // check that the file descriptor has a parent and some context there
+ } else if (!mpdi || !sk->sk_user_data) {
+ retval = -EINVAL;
+ } else {
+ /* compute mapping protection bits from argument and size of the
+ * mapping, that is also given back to userland as unsigned long.
+ */
+ uint32 flags = calc_vm_prot_bits(ul[0]);
+ ul[0] = 0;
+ while (mpdi) {
+ MksckPageDescInfo *next = mpdi->next;
+ ul[0] += MksckPageDescManage(mpdi->descs, mpdi->pages,
+ MANAGE_COUNT);
+ mpdi->mapCounts = ul[1];
+ mpdi = next;
+ }
+ if (copy_to_user((void *)arg, ul, sizeof(ul[0]))) {
+ retval = -EFAULT;
+ } else {
+ mpdi = sk->sk_protinfo;
+ mpdi->flags = flags;
+ monkiSk = (struct sock *)sk->sk_user_data;
+ sk->sk_user_data = NULL;
+ }
+ }
+ release_sock(sk);
+ // clean the monki socket that we are holding
+ if ((sk = monkiSk)) {
+ lock_sock(sk);
+ sock_kfree_s(sk, sk->sk_user_data, sizeof(int));
+ sk->sk_user_data = NULL;
+ release_sock(sk);
+ sock_put(sk);
+ }
+ break;
+ }
+ default: {
+ retval = -EINVAL;
+ break;
+ }
+ }
+ return retval;
+}
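+
+/*
+ * Illustrative userland sketch of the detach/map protocol described above.
+ * This is hypothetical host code, not part of this module; 'pdFd' is a
+ * PageDesc context file descriptor obtained from recvmsg() ancillary data
+ * (see MksckDgramRecvMsg below), and error handling is omitted:
+ *
+ *   unsigned long ul[2] = { PROT_READ, 1 };  // allowed protection, 1 mapping
+ *   ioctl(pdFd, MKSCK_DETACH, ul);           // ul[0] now holds the mappable size
+ *
+ *   void *win = mmap(NULL, ul[0], PROT_READ, MAP_SHARED, pdFd, 0);
+ *   // ... use the mapped typed-message pages ...
+ *   munmap(win, ul[0]);
+ *   close(pdFd);
+ */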
+
+
+/**
+ * @brief VMX receiving a packet from VMM.
+ *
+ * @param kiocb kernel io control block (unused)
+ * @param sock user socket structure
+ * @param msg user buffer to receive the packet
+ * @param len size of the user buffer
+ * @param flags flags
+ *
+ * @return -errno on failure, else length of untyped portion + total number
+ * of bytes mapped for typed portion.
+ */
+static int
+MksckDgramRecvMsg(struct kiocb *kiocb,
+ struct socket *sock,
+ struct msghdr *msg,
+ size_t len,
+ int flags)
+{
+ int err = 0;
+ struct sock *sk = sock->sk;
+ Mksck *mksck;
+ Mksck_Datagram *dg;
+ struct sockaddr_mk *fromAddr;
+ uint32 read;
+ struct iovec *iov;
+ size_t payloadLen, untypedLen;
+ uint32 iovCount;
+
+ if (flags & MSG_OOB || flags & MSG_ERRQUEUE) {
+ return -EOPNOTSUPP;
+ }
+
+ if ((msg->msg_name != NULL) && (msg->msg_namelen < sizeof *fromAddr)) {
+ return -EINVAL;
+ }
+
+ lock_sock(sk);
+ if ((err = MksckTryBind(sk))) {
+ release_sock(sk);
+ return err;
+ }
+ mksck = sk->sk_protinfo;
+
+ /*
+ * To avoid mksck disappearing right after the release_sock the
+ * refcount needs to be incremented. For more details read the
+ * block comment on locking in MksckCreate.
+ */
+ ATOMIC_ADDV(mksck->refCount, 1);
+ release_sock(sk);
+
+ /*
+ * Get pointer to next packet in ring to be dequeued.
+ */
+ while (1) {
+
+ /*
+ * Wait to make sure this is the only thread trying to access socket.
+ */
+ if ((err = Mutex_Lock(&mksck->mutex, MutexModeEX)) < 0) {
+ goto decRefc;
+ }
+
+ /*
+ * See if packet in ring.
+ */
+ read = mksck->read;
+ if (read != mksck->write) {
+ break;
+ }
+
+ /*
+ * Nothing there, if user wants us not to block then just return EAGAIN.
+ */
+ if (flags & MSG_DONTWAIT) {
+ Mutex_Unlock(&mksck->mutex, MutexModeEX);
+ err = -EAGAIN;
+ goto decRefc;
+ }
+
+ /*
+ * Nothing there, unlock socket and wait for data.
+ */
+ mksck->foundEmpty ++;
+ err = Mutex_UnlSleep(&mksck->mutex, MutexModeEX, MKSCK_CVAR_FILL);
+ if (err < 0) {
+ PRINTK(KERN_INFO "MksckDgramRecvMsg: aborted\n");
+ goto decRefc;
+ }
+ }
+
+ /*
+ * Point to packet in ring.
+ */
+ dg = (void *)&mksck->buff[read];
+
+ /*
+ * Provide the address of the sender.
+ */
+ if (msg->msg_name != NULL) {
+ fromAddr = (void *)msg->msg_name;
+ fromAddr->mk_addr = dg->fromAddr;
+ fromAddr->mk_family = AF_MKSCK;
+ msg->msg_namelen = sizeof *fromAddr;
+ } else {
+ msg->msg_namelen = 0;
+ }
+
+ /*
+ * Copy data from ring buffer to caller's buffer and remove packet from
+ * ring buffer.
+ */
+ iov = msg->msg_iov;
+ iovCount = msg->msg_iovlen;
+ payloadLen = untypedLen =
+ dg->len - dg->pages * sizeof(Mksck_PageDesc) - dg->pad;
+
+ /*
+ * Handle the untyped portion of the message.
+ */
+ if (untypedLen <= len) {
+ err = memcpy_toiovec(iov,
+ dg->data,
+ untypedLen);
+ if (err < 0) {
+ printk("MksckDgramRecvMsg: Failed to memcpy_to_iovec untyped message component "
+ "(buf len %d datagram len %d (untyped %d))\n",
+ len,
+ dg->len,
+ untypedLen);
+ }
+ } else {
+ err = -EINVAL;
+ }
+
+ /*
+ * Map in the typed descriptor.
+ */
+ if (err >= 0 && dg->pages > 0) {
+ Mksck_PageDesc *pd = (Mksck_PageDesc *)(dg->data + untypedLen + dg->pad);
+
+ /*
+ * There are 3 ways of receiving typed messages from the monitor.
+ * - The typed message is mapped directly into a VMA. To indicate this the
+ * userland sets msg_controllen == 0.
+ * - The typed message is mapped directly into a VMA and a file descriptor
+ * created for further mappings on the host (in same userland address
+ * space or an alternate userland address space). In this case
+ * msg_controllen should be set to sizeof(fd).
+ * - The typed message is not mapped directly into a VMA, but a file
+ * descriptor is created for later mapping on the host. In this case
+ * msg_controllen should be set to sizeof(fd) and the supplied iovec
+ * shall not specify a receive window.
+ *
+ * The conjuncts below decide which of these 3 cases we've encountered.
+ */
+
+ if ((msg->msg_controllen <= 0) ||
+ ((err = MksckPageDescToFd(sock, msg, pd, dg->pages)) != 0) ||
+ (MsgHdrHasAvailableRoom(msg) != 0)) {
+
+ down_write(&current->mm->mmap_sem); // lock for a change of mapping
+ payloadLen += MksckPageDescMap(pd, dg->pages, iov, iovCount, NULL);
+ up_write(&current->mm->mmap_sem);
+ }
+ }
+
+ /*
+ * Now that packet is removed, it is safe to unlock socket so another thread
+ * can do a recv(). We also want to wake someone waiting for room to insert
+ * a new packet.
+ */
+ if ((err >= 0) && Mksck_IncReadIndex(mksck, read, dg)) {
+ Mutex_UnlWake(&mksck->mutex, MutexModeEX, MKSCK_CVAR_ROOM, true);
+ } else {
+ Mutex_Unlock(&mksck->mutex, MutexModeEX);
+ }
+
+ /*
+ * If memcpy error, return error status.
+ * Otherwise, return number of bytes copied.
+ */
+ if (err >= 0) {
+ err = payloadLen;
+ }
+
+decRefc:
+ Mksck_DecRefc(mksck);
+ return err;
+}
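+
+/*
+ * Illustrative userland receive sketch for the three cases described above.
+ * This is hypothetical host code, not part of this module. It assumes the
+ * untyped payload size of the expected message is known (UNTYPED_LEN) so that
+ * the second iovec stays page aligned, and that 'win' is a receive window
+ * previously established by calling mmap() on the socket (see MksckMMap
+ * below); error handling is omitted:
+ *
+ *   char untyped[UNTYPED_LEN];
+ *   char cbuf[CMSG_SPACE(sizeof(int))];
+ *   struct iovec iov[2] = {
+ *       { .iov_base = untyped, .iov_len = sizeof untyped },  // untyped bytes
+ *       { .iov_base = win,     .iov_len = WIN_SIZE },        // typed mappings
+ *   };
+ *   struct msghdr msg = {
+ *       .msg_iov = iov, .msg_iovlen = 2,
+ *       .msg_control = cbuf, .msg_controllen = sizeof cbuf,  // also want an fd
+ *   };
+ *   int n = recvmsg(fd, &msg, 0);                  // untyped bytes + mapped bytes
+ *
+ *   struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);      // SOL_DECNET ancillary data
+ *   int pdFd = cm ? *(int *)CMSG_DATA(cm) : -1;    // PageDesc context fd
+ *
+ * Leaving msg_control NULL selects the map-only case; omitting the receive
+ * window iovec selects the fd-only case.
+ */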
+
+
+/**
+ * @brief VMX sending a packet to VMM.
+ *
+ * @param kiocb kernel io control block
+ * @param sock user socket structure
+ * @param msg packet to be transmitted
+ * @param len length of the packet
+ *
+ * @return length of the sent msg on success, -errno on failure
+ */
+static int
+MksckDgramSendMsg(struct kiocb *kiocb,
+ struct socket *sock,
+ struct msghdr *msg,
+ size_t len)
+{
+ int err = 0;
+ struct sock *sk = sock->sk;
+ Mksck *peerMksck;
+ Mksck_Datagram *dg;
+ uint32 needed;
+ uint32 write;
+ Mksck_Address fromAddr;
+
+ if (msg->msg_flags & MSG_OOB) {
+ return -EOPNOTSUPP;
+ }
+
+ if (len > MKSCK_XFER_MAX) {
+ return -EMSGSIZE;
+ }
+
+ /*
+ * In the next locked section peerMksck pointer needs to be set and
+ * its refcount needs to be incremented.
+ */
+ lock_sock(sk);
+ do {
+ Mksck *mksck;
+ Mksck_Address peerAddr =
+ { .addr = (msg->msg_name ?
+ ((struct sockaddr_mk *)msg->msg_name)->mk_addr.addr :
+ MKSCK_ADDR_UNDEF) };
+
+ if ((err = MksckTryBind(sk))) {
+ break;
+ }
+ mksck = sk->sk_protinfo;
+ fromAddr = mksck->addr;
+
+ /*
+ * If the socket is connected, use that address (no sendto for
+ * connected sockets). Otherwise, use the provided address if any.
+ */
+ if ((peerMksck = mksck->peer)) {
+ if (peerAddr.addr != MKSCK_ADDR_UNDEF &&
+ peerAddr.addr != mksck->peerAddr.addr) {
+ err = -EISCONN;
+ break;
+ }
+ /*
+ * To avoid mksckPeer disappearing right after the
+ * release_sock the refcount needs to be incremented. For
+ * more details read the block comment on locking in
+ * MksckCreate.
+ */
+ ATOMIC_ADDV(peerMksck->refCount, 1);
+ } else if (peerAddr.addr == MKSCK_ADDR_UNDEF) {
+ err = -ENOTCONN;
+ } else {
+ /*
+ * LockPeer also increments the refc on the peer.
+ */
+ err = LockPeer(peerAddr, &peerMksck);
+ }
+ } while(0);
+ release_sock(sk);
+
+ if (err) {
+ return err;
+ }
+
+ /*
+ * Get pointer to sufficient empty space in ring buffer.
+ */
+ needed = MKSCK_DGSIZE(len);
+ while (1) {
+ /*
+ * Wait to make sure this is the only thread trying to write to ring.
+ */
+ if ((err = Mutex_Lock(&peerMksck->mutex, MutexModeEX)) < 0) {
+ goto decRefc;
+ }
+
+ /*
+ * Check if socket can receive data.
+ */
+ if (peerMksck->shutDown & MKSCK_SHUT_RD) {
+ err = -ENOTCONN;
+ goto unlockDecRefc;
+ }
+
+ /*
+ * See if there is room for the packet.
+ */
+ write = Mksck_FindSendRoom(peerMksck, needed);
+ if (write != MKSCK_FINDSENDROOM_FULL) {
+ break;
+ }
+
+ /*
+ * No room, unlock socket and maybe wait for room.
+ */
+ if (msg->msg_flags & MSG_DONTWAIT) {
+ err = -EAGAIN;
+ goto unlockDecRefc;
+ }
+
+ peerMksck->foundFull ++;
+ err = Mutex_UnlSleep(&peerMksck->mutex,
+ MutexModeEX,
+ MKSCK_CVAR_ROOM);
+ if (err < 0) {
+ PRINTK(KERN_INFO "MksckDgramSendMsg: aborted\n");
+ goto decRefc;
+ }
+ }
+
+ /*
+ * Point to room in ring and fill in message.
+ */
+ dg = (void *)&peerMksck->buff[write];
+
+ dg->fromAddr = fromAddr;
+ dg->len = len;
+
+ if ((err = memcpy_fromiovec(dg->data, msg->msg_iov, len)) != 0) {
+ goto unlockDecRefc;
+ }
+
+ /*
+ * Increment past message.
+ */
+ Mksck_IncWriteIndex(peerMksck, write, needed);
+
+ /*
+ * Unlock socket and wake someone trying to receive, ie, we filled
+ * in a message.
+ */
+ Mutex_UnlWake(&peerMksck->mutex, MutexModeEX, MKSCK_CVAR_FILL, false);
+
+ /*
+ * Maybe guest is in a general 'wait for interrupt' wait or
+ * grinding away executing guest instructions.
+ *
+ * If it has a receive callback armed for the socket and is
+ * waiting for a message, just wake it up. Else send an IPI to the CPU
+ * running the guest so it will interrupt whatever it is doing and
+ * read the message.
+ *
+ * Holding the mksckPage->mutex prevents mksckPage->vmHKVA from
+ * clearing on us.
+ */
+ if (peerMksck->rcvCBEntryMVA != 0) {
+ MksckPage *peerMksckPage = Mksck_ToSharedPage(peerMksck);
+
+ if ((err = Mutex_Lock(&peerMksckPage->mutex, MutexModeSH)) == 0) {
+ uint32 sockIdx = peerMksck->index;
+ MvpkmVM *vm = (MvpkmVM *) peerMksckPage->vmHKVA;
+
+ /*
+ * The destruction of vm and wsp is blocked by the
+ * mksckPage->mutex.
+ */
+ if (vm) {
+ WorldSwitchPage *wsp = vm->wsp;
+
+ ASSERT(sockIdx < 8 * sizeof peerMksckPage->wakeVMMRecv);
+ ATOMIC_ORV(peerMksckPage->wakeVMMRecv, 1U << sockIdx);
+
+ if (wsp) {
+ Mvpkm_WakeGuest(vm, ACTION_MKSCK);
+ }
+ }
+ Mutex_Unlock(&peerMksckPage->mutex, MutexModeSH);
+ }
+ }
+
+ /*
+ * If all are happy tell the caller the number of transferred bytes.
+ */
+ if (!err) {
+ err = len;
+ }
+
+ /*
+ * Now that we are done with target socket, allow it to be freed.
+ */
+decRefc:
+ Mksck_DecRefc(peerMksck);
+ return err;
+
+unlockDecRefc:
+ Mutex_Unlock(&peerMksck->mutex, MutexModeEX);
+ goto decRefc;
+}
+
+
+/**
+ * @brief Page fault handler for receive windows. Since the host process
+ * should not be faulting in this region and only be accessing
+ * memory that has been established via a typed message transfer,
+ * we always signal the fault back to the process.
+ */
+static int
+MksckFault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ return VM_FAULT_SIGBUS;
+}
+
+/**
+ * @brief Establish a region in the host process suitable for use as a
+ * receive window.
+ *
+ * @param file file reference (ignored).
+ * @param sock user socket structure.
+ * @param vma Linux virtual memory area defining the region.
+ *
+ * @return 0 on success, otherwise error code.
+ */
+static int
+MksckMMap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
+{
+ /*
+ * All the hard work is done in MksckDgramRecvMsg. Here we simply mark the
+ * vma as belonging to Mksck.
+ */
+ vma->vm_ops = &mksckVMOps;
+
+ return 0;
+}
+
+/**
+ * @brief This gets called after returning from the monitor.
+ * Since the monitor doesn't directly wake VMX threads when it sends
+ * something to VMX (for efficiency), this routine checks for the
+ * omitted wakes and does them.
+ * @param mksckPage some shared page that the monitor writes packets to, i.e.
+ * a host shared page
+ */
+void
+Mksck_WakeBlockedSockets(MksckPage *mksckPage)
+{
+ Mksck *mksck;
+ uint32 i, wakeHostRecv;
+
+ wakeHostRecv = mksckPage->wakeHostRecv;
+ if (wakeHostRecv != 0) {
+ mksckPage->wakeHostRecv = 0;
+ for (i = 0; wakeHostRecv != 0; i ++) {
+ if (wakeHostRecv & 1) {
+ mksck = &mksckPage->sockets[i];
+ Mutex_CondSig(&mksck->mutex, MKSCK_CVAR_FILL, true);
+ }
+ wakeHostRecv >>= 1;
+ }
+ }
+}
+
+/**
+ * @brief allocate and initialize a shared page.
+ * @return pointer to shared page.<br>
+ * NULL on error
+ */
+MksckPage *
+MksckPageAlloc(void)
+{
+ uint32 jj;
+ /*
+ * Ask for pages in the virtual kernel space. There is no
+ * requirement to be physically contiguous.
+ */
+ MksckPage *mksckPage = vmalloc(MKSCKPAGE_SIZE);
+
+ if (mksckPage) {
+
+ /*
+ * Initialize its contents. Start refCount at 1 and decrement it
+ * when the worldswitch or VM page gets freed.
+ */
+ memset(mksckPage, 0, MKSCKPAGE_SIZE);
+ ATOMIC_SETV(mksckPage->refCount, 1);
+ mksckPage->portStore = MKSCK_PORT_HIGH;
+
+ Mutex_Init(&mksckPage->mutex);
+ for (jj = 0; jj<MKSCK_SOCKETS_PER_PAGE; jj++) {
+ Mutex_Init(&mksckPage->sockets[jj].mutex);
+ }
+ }
+
+ return mksckPage;
+}
+
+/**
+ * @brief Release the allocated pages.
+ * @param mksckPage the address of the mksckPage to be released
+ */
+static void
+MksckPageRelease(MksckPage *mksckPage)
+{
+ int ii;
+
+ for (ii = 0; ii<MKSCK_SOCKETS_PER_PAGE; ii++) {
+ Mutex_Destroy(&mksckPage->sockets[ii].mutex);
+ }
+ Mutex_Destroy(&mksckPage->mutex);
+
+ vfree(mksckPage);
+}
+
+/**
+ * @brief Using the tgid locate the vmid of this process.
+ * Assumed that mksckPageListLock is held
+ * @return the vmId if page is already allocated,
+ * the first vacant vmid if not yet allocated.<br>
+ * MKSCK_VMID_UNDEF if no slot is vacant
+ */
+static inline Mksck_VmId
+GetHostVmId(void)
+{
+ uint32 jj;
+ Mksck_VmId vmId, vmIdFirstVacant = MKSCK_VMID_UNDEF;
+ MksckPage *mksckPage;
+ uint32 tgid = task_tgid_vnr(current);
+ /*
+ * Assign a unique vmId to the shared page. Start the search from
+ * the vmId that is the result of hashing tgid to 15 bits. As a
+ * used page with a given vmId can occupy only a given slot in the
+ * mksckPages array, it is enough to search through the
+ * MKSCK_MAX_SHARES slots for a vacancy.
+ */
+ for (jj = 0, vmId = MKSCK_TGID2VMID(tgid);
+ jj < MKSCK_MAX_SHARES;
+ jj++, vmId++) {
+ if (vmId > MKSCK_VMID_HIGH) {
+ vmId = 0;
+ }
+ mksckPage = mksckPages[MKSCK_VMID2IDX(vmId)];
+
+ if (mksckPage) {
+ if (mksckPage->tgid == tgid &&
+ !mksckPage->isGuest) {
+ return mksckPage->vmId;
+ }
+
+ } else if (vmIdFirstVacant == MKSCK_VMID_UNDEF) {
+ vmIdFirstVacant = vmId;
+ }
+ }
+ return vmIdFirstVacant;
+}
+
+
+/**
+ * @brief Locate the first empty slot
+ * Assumed that mksckPageListLock is held
+ * @return the first vacant vmid.<br>
+ * MKSCK_VMID_UNDEF if no slot is vacant
+ */
+static inline Mksck_VmId
+GetNewGuestVmId(void)
+{
+ Mksck_VmId vmId;
+
+ for (vmId = 0; vmId < MKSCK_MAX_SHARES; vmId++) {
+ if (!mksckPages[MKSCK_VMID2IDX(vmId)]) {
+ return vmId;
+ }
+ }
+ return MKSCK_VMID_UNDEF;
+}
+
+
+/**
+ * @brief Find shared page for a given idx. The page referred to by the
+ * idx should exist and be locked by the caller.
+ * @param idx index of the page in the array
+ * @return pointer to shared page
+ */
+MksckPage *
+MksckPage_GetFromIdx(uint32 idx)
+{
+ MksckPage *mksckPage = mksckPages[idx];
+ ASSERT(mksckPage);
+ ASSERT(idx<MKSCK_MAX_SHARES);
+ ASSERT(ATOMIC_GETO(mksckPage->refCount));
+ return mksckPage;
+}
+
+/**
+ * @brief find shared page for a given vmId
+ * The vmid should exist and be locked by the caller.
+ * @param vmId vmId to look for, either an host vmId or a guest vmId
+ * @return pointer to shared page
+ */
+MksckPage *
+MksckPage_GetFromVmId(Mksck_VmId vmId)
+{
+ MksckPage *mksckPage = mksckPages[MKSCK_VMID2IDX(vmId)];
+ ASSERT(mksckPage);
+ ASSERT(mksckPage->vmId == vmId);
+ ASSERT(ATOMIC_GETO(mksckPage->refCount));
+ return mksckPage;
+}
+
+
+/**
+ * @brief find shared page for a given vmId
+ * @param vmId vmId to look for, either an host vmId or a guest vmId
+ * @return NULL: no such shared page exists<br>
+ * else: pointer to shared page.
+ * Call Mksck_DecRefc() when done with pointer
+ */
+MksckPage *
+MksckPage_GetFromVmIdIncRefc(Mksck_VmId vmId)
+{
+ MksckPage *mksckPage;
+
+ spin_lock(&mksckPageListLock);
+ mksckPage = mksckPages[MKSCK_VMID2IDX(vmId)];
+
+ if (!mksckPage || (mksckPage->vmId != vmId)) {
+ printk(KERN_INFO "MksckPage_GetFromVmIdIncRefc: vmId %04X not found\n",
+ vmId);
+ mksckPage = NULL;
+ } else {
+ ATOMIC_ADDV(mksckPage->refCount, 1);
+ }
+ spin_unlock(&mksckPageListLock);
+ return mksckPage;
+}
+
+
+/**
+ * @brief find or allocate shared page using tgid
+ * @return NULL: no such shared page exists<br>
+ * else: pointer to shared page.
+ * Call Mksck_DecRefc() when done with pointer
+ */
+MksckPage *
+MksckPage_GetFromTgidIncRefc(void)
+{
+ MksckPage *mksckPage;
+ Mksck_VmId vmId;
+
+ while (1) {
+ spin_lock(&mksckPageListLock);
+ vmId = GetHostVmId();
+
+ if (vmId == MKSCK_VMID_UNDEF) {
+ /*
+ * No vmId has been allocated yet and there is no free slot.
+ */
+ spin_unlock(&mksckPageListLock);
+ return NULL;
+ }
+
+ mksckPage = mksckPages[MKSCK_VMID2IDX(vmId)];
+ if (mksckPage != NULL) {
+ /*
+ * There is a vmid already allocated, increment the refc on it.
+ */
+ ATOMIC_ADDV(mksckPage->refCount, 1);
+ spin_unlock(&mksckPageListLock);
+ return mksckPage;
+ }
+
+ /*
+ * Have to release spinlock to allocate a new page.
+ */
+ spin_unlock(&mksckPageListLock);
+ mksckPage = MksckPageAlloc();
+ if (mksckPage == NULL) {
+ return NULL;
+ }
+
+ /*
+ * Re-lock and make sure no one else allocated while unlocked.
+ * If someone else did allocate, free ours off and use theirs.
+ */
+ spin_lock(&mksckPageListLock);
+ vmId = GetHostVmId();
+ if ((vmId != MKSCK_VMID_UNDEF) &&
+ (mksckPages[MKSCK_VMID2IDX(vmId)] == NULL)) {
+ break;
+ }
+ spin_unlock(&mksckPageListLock);
+ MksckPageRelease(mksckPage);
+ }
+
+ /*
+ * This is a successful new allocation. Insert it into the table
+ * and initialize the fields.
+ */
+ mksckPages[MKSCK_VMID2IDX(vmId)] = mksckPage;
+ mksckPage->vmId = vmId;
+ mksckPage->isGuest = false;
+ mksckPage->vmHKVA = 0;
+ mksckPage->tgid = task_tgid_vnr(current);
+ printk(KERN_DEBUG "New host mksck page is allocated: idx %x, vmId %x, tgid %d\n",
+ MKSCK_VMID2IDX(vmId), vmId, mksckPage->tgid);
+
+ spin_unlock(&mksckPageListLock);
+ return mksckPage;
+}
+
+/**
+ * @brief Initialize the VMX provided wsp. Allocate communication page.
+ * @param vm which virtual machine we're running
+ * @return 0 if all OK, error value otherwise
+ */
+int
+Mksck_WspInitialize(MvpkmVM *vm)
+{
+ WorldSwitchPage *wsp = vm->wsp;
+ int err;
+ Mksck_VmId vmId;
+ MksckPage *mksckPage;
+
+ if (wsp->guestId) {
+ err = -EBUSY;
+ } else if (!(mksckPage = MksckPageAlloc())) {
+ err = -ENOMEM;
+ } else {
+ spin_lock(&mksckPageListLock);
+
+ if ((vmId = GetNewGuestVmId()) == MKSCK_VMID_UNDEF) {
+
+ err = -EMFILE;
+ MksckPageRelease(mksckPage);
+
+ printk(KERN_INFO "Mksck_WspInitialize: Cannot allocate vmId\n");
+
+ } else {
+ /*
+ * Now that the mksckPage is all initialized, let others see it.
+ */
+ mksckPages[MKSCK_VMID2IDX(vmId)] = mksckPage;
+ mksckPage->vmId = vmId;
+ mksckPage->isGuest = true;
+ mksckPage->vmHKVA = (HKVA)vm;
+ /* mksckPage->tgid is undefined when isGuest is true */
+
+ wsp->guestId = vmId;
+
+ printk(KERN_DEBUG "New guest mksck page is allocated: idx %x, vmId %x\n",
+ MKSCK_VMID2IDX(vmId), vmId);
+
+ err = 0;
+ }
+
+ /*
+ * All stable, ie, mksckPages[] written, ok to unlock now.
+ */
+ spin_unlock(&mksckPageListLock);
+ }
+
+ return err;
+}
+
+/**
+ * @brief Release the wsp. Clean up after the monitor. Free the
+ * associated communication page.
+ * @param wsp which worldswitch page (VCPU)
+ */
+void
+Mksck_WspRelease(WorldSwitchPage *wsp)
+{
+ int ii;
+ int err;
+ MksckPage *mksckPage = MksckPage_GetFromVmId(wsp->guestId);
+
+ /*
+ * The worldswitch page for a particular VCPU is about to be freed
+ * off, so we know the monitor will never execute again. But the
+ * monitor most likely left some sockets open. Those may have
+ * outbound connections to host sockets that we must close.
+ *
+ * Loop through all possibly open sockets.
+ */
+ uint32 isOpened = wsp->isOpened;
+ Mksck *mksck = mksckPage->sockets;
+ while (isOpened) {
+ if (isOpened & 1) {
+ ASSERT(ATOMIC_GETO(mksck->refCount) != 0);
+ /*
+ * The socket may be connected to a peer (host) socket, so we
+ * have to decrement that target socket's reference count.
+ * Unfortunately, Mksck_DisconnectPeer(mksck) cannot be called
+ * directly because mksck->peer is an MVA, not an HKVA. Translate
+ * the address first.
+ */
+ if (mksck->peer) {
+ MksckPage *mksckPagePeer = MksckPage_GetFromVmId(mksck->peerAddr.vmId);
+ ASSERT(mksckPagePeer);
+ mksck->peer = MksckPage_GetFromAddr(mksckPagePeer, mksck->peerAddr);
+ ASSERT(mksck->peer);
+ /* mksck->peer is now an HKVA */
+ }
+
+ Mksck_CloseCommon(mksck);
+ }
+ isOpened >>= 1;
+ mksck++;
+ }
+
+ /*
+ * A host socket may be in the process of sending to the guest. It
+ * will attempt to wake up the guest using mksckPage->vmHKVA and
+ * mksckPage->vmHKVA->wsp. To ensure that the vm and wsp structures
+ * do not disappear from under the sending thread, we lock the
+ * page here.
+ */
+ err = Mutex_Lock(&mksckPage->mutex, MutexModeEX);
+ ASSERT(!err);
+ mksckPage->vmHKVA = 0;
+ Mutex_Unlock(&mksckPage->mutex, MutexModeEX);
+ /*
+ * Decrement refcount set by MksckPageAlloc() call in
+ * Mksck_WspInitialize().
+ */
+ MksckPage_DecRefc(mksckPage);
+
+ /*
+ * Decrement refcount set by VMM:Mksck_Init() referring to the local
+ * variable guestMksckPage.
+ */
+ if (wsp->guestPageMapped) {
+ wsp->guestPageMapped = false;
+ MksckPage_DecRefc(mksckPage);
+ }
+
+ /*
+ * Another task is to decrement the reference count on the mksck
+ * pages the monitor accessed. Those pages are listed in the
+ * wsp->isPageMapped list; they were locked when the monitor
+ * called WSCALL_GET_PAGE_FROM_VMID.
+ */
+ for (ii = 0; ii < MKSCK_MAX_SHARES; ii++) {
+ if (wsp->isPageMapped[ii]) {
+ MksckPage *mksckPageOther = MksckPage_GetFromIdx(ii);
+
+ wsp->isPageMapped[ii] = false;
+ MksckPage_DecRefc(mksckPageOther);
+ }
+ }
+}
+
+/**
+ * @brief Disconnect from the peer by decrementing the
+ * peer socket's reference count and clearing the pointer.
+ * @param mksck local socket to check for a connection
+ */
+void
+Mksck_DisconnectPeer(Mksck *mksck)
+{
+ Mksck *peerMksck = mksck->peer;
+ if (peerMksck != NULL) {
+ mksck->peer = NULL;
+ mksck->peerAddr.addr = MKSCK_ADDR_UNDEF;
+ Mksck_DecRefc(peerMksck);
+ }
+}
+
+
+/**
+ * @brief Decrement the shared page reference count and free the page if it
+ * reaches zero. A DMB is issued first to make sure all activity on the
+ * struct has finished before the reference count is decremented. (A
+ * simplified sketch of the decrement pattern follows the function body.)
+ * @param mksckPage shared page
+ */
+void
+MksckPage_DecRefc(MksckPage *mksckPage)
+{
+ uint32 oldRefc;
+
+ DMB();
+ do {
+ while ((oldRefc = ATOMIC_GETO(mksckPage->refCount)) == 1) {
+
+ /*
+ * Find corresponding entry in list of known shared pages and
+ * clear it so we can't open any new sockets on this shared
+ * page, thus preventing its refCount from being incremented.
+ */
+ spin_lock(&mksckPageListLock);
+ if (ATOMIC_SETIF(mksckPage->refCount, 0, 1)) {
+ uint32 ii = MKSCK_VMID2IDX(mksckPage->vmId);
+ ASSERT(ii < MKSCK_MAX_SHARES);
+ ASSERT(mksckPages[ii] == mksckPage);
+ mksckPages[ii] = NULL;
+ spin_unlock(&mksckPageListLock);
+ printk(KERN_DEBUG "%s mksck page is released: idx %x, vmId %x, tgid %d\n",
+ mksckPage->isGuest?"Guest":"Host",
+ ii, mksckPage->vmId, mksckPage->tgid);
+ MksckPageRelease(mksckPage);
+ return;
+ }
+ spin_unlock(&mksckPageListLock);
+ }
+ ASSERT(oldRefc != 0);
+ } while (!ATOMIC_SETIF(mksckPage->refCount, oldRefc - 1, oldRefc));
+}
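+
+/*
+ * Minimal sketch of the "free on last reference" pattern used above, written
+ * with the stock Linux atomic API purely as an analogy (assumption: the
+ * module itself uses its own ATOMIC_* wrappers, and the unlink/free steps
+ * are elided here). The fast path drops a reference without the page-list
+ * lock; only the 1 -> 0 transition takes the lock so that no concurrent
+ * lookup can hand out a new reference to a page that is about to be freed.
+ */
+#if 0
+static void ExampleDecRefc(atomic_t *refc)
+{
+   int old;
+
+   for (;;) {
+      old = atomic_read(refc);
+
+      if (old == 1) {
+         /*
+          * Possibly the last reference: take the page-list lock so no new
+          * lookup can revive the page, then try to move 1 -> 0.
+          */
+         spin_lock(&mksckPageListLock);
+         if (atomic_cmpxchg(refc, 1, 0) == 1) {
+            /* ... unlink from mksckPages[] ... */
+            spin_unlock(&mksckPageListLock);
+            /* ... free the page ... */
+            return;
+         }
+         /* Someone took a new reference meanwhile; retry. */
+         spin_unlock(&mksckPageListLock);
+         continue;
+      }
+
+      /* Fast path: drop one reference without taking the lock. */
+      if (atomic_cmpxchg(refc, old, old - 1) == old) {
+         return;
+      }
+   }
+}
+#endif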
+
+/**
+ * @brief Look up whether the provided MPN belongs to one of the Mksck pages
+ * and, if so, insert it into the given user mapping.
+ * @param vma user virtual memory area to map the page into
+ * @param address user virtual address at which to insert the page
+ * @param mpn machine page number to look up
+ * @return 0 if all OK, error value otherwise
+ */
+int
+MksckPage_LookupAndInsertPage(struct vm_area_struct *vma,
+ unsigned long address,
+ MPN mpn)
+{
+ int ii, jj;
+ MksckPage **mksckPagePtr = mksckPages;
+
+ spin_lock(&mksckPageListLock);
+ for (jj = MKSCK_MAX_SHARES; jj--; mksckPagePtr++) {
+ if (*mksckPagePtr) {
+ for (ii = 0; ii < MKSCKPAGE_TOTAL; ii++) {
+ if (vmalloc_to_pfn((void*)(((HKVA)*mksckPagePtr) + ii*PAGE_SIZE)) == mpn &&
+ vm_insert_page(vma, address, pfn_to_page(mpn)) == 0) {
+ spin_unlock(&mksckPageListLock);
+ return 0;
+ }
+ }
+ }
+ }
+ spin_unlock(&mksckPageListLock);
+ return -1;
+}
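+
+/*
+ * Illustrative caller sketch (an assumption: the real caller is the mvpkm
+ * mmap/fault path, which is not part of this file). It shows how a fault
+ * handler might use MksckPage_LookupAndInsertPage() to map an MPN into a
+ * user mapping only if it backs a registered Mksck page; the function name
+ * and return-code choices below are hypothetical.
+ */
+#if 0
+static int ExampleInsertIfShared(struct vm_area_struct *vma,
+                                 unsigned long uaddr,
+                                 MPN mpn)
+{
+   if (MksckPage_LookupAndInsertPage(vma, uaddr, mpn) == 0) {
+      return VM_FAULT_NOPAGE;   /* page table entry installed */
+   }
+   return VM_FAULT_SIGBUS;      /* mpn is not part of any Mksck page */
+}
+#endif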
+
+
+/**
+ * @brief Print information on the allocated shared pages
+ *
+ * This function reports (among many other things) on the use of locks
+ * on the mksck page (the page lock and the individual socket locks). To
+ * avoid the Heisenberg effect it takes locks only where there is a
+ * danger of dereferencing freed memory; in particular, holding
+ * mksckPageListLock ensures that a mksck page is not freed while it
+ * is being read. Under very rare conditions this function may therefore
+ * report inconsistent or garbage data.
+ */
+static int
+MksckPageInfoShow(struct seq_file *m, void *private)
+{
+ int ii, jj;
+ uint32 isPageMapped = 0;
+ int err;
+ MvpkmVM *vm;
+
+ /*
+ * The lock is needed to make the test and dereference of
+ * mksckPages[ii] atomic.
+ */
+ spin_lock(&mksckPageListLock);
+ for (ii = 0; ii < MKSCK_MAX_SHARES; ii++) {
+ MksckPage *mksckPage = mksckPages[ii];
+ if (mksckPage != NULL && mksckPage->isGuest) {
+ /*
+ * After the refcount is incremented mksckPage will not be
+ * freed, so it can continue to be dereferenced after
+ * mksckPageListLock is unlocked.
+ */
+ ATOMIC_ADDV(mksckPage->refCount, 1);
+ spin_unlock(&mksckPageListLock);
+
+ /*
+ * To dereference mksckPage->vmHKVA, we need to have the page
+ * lock.
+ */
+ err = Mutex_Lock(&mksckPage->mutex, MutexModeEX);
+ vm = (MvpkmVM *) mksckPage->vmHKVA;
+
+ if (err == 0 && vm && vm->wsp) {
+ for (jj = 0; jj < MKSCK_MAX_SHARES; jj++) {
+ if (vm->wsp->isPageMapped[jj]) isPageMapped |= 1<<jj;
+ }
+ }
+ Mutex_Unlock(&mksckPage->mutex, MutexModeEX);
+ /*
+ * Decrement the page refcount and relock the
+ * mksckPageListLock for the next for loop.
+ */
+ MksckPage_DecRefc(mksckPage);
+ spin_lock(&mksckPageListLock);
+ break;
+ }
+ }
+
+ /* mksckPageListLock is still locked, mksckPages[ii] can be dereferenced */
+ for (ii = 0; ii < MKSCK_MAX_SHARES; ii++) {
+ MksckPage *mksckPage = mksckPages[ii];
+ if (mksckPage != NULL) {
+ uint32 lState = ATOMIC_GETO(mksckPage->mutex.state);
+ uint32 isOpened = 0; /* guest has an implicit ref */
+
+ seq_printf(m, "MksckPage[%02d]: { vmId = %4x(%c), refC = %2d%s",
+ ii, mksckPage->vmId,
+ mksckPage->isGuest?'G':'H',
+ ATOMIC_GETO(mksckPage->refCount),
+ (isPageMapped&(1<<ii) ? "*" : ""));
+
+ if (lState) {
+ seq_printf(m, ", lock=%x locked by line %d, unlocked by %d",
+ lState, mksckPage->mutex.line, mksckPage->mutex.lineUnl);
+ }
+
+ if (!mksckPage->isGuest) {
+ struct task_struct *target;
+ seq_printf(m, ", tgid = %d", mksckPage->tgid);
+
+ rcu_read_lock();
+
+ target = pid_task(find_vpid(mksckPage->tgid), PIDTYPE_PID);
+ seq_printf(m, "(%s)", target ? target->comm : "no such process");
+
+ rcu_read_unlock();
+ } else {
+ ATOMIC_ADDV(mksckPage->refCount, 1);
+ spin_unlock(&mksckPageListLock);
+
+ err = Mutex_Lock(&mksckPage->mutex, MutexModeEX);
+ vm = (MvpkmVM *) mksckPage->vmHKVA;
+
+ if (err == 0 && vm && vm->wsp) {
+ isOpened = vm->wsp->isOpened;
+ }
+ Mutex_Unlock(&mksckPage->mutex, MutexModeEX);
+ MksckPage_DecRefc(mksckPage);
+ spin_lock(&mksckPageListLock);
+ /*
+ * As the mksckPageListLock was unlocked, nothing
+ * prevented the MksckPage_DecRefc from actually freeing
+ * the page. Let's verify that the page is still there.
+ */
+ if (mksckPage != mksckPages[ii]) {
+ seq_printf(m, " released }\n");
+ continue;
+ }
+ }
+ seq_printf(m, ", sockets[] = {");
+
+ for (jj = 0; jj < mksckPage->numAllocSocks; jj++, isOpened >>= 1) {
+ Mksck *mksck = mksckPage->sockets + jj;
+
+ if (ATOMIC_GETO(mksck->refCount)) {
+ uint32 blocked;
+ lState = ATOMIC_GETO(mksck->mutex.state);
+ seq_printf(m, "\n { addr = %8x, refC = %2d%s%s%s",
+ mksck->addr.addr,
+ ATOMIC_GETO(mksck->refCount),
+ (isOpened & 1 ? "*" : ""),
+ (mksck->shutDown & MKSCK_SHUT_RD ? " SHUTD_RD":""),
+ (mksck->shutDown & MKSCK_SHUT_WR ? " SHUTD_WR":""));
+
+ if (mksck->peer) {
+ seq_printf(m, ", peerAddr = %8x",
+ mksck->peerAddr.addr);
+ }
+
+ if (lState) {
+ seq_printf(m, ", lock=%x locked by line %d, unlocked by %d",
+ lState, mksck->mutex.line, mksck->mutex.lineUnl);
+ }
+
+ if ((blocked = ATOMIC_GETO(mksck->mutex.blocked))) {
+ seq_printf(m, ", blocked=%d", blocked);
+ }
+
+ seq_printf(m, " }");
+ }
+ }
+ seq_printf(m, " } }\n");
+ }
+ }
+ spin_unlock(&mksckPageListLock);
+
+ return 0;
+}
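+
+/*
+ * The resulting debugfs output looks roughly like the following (all values
+ * and the process name are made up for illustration; the file is typically
+ * read with "cat /sys/kernel/debug/mksckPage"):
+ *
+ *   MksckPage[00]: { vmId =  450(H), refC =  2, tgid = 1234(mvpd), sockets[] = {
+ *    { addr = 12340450, refC =  1 } } }
+ *   MksckPage[01]: { vmId =  8a1(G), refC =  3, sockets[] = {
+ *    { addr = 567808a1, refC =  2, peerAddr = 12340450 } } }
+ */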
+
+
+static int
+MksckPageInfoOpen(struct inode *inode, struct file *file)
+{
+ return single_open(file, MksckPageInfoShow, inode->i_private);
+}
+
+static const struct file_operations mksckPageInfoFops = {
+ .open = MksckPageInfoOpen,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static struct dentry *mksckPageDentry = NULL;
+
+void
+MksckPageInfo_Init(void)
+{
+ mksckPageDentry = debugfs_create_file("mksckPage",
+ S_IROTH,
+ NULL,
+ NULL,
+ &mksckPageInfoFops);
+}
+
+void
+MksckPageInfo_Exit(void)
+{
+ if (mksckPageDentry) {
+ debugfs_remove(mksckPageDentry);
+ }
+}
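+
+/*
+ * Illustrative wiring sketch (assumption: the real calls are made from the
+ * mvpkm module init/exit paths, which are not part of this file). Once
+ * registered, the page dump shown by MksckPageInfoShow() is available from
+ * user space at /sys/kernel/debug/mksckPage.
+ */
+#if 0
+static int __init ExampleModuleInit(void)
+{
+   MksckPageInfo_Init();    /* create /sys/kernel/debug/mksckPage */
+   return 0;
+}
+
+static void __exit ExampleModuleExit(void)
+{
+   MksckPageInfo_Exit();    /* remove the debugfs entry */
+}
+#endif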