Revert 57921 - Pull seccomp-sandbox in via DEPS rather than using an in-tree copy

This means changes to the sandbox won't have to be committed twice, to both trees. BUG=none TEST=smoke test of running chromium with --enable-seccomp-sandbox Review URL: http://codereview.chromium.org/3249003 TBR=mseaborn@chromium.org Review URL: http://codereview.chromium.org/3245011 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@57933 0039d316-1c4b-4281-b951-d872f2087c98
author: nsylvain@chromium.org <nsylvain@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2010-08-31 01:16:35 +0000
committer: nsylvain@chromium.org <nsylvain@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2010-08-31 01:16:35 +0000
commit: fb7b5328a5fd3aecfc27f765dea94b961c657597 (patch)
tree: 84adc617db0031a881265e95f9c569de66fa733d
parent: 7302ea910ce937d482780649d6a84bbfff4ac521 (diff)
download: chromium_src-fb7b5328a5fd3aecfc27f765dea94b961c657597.zip
chromium_src-fb7b5328a5fd3aecfc27f765dea94b961c657597.tar.gz
chromium_src-fb7b5328a5fd3aecfc27f765dea94b961c657597.tar.bz2
46 files changed, 14319 insertions, 3 deletions
diff --git a/DEPS b/DEPS
index c356536..004c6f4 100644
--- a/DEPS
+++ b/DEPS
@@ -21,9 +21,6 @@ deps = {
   "src/googleurl":
     "http://google-url.googlecode.com/svn/trunk@145",
 
-  "src/sandbox/linux/seccomp":
-    "http://seccompsandbox.googlecode.com/svn/trunk@91",
-
   "src/sdch/open-vcdiff":
     "http://open-vcdiff.googlecode.com/svn/trunk@28",
 
diff --git a/sandbox/linux/seccomp/Makefile b/sandbox/linux/seccomp/Makefile
new file mode 100644
index 0000000..141d8c3
--- /dev/null
+++ b/sandbox/linux/seccomp/Makefile
@@ -0,0 +1,59 @@
+# Copyright (c) 2010 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# This Makefile has temporarily been checked into the source tree so that
+# we can run the tests. It will be replaced with a proper gyp file.
+
+CFLAGS = -g -O0 -Wall -Werror -Wextra -Wno-missing-field-initializers         \
+         -Wno-unused-parameter -I.
+LDFLAGS = -g
+CPPFLAGS =
+MODS := allocator library debug maps x86_decode securemem sandbox     \
+        syscall syscall_table trusted_thread trusted_process                  \
+        access exit clone getpid gettid ioctl ipc madvise mmap mprotect       \
+        munmap open sigaction sigprocmask socketcall stat
+OBJS64 := $(shell echo ${MODS} | xargs -n 1 | sed -e 's/$$/.o64/')
+OBJS32 := $(shell echo ${MODS} | xargs -n 1 | sed -e 's/$$/.o32/')
+HEADERS:= $(shell for i in ${MODS}; do [ -r "$$i" ] && echo "$$i"; done)
+
+.SUFFIXES: .o64 .o32
+
+all: test
+
+clean:
+	-rm -f *.o *.o32 *.o64 tests/*.o32 tests/*.o64
+	-rm -f core core.* vgcore vgcore.* strace.log*
+	-rm -f run_tests_32 run_tests_64
+	-rm -f tests/test_syscalls.o64 tests/test_syscalls.o32
+	-rm -f tests/test-list.h
+
+test: run_tests_64 run_tests_32
+	./run_tests_64
+	./run_tests_32
+
+# TODO: Track header file dependencies properly
+tests/test_syscalls.o64 tests/test_syscalls.o32: tests/test-list.h
+
+tests/test-list.h: tests/list_tests.py tests/test_syscalls.cc
+	python tests/list_tests.py tests/test_syscalls.cc > $@
+
+run_tests_64: $(OBJS64) tests/test_syscalls.o64 tests/test-list.h
+	g++ -m64 tests/test_syscalls.o64 $(OBJS64) -lpthread -lutil -o $@
+run_tests_32: $(OBJS32) tests/test_syscalls.o32 tests/test-list.h
+	g++ -m32 tests/test_syscalls.o32 $(OBJS32) -lpthread -lutil -o $@
+
+.cc.o: ${HEADERS}
+	${CXX} ${CFLAGS} ${CPPFLAGS} -c -o $@ $<
+
+.cc.o64: ${HEADERS}
+	${CXX} ${CFLAGS} ${CPPFLAGS} -fPIC -c -o $@ $<
+
+.c.o64: ${HEADERS}
+	${CC} ${CFLAGS} ${CPPFLAGS} --std=gnu99 -fPIC -c -o $@ $<
+
+.cc.o32: ${HEADERS}
+	${CXX} ${CFLAGS} ${CPPFLAGS} -m32 -fPIC -c -o $@ $<
+
+.c.o32: ${HEADERS}
+	${CC} ${CFLAGS} ${CPPFLAGS} -m32 --std=gnu99 -fPIC -c -o $@ $<
diff --git a/sandbox/linux/seccomp/access.cc b/sandbox/linux/seccomp/access.cc
new file mode 100644
index 0000000..fbe7e53
--- /dev/null
+++ b/sandbox/linux/seccomp/access.cc
@@ -0,0 +1,97 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "debug.h"
+#include "sandbox_impl.h"
+
+namespace playground {
+
+long Sandbox::sandbox_access(const char *pathname, int mode) {
+  long long tm;
+  Debug::syscall(&tm, __NR_access, "Executing handler");
+  size_t len                      = strlen(pathname);
+  struct Request {
+    int       sysnum;
+    long long cookie;
+    Access    access_req;
+    char      pathname[0];
+  } __attribute__((packed)) *request;
+  char data[sizeof(struct Request) + len];
+  request                         = reinterpret_cast<struct Request*>(data);
+  request->sysnum                 = __NR_access;
+  request->cookie                 = cookie();
+  request->access_req.path_length = len;
+  request->access_req.mode        = mode;
+  memcpy(request->pathname, pathname, len);
+
+  long rc;
+  SysCalls sys;
+  if (write(sys, processFdPub(), request, sizeof(data)) != (int)sizeof(data) ||
+      read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
+    die("Failed to forward access() request [sandbox]");
+  }
+  Debug::elapsed(tm, __NR_access);
+  return rc;
+}
+
+bool Sandbox::process_access(int parentMapsFd, int sandboxFd, int threadFdPub,
+                             int threadFd, SecureMem::Args* mem) {
+  // Read request
+  SysCalls sys;
+  Access access_req;
+  if (read(sys, sandboxFd, &access_req, sizeof(access_req)) !=
+      sizeof(access_req)) {
+ read_parm_failed:
+    die("Failed to read parameters for access() [process]");
+  }
+  int   rc                    = -ENAMETOOLONG;
+  if (access_req.path_length >= sizeof(mem->pathname)) {
+    char buf[32];
+    while (access_req.path_length > 0) {
+      size_t len              = access_req.path_length > sizeof(buf) ?
+                                sizeof(buf) : access_req.path_length;
+      ssize_t i               = read(sys, sandboxFd, buf, len);
+      if (i <= 0) {
+        goto read_parm_failed;
+      }
+      access_req.path_length -= i;
+    }
+    if (write(sys, threadFd, &rc, sizeof(rc)) != sizeof(rc)) {
+      die("Failed to return data from access() [process]");
+    }
+    return false;
+  }
+
+  if (!g_policy.allow_file_namespace) {
+    // We cannot abandon the call once the mutex is locked, so check first.
+    char tmp[access_req.path_length];  // filled by read(); no trailing NUL
+    if (read(sys, sandboxFd, tmp, access_req.path_length) !=
+        (ssize_t)access_req.path_length) {
+      goto read_parm_failed;
+    }
+    Debug::message(("Denying access to \"" +
+                    std::string(tmp, access_req.path_length) + "\"").c_str());
+    SecureMem::abandonSystemCall(threadFd, -EACCES);
+    return false;
+  }
+
+  SecureMem::lockSystemCall(parentMapsFd, mem);
+  if (read(sys, sandboxFd, mem->pathname, access_req.path_length) !=
+      (ssize_t)access_req.path_length) {
+    goto read_parm_failed;
+  }
+  mem->pathname[access_req.path_length] = '\000';
+
+  // TODO(markus): Implement sandboxing policy
+  Debug::message(("Allowing access to \"" + std::string(mem->pathname) +
+                  "\"").c_str());
+
+  // Tell trusted thread to access the file.
+  SecureMem::sendSystemCall(threadFdPub, true, parentMapsFd, mem, __NR_access,
+                            mem->pathname - (char*)mem + (char*)mem->self,
+                            access_req.mode);
+  return true;
+}
+
+} // namespace
diff --git a/sandbox/linux/seccomp/allocator.cc b/sandbox/linux/seccomp/allocator.cc
new file mode 100644
index 0000000..6e11a4a
--- /dev/null
+++ b/sandbox/linux/seccomp/allocator.cc
@@ -0,0 +1,136 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// The allocator is very simplistic. It requests memory pages directly from
+// the system. Each page starts with a header describing the allocation. This
+// makes sure that we can return the memory to the system when it is
+// deallocated.
+// For allocations that are smaller than a single page, we try to squeeze
+// multiple of them into the same page.
+// We expect to use this allocator for a moderate number of small allocations.
+// In most cases, it will only need to ever make a single request to the
+// operating system for the lifetime of the STL container object.
+// We don't worry about memory fragmentation as the allocator is expected to
+// be short-lived.
+
+#include <stdint.h>
+#include <sys/mman.h>
+
+#include "allocator.h"
+#include "linux_syscall_support.h"
+
+namespace playground {
+
+class SysCalls {
+ public:
+  #define SYS_CPLUSPLUS
+  #define SYS_ERRNO     my_errno
+  #define SYS_INLINE    inline
+  #define SYS_PREFIX    -1
+  #undef  SYS_LINUX_SYSCALL_SUPPORT_H
+  #include "linux_syscall_support.h"
+  SysCalls() : my_errno(0) { }
+  int my_errno;
+};
+#ifdef __NR_mmap2
+  #define      MMAP      mmap2
+  #define __NR_MMAP __NR_mmap2
+#else
+  #define      MMAP      mmap
+  #define __NR_MMAP __NR_mmap
+#endif
+
+// We only ever keep track of the very last partial page that was used for
+// allocations. This approach simplifies the code a lot. It can theoretically
+// lead to more memory fragmentation, but for our use case that is unlikely
+// to happen.
+struct Header {
+  // The total amount of memory allocated for this chunk of memory. Typically,
+  // this would be a single page.
+  size_t total_len;
+
+  // "used" keeps track of the number of bytes currently allocated in this
+  // page. Note that as elements are freed from this page, "used" is updated
+  // allowing us to track when the page is free. However, these holes in the
+  // page are never re-used, so "tail" is the only way to find out how much
+  // free space remains and when we need to request another chunk of memory
+  // from the system.
+  size_t used;
+  void   *tail;
+};
+static Header* last_alloc;
+
+void* SystemAllocatorHelper::sys_allocate(size_t size) {
+  // Number of bytes that need to be allocated
+  if (size + 3 < size) {
+    return NULL;
+  }
+  size_t len = (size + 3) & ~3;
+
+  if (last_alloc) {
+    // Remaining space in the last chunk of memory allocated from system
+    size_t remainder = last_alloc->total_len -
+        (reinterpret_cast<char *>(last_alloc->tail) -
+         reinterpret_cast<char *>(last_alloc));
+
+    if (remainder >= len) {
+      void* ret = last_alloc->tail;
+      last_alloc->tail = reinterpret_cast<char *>(last_alloc->tail) + len;
+      last_alloc->used += len;
+      return ret;
+    }
+  }
+
+  SysCalls sys;
+  if (sizeof(Header) + len + 4095 < len) {
+    return NULL;
+  }
+  size_t total_len = (sizeof(Header) + len + 4095) & ~4095;
+  Header* mem = reinterpret_cast<Header *>(
+      sys.MMAP(NULL, total_len, PROT_READ|PROT_WRITE,
+               MAP_PRIVATE|MAP_ANONYMOUS, -1, 0));
+  if (mem == MAP_FAILED) {
+    return NULL;
+  }
+
+  // Only a single-page chunk may be shared with later small allocations:
+  // sys_deallocate() locates the header by masking the address to its page.
+  if (total_len == 4096 && total_len - sizeof(Header) - len >= 4) {
+    last_alloc = mem;
+  }
+  mem->total_len = total_len;
+  mem->used = len;
+  char* ret = reinterpret_cast<char *>(mem) + sizeof(Header);
+  mem->tail = ret + len;
+
+  return ret;
+}
+
+void SystemAllocatorHelper::sys_deallocate(void* p, size_t size) {
+  // Number of bytes in this allocation
+  if (size + 3 < size) {
+    return;
+  }
+  size_t len = (size + 3) & ~3;
+
+  // All allocations (small and large) have starting addresses in the
+  // first page that was allocated from the system. This page starts with
+  // a header that keeps track of how many bytes are currently used. The
+  // header can be found by truncating the last few bits of the address.
+  Header* header = reinterpret_cast<Header *>(
+      reinterpret_cast<uintptr_t>(p) & ~4095);
+  header->used -= len;
+
+  // After the last allocation has been freed, return the page(s) to the
+  // system
+  if (!header->used) {
+    SysCalls sys;
+    sys.munmap(header, header->total_len);
+    if (last_alloc == header) {
+      last_alloc = NULL;
+    }
+  }
+}
+
+}  // namespace
diff --git a/sandbox/linux/seccomp/allocator.h b/sandbox/linux/seccomp/allocator.h
new file mode 100644
index 0000000..29e0065
--- /dev/null
+++ b/sandbox/linux/seccomp/allocator.h
@@ -0,0 +1,88 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Implement a very basic memory allocator that makes direct system calls
+// instead of relying on libc.
+// This allocator is not thread-safe.
+
+#ifndef ALLOCATOR_H__
+#define ALLOCATOR_H__
+
+#include <cstddef>
+
+namespace playground {
+
+class SystemAllocatorHelper {
+ protected:
+  static void *sys_allocate(size_t size);
+  static void sys_deallocate(void* p, size_t size);
+};
+
+template <class T>
+class SystemAllocator : SystemAllocatorHelper {
+ public:
+  typedef T         value_type;
+  typedef T*        pointer;
+  typedef const T*  const_pointer;
+  typedef T&        reference;
+  typedef const T&  const_reference;
+  typedef size_t    size_type;
+  typedef std::ptrdiff_t difference_type;
+
+  template <class U>
+  struct rebind {
+    typedef SystemAllocator<U> other;
+  };
+
+  pointer address(reference value) const {
+    return &value;
+  }
+
+  const_pointer address(const_reference value) const {
+    return &value;
+  }
+
+  SystemAllocator() throw() { }
+  SystemAllocator(const SystemAllocator& src) throw() { }
+  template <class U> SystemAllocator(const SystemAllocator<U>& src) throw() { }
+  ~SystemAllocator() throw() { }
+
+  size_type max_size() const throw() {
+    return (1 << 30) / sizeof(T);
+  }
+
+  pointer allocate(size_type num, const void* = 0) {
+    if (num > max_size()) {
+      return NULL;
+    }
+    return (pointer)sys_allocate(num * sizeof(T));
+  }
+
+  void construct(pointer p, const T& value) {
+    new(reinterpret_cast<void *>(p))T(value);
+  }
+
+  void destroy(pointer p) {
+    p->~T();
+  }
+
+  void deallocate(pointer p, size_type num) {
+    sys_deallocate(p, num * sizeof(T));
+  }
+};
+
+template <class T1, class T2>
+bool operator== (const SystemAllocator<T1>&, const SystemAllocator<T2>&)
+    throw() {
+  return true;
+}
+template <class T1, class T2>
+bool operator!= (const SystemAllocator<T1>&, const SystemAllocator<T2>&)
+    throw() {
+  return false;
+}
+
+}  // namespace
+
+#endif  // ALLOCATOR_H__
diff --git a/sandbox/linux/seccomp/clone.cc b/sandbox/linux/seccomp/clone.cc
new file mode 100644
index 0000000..0d35181
--- /dev/null
+++ b/sandbox/linux/seccomp/clone.cc
@@ -0,0 +1,179 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "debug.h"
+#include "sandbox_impl.h"
+
+namespace playground {
+
+long Sandbox::sandbox_clone(int flags, char* stack, int* pid, int* ctid,
+                            void* tls, void *wrapper_sp) {
+  long long tm;
+  Debug::syscall(&tm, __NR_clone, "Executing handler");
+  struct {
+    int       sysnum;
+    long long cookie;
+    Clone     clone_req;
+  } __attribute__((packed)) request;
+  request.sysnum               = __NR_clone;
+  request.cookie               = cookie();
+  request.clone_req.flags      = flags;
+  request.clone_req.stack      = stack;
+  request.clone_req.pid        = pid;
+  request.clone_req.ctid       = ctid;
+  request.clone_req.tls        = tls;
+
+  // TODO(markus): Passing stack == 0 currently does not do the same thing
+  // that the kernel would do without the sandbox. This is just going to
+  // cause a crash. We should detect this case, and replace the stack pointer
+  // with the correct value, instead.
+  // This is complicated by the fact that we will temporarily be executing
+  // both threads from the same stack. Some synchronization will be necessary.
+  // Fortunately, this complication also explains why hardly anybody ever
+  // does this.
+  // See trusted_thread.cc for more information.
+  long rc;
+  if (stack == 0) {
+    rc = -EINVAL;
+  } else {
+    // Pass along the address on the stack where syscallWrapper() stored the
+    // original CPU registers. These registers will be restored in the newly
+    // created thread prior to returning from the wrapped system call.
+    #if defined(__x86_64__)
+    memcpy(&request.clone_req.regs64, wrapper_sp,
+           sizeof(request.clone_req.regs64) + sizeof(void *));
+    #elif defined(__i386__)
+    memcpy(&request.clone_req.regs32, wrapper_sp,
+           sizeof(request.clone_req.regs32) + sizeof(void *));
+    #else
+    #error Unsupported target platform
+    #endif
+
+    // In order to unblock the signal mask in the newly created thread and
+    // after entering Seccomp mode, we have to call sigreturn(). But that
+    // requires access to a proper stack frame describing a valid signal.
+    // We trigger a signal now and make sure the stack frame ends up on the
+    // new stack. Our segv() handler (in sandbox.cc) does that for us.
+    // See trusted_thread.cc for more details on how threads get created.
+    //
+    // In general we rely on the kernel for generating the signal stack
+    // frame, as the exact binary format has been extended several times over
+    // the course of the kernel's development. Fortunately, the kernel
+    // developers treat the initial part of the stack frame as a stable part
+    // of the ABI. So, we can rely on fixed, well-defined offsets for accessing
+    // register values and for accessing the signal mask.
+    #if defined(__x86_64__)
+    // Red zone compensation. The instrumented system call will remove 128
+    // bytes from the thread's stack prior to returning to the original
+    // call site.
+    stack                   -= 128;
+    request.clone_req.stack  = stack;
+    void *dummy;
+    asm volatile("mov %%rsp, %%rcx\n"
+                 "mov %3, %%rsp\n"
+                 "int $0\n"
+                 "mov %%rcx, %%rsp\n"
+                 : "=a"(request.clone_req.stack), "=&c"(dummy)
+                 : "a"(__NR_clone + 0xF000), "m"(request.clone_req.stack)
+                 : "memory");
+    #elif defined(__i386__)
+    void *dummy;
+    asm volatile("mov %%esp, %%ecx\n"
+                 "mov %3, %%esp\n"
+                 "int $0\n"
+                 "mov %%ecx, %%esp\n"
+                 : "=a"(request.clone_req.stack), "=&c"(dummy)
+                 : "a"(__NR_clone + 0xF000), "m"(request.clone_req.stack)
+                 : "memory");
+    #else
+    #error Unsupported target platform
+    #endif
+
+    // Adjust the signal stack frame so that it contains the correct stack
+    // pointer upon returning from sigreturn().
+    #if defined(__x86_64__)
+    *(char **)(request.clone_req.stack + 0xA0) = stack;
+    #elif defined(__i386__)
+    *(char **)(request.clone_req.stack + 0x1C) = stack;
+    #else
+    #error Unsupported target platform
+    #endif
+
+    SysCalls sys;
+    if (write(sys, processFdPub(), &request, sizeof(request)) !=
+        sizeof(request) ||
+        read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
+      die("Failed to forward clone() request [sandbox]");
+    }
+  }
+  Debug::elapsed(tm, __NR_clone);
+  return rc;
+}
+
+bool Sandbox::process_clone(int parentMapsFd, int sandboxFd, int threadFdPub,
+                            int threadFd, SecureMem::Args* mem) {
+  // Read request
+  Clone clone_req;
+  SysCalls sys;
+  if (read(sys, sandboxFd, &clone_req, sizeof(clone_req)) !=sizeof(clone_req)){
+    die("Failed to read parameters for clone() [process]");
+  }
+
+  // TODO(markus): add policy restricting parameters for clone
+  if ((clone_req.flags & ~CLONE_DETACHED) != (CLONE_VM|CLONE_FS|CLONE_FILES|
+      CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|
+      CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID)) {
+    SecureMem::abandonSystemCall(threadFd, -EPERM);
+    return false;
+  } else {
+    SecureMem::Args* newMem = getNewSecureMem();
+    if (!newMem) {
+      SecureMem::abandonSystemCall(threadFd, -ENOMEM);
+      return false;
+    } else {
+      // clone() has unusual semantics. We don't want to return back into the
+      // trusted thread, but instead we need to continue execution at the IP
+      // where we got called initially.
+      SecureMem::lockSystemCall(parentMapsFd, mem);
+      mem->ret              = clone_req.ret;
+      #if defined(__x86_64__)
+      mem->rbp              = clone_req.regs64.rbp;
+      mem->rbx              = clone_req.regs64.rbx;
+      mem->rcx              = clone_req.regs64.rcx;
+      mem->rdx              = clone_req.regs64.rdx;
+      mem->rsi              = clone_req.regs64.rsi;
+      mem->rdi              = clone_req.regs64.rdi;
+      mem->r8               = clone_req.regs64.r8;
+      mem->r9               = clone_req.regs64.r9;
+      mem->r10              = clone_req.regs64.r10;
+      mem->r11              = clone_req.regs64.r11;
+      mem->r12              = clone_req.regs64.r12;
+      mem->r13              = clone_req.regs64.r13;
+      mem->r14              = clone_req.regs64.r14;
+      mem->r15              = clone_req.regs64.r15;
+      #elif defined(__i386__)
+      mem->ebp              = clone_req.regs32.ebp;
+      mem->edi              = clone_req.regs32.edi;
+      mem->esi              = clone_req.regs32.esi;
+      mem->edx              = clone_req.regs32.edx;
+      mem->ecx              = clone_req.regs32.ecx;
+      mem->ebx              = clone_req.regs32.ebx;
+      #else
+      #error Unsupported target platform
+      #endif
+      newMem->sequence      = 0;
+      newMem->shmId         = -1;
+      mem->newSecureMem     = newMem;
+      mem->processFdPub     = processFdPub_;
+      mem->cloneFdPub       = cloneFdPub_;
+
+      SecureMem::sendSystemCall(threadFdPub, true, parentMapsFd, mem,
+                                __NR_clone, clone_req.flags, clone_req.stack,
+                                clone_req.pid, clone_req.ctid, clone_req.tls);
+      return true;
+    }
+  }
+}
+
+} // namespace
diff --git a/sandbox/linux/seccomp/debug.cc b/sandbox/linux/seccomp/debug.cc
new file mode 100644
index 0000000..5d6de49
--- /dev/null
+++ b/sandbox/linux/seccomp/debug.cc
@@ -0,0 +1,363 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef NDEBUG
+
+#include "debug.h"
+
+namespace playground {
+
+bool Debug::enabled_;
+int  Debug::numSyscallNames_;
+const char **Debug::syscallNames_;
+std::map<int, std::string> Debug::syscallNamesMap_;
+
+Debug Debug::debug_;
+
+Debug::Debug() {
+  // Logging is disabled by default, but can be turned on by setting an
+  // appropriate environment variable. Initialize this code from a global
+  // constructor, so that it runs before the sandbox is turned on.
+  enabled_ = !!getenv("SECCOMP_SANDBOX_DEBUGGING");
+
+  // Read names of system calls from header files, if available. Symbolic
+  // names make debugging so much nicer.
+  if (enabled_) {
+    static const char *filenames[] = {
+      #if __WORDSIZE == 64
+      "/usr/include/asm/unistd_64.h",
+      #elif __WORDSIZE == 32
+      "/usr/include/asm/unistd_32.h",
+      #endif
+      "/usr/include/asm/unistd.h",
+      NULL };
+    numSyscallNames_ = 0;
+    for (const char **fn = filenames; *fn; ++fn) {
+      FILE *fp = fopen(*fn, "r");
+      if (fp) {
+        std::string baseName;
+        int         baseNum = -1;
+        char buf[80];
+        while (fgets(buf, sizeof(buf), fp)) {
+          // Check if the line starts with "#define"
+          static const char* whitespace = " \t\r\n";
+          char *token, *save;
+          token = strtok_r(buf, whitespace, &save);
+          if (token && !strcmp(token, "#define")) {
+
+            // Only parse identifiers that start with "__NR_"
+            token = strtok_r(NULL, whitespace, &save);
+            if (token) {
+              if (strncmp(token, "__NR_", 5)) {
+                continue;
+              }
+              std::string syscallName(token + 5);
+
+              // Parse the value of the symbol. Try to be forgiving in what
+              // we accept, as the file format might change over time.
+              token = strtok_r(NULL, "\r\n", &save);
+              if (token) {
+                // Some values are defined relative to previous values, we
+                // detect these examples by finding an earlier symbol name
+                // followed by a '+' (plus) character.
+                bool isRelative = false;
+                char *base = strstr(token, baseName.c_str());
+                if (baseNum >= 0 && base) {
+                  base += baseName.length();
+                  while (*base == ' ' || *base == '\t') {
+                    ++base;
+                  }
+                  if (*base == '+') {
+                    isRelative = true;
+                    token = base;
+                  }
+                }
+
+                // Skip to the first digit, stopping at the end of the string.
+                while (*token && (*token < '0' || *token > '9')) {
+                  token++;
+                }
+
+                // If we now have a valid datum, enter it into our map.
+                if (*token) {
+                  int sysnum = atoi(token);
+
+                  // Deal with symbols that are defined relative to earlier
+                  // ones.
+                  if (isRelative) {
+                    sysnum += baseNum;
+                  } else {
+                    baseNum  = sysnum;
+                    baseName = syscallName;
+                  }
+
+                  // Keep track of the highest syscall number that we know
+                  // about.
+                  if (sysnum >= numSyscallNames_) {
+                    numSyscallNames_ = sysnum + 1;
+                  }
+
+                  syscallNamesMap_[sysnum] = syscallName;
+                }
+              }
+            }
+          }
+        }
+        fclose(fp);
+        break;
+      }
+    }
+    if (numSyscallNames_) {
+      // We cannot make system calls at the time, when we are looking up
+      // the names. So, copy them into a data structure that can be
+      // accessed without having to allocate memory (i.e. no more STL).
+      syscallNames_ = reinterpret_cast<const char **>(
+          calloc(sizeof(char *), numSyscallNames_));
+      for (std::map<int, std::string>::const_iterator iter =
+               syscallNamesMap_.begin();
+           iter != syscallNamesMap_.end();
+           ++iter) {
+        syscallNames_[iter->first] = iter->second.c_str();
+      }
+    }
+  }
+}
+
+bool Debug::enter() {
+  // Increment the recursion level in TLS storage. This allows us to
+  // make system calls from within our debugging functions, without triggering
+  // additional debugging output.
+  //
+  // This function can be called from both the sandboxed process and from the
+  // trusted process. Only the sandboxed process needs to worry about
+  // recursively calling system calls. The trusted process doesn't intercept
+  // system calls and thus doesn't have this problem. It also doesn't have
+  // a TLS. We explicitly set the segment selector to zero, when in the
+  // trusted process, so that we can avoid tracking recursion levels.
+  int level;
+  #if defined(__x86_64__)
+  asm volatile("mov  %%gs, %0\n"
+               "test %0, %0\n"
+               "jz   1f\n"
+               "movl %%gs:0x1050-0xE0, %0\n"
+               "incl %%gs:0x1050-0xE0\n"
+             "1:\n"
+               : "=r"(level)
+               :
+               : "memory");
+  #elif defined(__i386__)
+  asm volatile("mov  %%fs, %0\n"
+               "test %0, %0\n"
+               "jz   1f\n"
+               "movl %%fs:0x1034-0x58, %0\n"
+               "incl %%fs:0x1034-0x58\n"
+             "1:\n"
+               : "=r"(level)
+               :
+               : "memory");
+  #else
+  #error "Unsupported target platform"
+  #endif
+  return !level;
+}
+
+bool Debug::leave() {
+  // Decrement the recursion level in TLS storage. This allows us to
+  // make system calls from within our debugging functions, without triggering
+  // additional debugging output.
+  //
+  // This function can be called from both the sandboxed process and from the
+  // trusted process. Only the sandboxed process needs to worry about
+  // recursively calling system calls. The trusted process doesn't intercept
+  // system calls and thus doesn't have this problem. It also doesn't have
+  // a TLS. We explicitly set the segment selector to zero, when in the
+  // trusted process, so that we can avoid tracking recursion levels.
+  int level;
+  #if defined(__x86_64__)
+  asm volatile("mov  %%gs, %0\n"
+               "test %0, %0\n"
+               "jz   1f\n"
+               "decl %%gs:0x1050-0xE0\n"
+               "movl %%gs:0x1050-0xE0, %0\n"
+             "1:\n"
+               : "=r"(level)
+               :
+               : "memory");
+  #elif defined(__i386__)
+  asm volatile("mov  %%fs, %0\n"
+               "test %0, %0\n"
+               "jz   1f\n"
+               "decl %%fs:0x1034-0x58\n"
+               "movl %%fs:0x1034-0x58, %0\n"
+             "1:\n"
+               : "=r"(level)
+               :
+               : "memory");
+  #else
+  #error Unsupported target platform
+  #endif
+  return !level;
+}
+
+void Debug::_message(const char* msg) {
+  if (enabled_) {
+    Sandbox::SysCalls sys;
+    size_t len = strlen(msg);
+    if (len && msg[len-1] != '\n') {
+      // Write operations should be atomic, so that we don't interleave
+      // messages from multiple threads. Append a newline, if it is not
+      // already there.
+      char copy[len + 1];
+      memcpy(copy, msg, len);
+      copy[len] = '\n';
+      Sandbox::write(sys, 2, copy, len + 1);
+    } else {
+      Sandbox::write(sys, 2, msg, len);
+    }
+  }
+}
+
+void Debug::message(const char* msg) {
+  if (enabled_) {
+    if (enter()) {
+      _message(msg);
+    }
+    leave();
+  }
+}
+
+void Debug::gettimeofday(long long* tm) {
+  if (tm) {
+    struct timeval tv;
+    #if defined(__i386__)
+    // Zero out the lastSyscallNum, so that we don't try to coalesce
+    // calls to gettimeofday(). For debugging purposes, we need the
+    // exact time.
+    asm volatile("movl $0, %fs:0x102C-0x58");
+    #elif !defined(__x86_64__)
+    #error Unsupported target platform
+    #endif
+    ::gettimeofday(&tv, NULL);
+    *tm = 1000ULL*1000ULL*static_cast<unsigned>(tv.tv_sec) +
+          static_cast<unsigned>(tv.tv_usec);
+  }
+}
+
+void Debug::syscall(long long* tm, int sysnum, const char* msg, int call) {
+  // This function gets called from the system call wrapper. Avoid calling
+  // any library functions that themselves need system calls.
+  if (enabled_) {
+    if (enter() || !tm) {
+      gettimeofday(tm);
+
+      const char *sysname = NULL;
+      if (sysnum >= 0 && sysnum < numSyscallNames_) {
+        sysname = syscallNames_[sysnum];
+      }
+      static const char kUnnamedMessage[] = "Unnamed syscall #";
+      char unnamed[40];
+      if (!sysname) {
+        memcpy(unnamed, kUnnamedMessage, sizeof(kUnnamedMessage) - 1);
+        itoa(unnamed + sizeof(kUnnamedMessage) - 1, sysnum);
+        sysname = unnamed;
+      }
+      #if defined(__NR_socketcall) || defined(__NR_ipc)
+      char extra[40];
+      *extra = '\000';
+      #if defined(__NR_socketcall)
+      if (sysnum == __NR_socketcall) {
+        static const char* socketcall_name[] = {
+          0, "socket", "bind", "connect", "listen", "accept", "getsockname",
+          "getpeername", "socketpair", "send", "recv", "sendto","recvfrom",
+          "shutdown", "setsockopt", "getsockopt", "sendmsg", "recvmsg",
+          "accept4"
+        };
+        if (call >= 1 &&
+            call < (int)(sizeof(socketcall_name)/sizeof(char *))) {
+          strcat(strcpy(extra, " "), socketcall_name[call]);
+        } else {
+          itoa(strcpy(extra, " #") + 2, call);
+        }
+      }
+      #endif
+      #if defined(__NR_ipc)
+      if (sysnum == __NR_ipc) {
+        static const char* ipc_name[] = {
+          0, "semop", "semget", "semctl", "semtimedop", 0, 0, 0, 0, 0, 0,
+          "msgsnd", "msgrcv", "msgget", "msgctl", 0, 0, 0, 0, 0, 0,
+          "shmat", "shmdt", "shmget", "shmctl" };
+        if (call >= 1 && call < (int)(sizeof(ipc_name)/sizeof(char *)) &&
+            ipc_name[call]) {
+          strcat(strcpy(extra, " "), ipc_name[call]);
+        } else {
+          itoa(strcpy(extra, " #") + 2, call);
+        }
+      }
+      #endif
+      #else
+      static const char extra[1] = { 0 };
+      #endif
+      char buf[strlen(sysname) + strlen(extra) + (msg ? strlen(msg) : 0) + 4];
+      strcat(strcat(strcat(strcat(strcpy(buf, sysname), extra), ": "),
+                    msg ? msg : ""), "\n");
+      _message(buf);
+    }
+    leave();
+  }
+}
+
+char* Debug::itoa(char* s, int n) {
+  // Remember return value
+  char *ret   = s;
+
+  // Insert sign for negative numbers
+  if (n < 0) {
+    *s++      = '-';
+    n         = -n;
+  }
+
+  // Convert to decimal (in reverse order)
+  char *start = s;
+  do {
+    *s++      = '0' + (n % 10);
+    n        /= 10;
+  } while (n);
+  *s--        = '\000';
+
+  // Reverse order of digits
+  while (start < s) {
+    char ch   = *s;
+    *s--      = *start;
+    *start++  = ch;
+  }
+
+  return ret;
+}
+
+void Debug::elapsed(long long tm, int sysnum, int call) {
+  if (enabled_) {
+    if (enter()) {
+      // Compute the time that has passed since the system call started.
+      long long delta;
+      gettimeofday(&delta);
+      delta -= tm;
+
+      // Format "Elapsed time: %d.%03dms" without using sprintf().
+      char buf[80];
+      itoa(strrchr(strcpy(buf, "Elapsed time: "), '\000'), delta/1000);
+      delta %= 1000;
+      strcat(buf, delta < 100 ? delta < 10 ? ".00" : ".0" : ".");
+      itoa(strrchr(buf, '\000'), delta);
+      strcat(buf, "ms");
+
+      // Print system call name and elapsed time.
+      syscall(NULL, sysnum, buf, call);
+    }
+    leave();
+  }
+}
+
+} // namespace
+
+#endif // NDEBUG
diff --git a/sandbox/linux/seccomp/debug.h b/sandbox/linux/seccomp/debug.h
new file mode 100644
index 0000000..eb5a194
--- /dev/null
+++ b/sandbox/linux/seccomp/debug.h
@@ -0,0 +1,80 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef DEBUG_H__
+#define DEBUG_H__
+
+#include <map>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string>
+#include <string.h>
+
+#include "sandbox_impl.h"
+
+namespace playground {
+
+class Debug {
+ public:
+  // If debugging is enabled, write a message to stderr.
+  static void message(const char* msg)
+  #ifndef NDEBUG
+  asm("playground$debugMessage")  // gives the function a fixed symbol name
+  #if defined(__x86_64__)
+  __attribute__((visibility("internal")))
+  #endif
+  ;
+  #else
+  { }  // NDEBUG build: empty inline stub
+  #endif
+
+  // If debugging is enabled, write the name of the syscall and an optional
+  // message to stderr.
+  static void syscall(long long* tm, int sysnum,
+                      const char* msg, int call = -1)
+  #ifndef NDEBUG
+  ;
+  #else
+  { }  // NDEBUG build: empty inline stub; "tm" is left untouched
+  #endif
+
+  // Print how much wall-time has elapsed since the last call to syscall()
+  static void elapsed(long long tm, int sysnum, int call = -1)
+  #ifndef NDEBUG
+  ;
+  #else
+  {
+  }
+  #endif
+
+  // Check whether debugging is enabled. Always false in NDEBUG builds.
+  static bool isEnabled() {
+    #ifndef NDEBUG
+    return enabled_;
+    #else
+    return false;
+    #endif
+  }
+
+ private:
+  #ifndef NDEBUG  // everything below exists in debug builds only
+  Debug();
+  static bool  enter();  // elapsed() does its work only if this returns true
+  static bool  leave();  // called by elapsed() after its enter() block
+  static void  _message(const char* msg);
+  static void  gettimeofday(long long* tm);  // stores the current time in *tm
+  static char* itoa(char* s, int n);  // writes "n" in decimal; returns "s"
+
+  static Debug debug_;
+
+  static bool  enabled_;  // value reported by isEnabled()
+  static int   numSyscallNames_;
+  static const char **syscallNames_;
+  static std::map<int, std::string> syscallNamesMap_;
+  #endif
+};
+
+} // namespace
+
+#endif // DEBUG_H__
diff --git a/sandbox/linux/seccomp/exit.cc b/sandbox/linux/seccomp/exit.cc
new file mode 100644
index 0000000..f4db643
--- /dev/null
+++ b/sandbox/linux/seccomp/exit.cc
@@ -0,0 +1,38 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "debug.h"
+#include "sandbox_impl.h"
+
+namespace playground {
+
+long Sandbox::sandbox_exit(int status) {  // handler for exit(); never returns
+  long long tm;
+  Debug::syscall(&tm, __NR_exit, "Executing handler");
+  struct {
+    int       sysnum;
+    long long cookie;
+  } __attribute__((packed)) request;  // written to the fd as raw bytes
+  request.sysnum = __NR_exit;
+  request.cookie = cookie();
+
+  SysCalls sys;
+  if (write(sys, processFdPub(), &request, sizeof(request)) !=
+      sizeof(request)) {  // a short write is treated as fatal
+    die("Failed to forward exit() request [sandbox]");
+  }
+  for (;;) {  // keep retrying in case _exit() ever returns
+    sys._exit(status);
+  }
+}
+
+bool Sandbox::process_exit(int parentMapsFd, int sandboxFd, int threadFdPub,
+                           int threadFd, SecureMem::Args* mem) {
+  SecureMem::lockSystemCall(parentMapsFd, mem);  // lock first, then send
+  SecureMem::sendSystemCall(threadFdPub, true, parentMapsFd, mem,
+                            __NR_exit, 0);  // sandboxFd, threadFd are unused
+  return true;
+}
+
+} // namespace
diff --git a/sandbox/linux/seccomp/getpid.cc b/sandbox/linux/seccomp/getpid.cc
new file mode 100644
index 0000000..be5449b
--- /dev/null
+++ b/sandbox/linux/seccomp/getpid.cc
@@ -0,0 +1,17 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "debug.h"
+#include "sandbox_impl.h"
+
+namespace playground {
+
+long Sandbox::sandbox_getpid() {
+  long long started;  // handed to the Debug hooks for timing output
+  Debug::syscall(&started, __NR_getpid, "Executing handler");
+  Debug::elapsed(started, __NR_getpid);
+  return pid_;  // answered from the pid_ member; no request is forwarded
+}
+
+} // namespace
diff --git a/sandbox/linux/seccomp/gettid.cc b/sandbox/linux/seccomp/gettid.cc
new file mode 100644
index 0000000..699774a
--- /dev/null
+++ b/sandbox/linux/seccomp/gettid.cc
@@ -0,0 +1,18 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "debug.h"
+#include "sandbox_impl.h"
+
+namespace playground {
+
+long Sandbox::sandbox_gettid() {
+  long long started;  // handed to the Debug hooks for timing output
+  Debug::syscall(&started, __NR_gettid, "Executing handler");
+  const pid_t id = tid();  // looked up between the two Debug hooks
+  Debug::elapsed(started, __NR_gettid);
+  return id;
+}
+
+} // namespace
diff --git a/sandbox/linux/seccomp/ioctl.cc b/sandbox/linux/seccomp/ioctl.cc
new file mode 100644
index 0000000..4d2b3c5c5
--- /dev/null
+++ b/sandbox/linux/seccomp/ioctl.cc
@@ -0,0 +1,61 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "debug.h"
+#include "sandbox_impl.h"
+
+namespace playground {
+
+long Sandbox::sandbox_ioctl(int d, int req, void *arg) {
+  long long tm;
+  Debug::syscall(&tm, __NR_ioctl, "Executing handler");
+  struct {
+    int       sysnum;
+    long long cookie;
+    IOCtl     ioctl_req;
+  } __attribute__((packed)) request;  // written to the fd as raw bytes
+  request.sysnum        = __NR_ioctl;
+  request.cookie        = cookie();
+  request.ioctl_req.d   = d;
+  request.ioctl_req.req = req;
+  request.ioctl_req.arg = arg;
+
+  long rc;  // result is read back from threadFdPub()
+  SysCalls sys;
+  if (write(sys, processFdPub(), &request, sizeof(request)) !=
+      sizeof(request) ||  // read is only attempted after a full write
+      read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
+    die("Failed to forward ioctl() request [sandbox]");
+  }
+  Debug::elapsed(tm, __NR_ioctl);
+  return rc;
+}
+
+bool Sandbox::process_ioctl(int parentMapsFd, int sandboxFd, int threadFdPub,
+                            int threadFd, SecureMem::Args* mem) {
+  // Read request (the IOCtl payload written by sandbox_ioctl())
+  IOCtl ioctl_req;
+  SysCalls sys;
+  if (read(sys, sandboxFd, &ioctl_req, sizeof(ioctl_req)) !=sizeof(ioctl_req)){
+    die("Failed to read parameters for ioctl() [process]");
+  }
+  int rc = -EINVAL;  // result reported for every request that is not allowed
+  switch (ioctl_req.req) {
+    case TCGETS:  // only these two request codes are passed on
+    case TIOCGWINSZ:
+      SecureMem::sendSystemCall(threadFdPub, false, -1, mem, __NR_ioctl,
+                                ioctl_req.d, ioctl_req.req, ioctl_req.arg);
+      return true;
+    default:
+      if (Debug::isEnabled()) {
+        char buf[80];
+        sprintf(buf, "Unsupported ioctl: 0x%04X\n", ioctl_req.req);
+        Debug::message(buf);
+      }
+      SecureMem::abandonSystemCall(threadFd, rc);
+      return false;
+  }
+}
+
+} // namespace
diff --git a/sandbox/linux/seccomp/ipc.cc b/sandbox/linux/seccomp/ipc.cc
new file mode 100644
index 0000000..67a4e34
--- /dev/null
+++ b/sandbox/linux/seccomp/ipc.cc
@@ -0,0 +1,351 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "debug.h"
+#include "sandbox_impl.h"
+
+namespace playground {
+
+#ifndef IPC_PRIVATE  // fallback values, used only if no header defined them
+#define IPC_PRIVATE 0
+#endif
+#ifndef IPC_RMID
+#define IPC_RMID    0
+#endif
+#ifndef IPC_64
+#define IPC_64      256
+#endif
+
+#if defined(__NR_shmget)
+void* Sandbox::sandbox_shmat(int shmid, const void* shmaddr, int shmflg) {
+  long long tm;
+  Debug::syscall(&tm, __NR_shmat, "Executing handler");
+
+  struct {
+    int       sysnum;
+    long long cookie;
+    ShmAt     shmat_req;
+  } __attribute__((packed)) request;  // written to the fd as raw bytes
+  request.sysnum             = __NR_shmat;
+  request.cookie             = cookie();
+  request.shmat_req.shmid    = shmid;
+  request.shmat_req.shmaddr  = shmaddr;
+  request.shmat_req.shmflg   = shmflg;
+
+  long rc;  // result is read back from threadFdPub()
+  SysCalls sys;
+  if (write(sys, processFdPub(), &request, sizeof(request)) !=
+      sizeof(request) ||  // read is only attempted after a full write
+      read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
+    die("Failed to forward shmat() request [sandbox]");
+  }
+  Debug::elapsed(tm, __NR_shmat);
+  return reinterpret_cast<void *>(rc);  // the long result is the return value
+}
+
+long Sandbox::sandbox_shmctl(int shmid, int cmd, void* buf) {
+  long long tm;
+  Debug::syscall(&tm, __NR_shmctl, "Executing handler");
+
+  struct {
+    int       sysnum;
+    long long cookie;
+    ShmCtl    shmctl_req;
+  } __attribute__((packed)) request;  // written to the fd as raw bytes
+  request.sysnum           = __NR_shmctl;
+  request.cookie           = cookie();
+  request.shmctl_req.shmid = shmid;
+  request.shmctl_req.cmd   = cmd;
+  request.shmctl_req.buf   = buf;
+
+  long rc;  // result is read back from threadFdPub()
+  SysCalls sys;
+  if (write(sys, processFdPub(), &request, sizeof(request)) !=
+      sizeof(request) ||  // read is only attempted after a full write
+      read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
+    die("Failed to forward shmctl() request [sandbox]");
+  }
+  Debug::elapsed(tm, __NR_shmctl);
+  return rc;
+}
+
+long Sandbox::sandbox_shmdt(const void* shmaddr) {
+  long long tm;
+  Debug::syscall(&tm, __NR_shmdt, "Executing handler");
+
+  struct {
+    int       sysnum;
+    long long cookie;
+    ShmDt     shmdt_req;
+  } __attribute__((packed)) request;  // written to the fd as raw bytes
+  request.sysnum             = __NR_shmdt;
+  request.cookie             = cookie();
+  request.shmdt_req.shmaddr  = shmaddr;
+
+  long rc;  // result is read back from threadFdPub()
+  SysCalls sys;
+  if (write(sys, processFdPub(), &request, sizeof(request)) !=
+      sizeof(request) ||  // read is only attempted after a full write
+      read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
+    die("Failed to forward shmdt() request [sandbox]");
+  }
+  Debug::elapsed(tm, __NR_shmdt);
+  return rc;
+}
+
+long Sandbox::sandbox_shmget(int key, size_t size, int shmflg) {
+  long long tm;
+  Debug::syscall(&tm, __NR_shmget, "Executing handler");
+
+  struct {
+    int       sysnum;
+    long long cookie;
+    ShmGet    shmget_req;
+  } __attribute__((packed)) request;  // written to the fd as raw bytes
+  request.sysnum            = __NR_shmget;
+  request.cookie            = cookie();
+  request.shmget_req.key    = key;
+  request.shmget_req.size   = size;
+  request.shmget_req.shmflg = shmflg;
+
+  long rc;  // result is read back from threadFdPub()
+  SysCalls sys;
+  if (write(sys, processFdPub(), &request, sizeof(request)) !=
+      sizeof(request) ||  // read is only attempted after a full write
+      read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
+    die("Failed to forward shmget() request [sandbox]");
+  }
+  Debug::elapsed(tm, __NR_shmget);
+  return rc;
+}
+
+bool Sandbox::process_shmat(int parentMapsFd, int sandboxFd, int threadFdPub,
+                            int threadFd, SecureMem::Args* mem) {
+  // Read request (the ShmAt payload written by sandbox_shmat())
+  ShmAt shmat_req;
+  SysCalls sys;
+  if (read(sys, sandboxFd, &shmat_req, sizeof(shmat_req)) !=
+      sizeof(shmat_req)) {
+    die("Failed to read parameters for shmat() [process]");
+  }
+
+  // We only allow attaching to the shm identifier that was returned by
+  // the most recent call to shmget(IPC_PRIVATE)
+  if (shmat_req.shmaddr || shmat_req.shmflg || shmat_req.shmid != mem->shmId) {
+    mem->shmId = -1;  // the remembered id is reset on denial as well
+    SecureMem::abandonSystemCall(threadFd, -EINVAL);
+    return false;
+  }
+
+  mem->shmId = -1;  // each remembered id can be attached at most once
+  SecureMem::sendSystemCall(threadFdPub, false, -1, mem,
+                            __NR_shmat, shmat_req.shmid, shmat_req.shmaddr,
+                            shmat_req.shmflg);
+  return true;
+}
+
+bool Sandbox::process_shmctl(int parentMapsFd, int sandboxFd, int threadFdPub,
+                             int threadFd, SecureMem::Args* mem) {
+  // Read request (the ShmCtl payload written by sandbox_shmctl())
+  ShmCtl shmctl_req;
+  SysCalls sys;
+  if (read(sys, sandboxFd, &shmctl_req, sizeof(shmctl_req)) !=
+      sizeof(shmctl_req)) {
+    die("Failed to read parameters for shmctl() [process]");
+  }
+
+  // The only shmctl() operation that we need to support is removal. This
+  // operation is generally safe. Any other command bit, or a buffer, is denied.
+  if ((shmctl_req.cmd & ~(IPC_64 | IPC_RMID)) || shmctl_req.buf) {
+    mem->shmId = -1;
+    SecureMem::abandonSystemCall(threadFd, -EINVAL);
+    return false;
+  }
+
+  mem->shmId = -1;  // the remembered shm id is reset in either outcome
+  SecureMem::sendSystemCall(threadFdPub, false, -1, mem,
+                            __NR_shmctl, shmctl_req.shmid, shmctl_req.cmd,
+                            shmctl_req.buf);
+  return true;
+}
+
+bool Sandbox::process_shmdt(int parentMapsFd, int sandboxFd, int threadFdPub,
+                            int threadFd, SecureMem::Args* mem) {
+  // Read request (the ShmDt payload written by sandbox_shmdt())
+  ShmDt shmdt_req;
+  SysCalls sys;
+  if (read(sys, sandboxFd, &shmdt_req, sizeof(shmdt_req)) !=
+      sizeof(shmdt_req)) {
+    die("Failed to read parameters for shmdt() [process]");
+  }
+
+  // Detaching shared memory segments is generally safe, but just in case
+  // of a kernel bug, we make sure that the address does not fall into any
+  // of the reserved memory regions.
+  ProtectedMap::const_iterator iter = protectedMap_.lower_bound(
+      (void *)shmdt_req.shmaddr);
+  if (iter != protectedMap_.begin()) {
+    --iter;  // also inspect the entry that starts before the address
+  }
+  for (; iter != protectedMap_.end() && iter->first <= shmdt_req.shmaddr;
+       ++iter){
+    if (shmdt_req.shmaddr < reinterpret_cast<void *>(
+            reinterpret_cast<char *>(iter->first) + iter->second) &&
+        shmdt_req.shmaddr >= iter->first) {  // inside [first, first+second)
+      mem->shmId = -1;
+      SecureMem::abandonSystemCall(threadFd, -EINVAL);
+      return false;
+    }
+  }
+
+  mem->shmId = -1;  // the remembered shm id is reset in either outcome
+  SecureMem::sendSystemCall(threadFdPub, false, -1, mem,
+                            __NR_shmdt, shmdt_req.shmaddr);
+  return true;
+}
+
+bool Sandbox::process_shmget(int parentMapsFd, int sandboxFd, int threadFdPub,
+                             int threadFd, SecureMem::Args* mem) {
+  // Read request (the ShmGet payload written by sandbox_shmget())
+  ShmGet shmget_req;
+  SysCalls sys;
+  if (read(sys, sandboxFd, &shmget_req, sizeof(shmget_req)) !=
+      sizeof(shmget_req)) {
+    die("Failed to read parameters for shmget() [process]");
+  }
+
+  // We do not want to allow the sandboxed application to access arbitrary
+  // shared memory regions. We only allow it to access regions that it
+  // created itself. Flags other than the low nine (0777) bits are denied.
+  if (shmget_req.key != IPC_PRIVATE || shmget_req.shmflg & ~0777) {
+    mem->shmId = -1;
+    SecureMem::abandonSystemCall(threadFd, -EINVAL);
+    return false;
+  }
+
+  mem->shmId = -1;  // NOTE(review): the new id is presumably stored elsewhere
+  SecureMem::sendSystemCall(threadFdPub, false, -1, mem,
+                            __NR_shmget, shmget_req.key, shmget_req.size,
+                            shmget_req.shmflg);
+  return true;
+}
+#endif
+
+#if defined(__NR_ipc)
+#ifndef SHMAT  // ipc() sub-call numbers, used only if no header defined them
+#define SHMAT       21
+#endif
+#ifndef SHMDT
+#define SHMDT       22
+#endif
+#ifndef SHMGET
+#define SHMGET      23
+#endif
+#ifndef SHMCTL
+#define SHMCTL      24
+#endif
+
+long Sandbox::sandbox_ipc(unsigned call, int first, int second, int third,
+                         void* ptr, long fifth) {
+  long long tm;
+  Debug::syscall(&tm, __NR_ipc, "Executing handler", call);
+  struct {
+    int       sysnum;
+    long long cookie;
+    IPC       ipc_req;
+  } __attribute__((packed)) request;  // written to the fd as raw bytes
+  request.sysnum         = __NR_ipc;
+  request.cookie         = cookie();
+  request.ipc_req.call   = call;
+  request.ipc_req.first  = first;
+  request.ipc_req.second = second;
+  request.ipc_req.third  = third;
+  request.ipc_req.ptr    = ptr;
+  request.ipc_req.fifth  = fifth;
+
+  long rc;  // result is read back from threadFdPub()
+  SysCalls sys;
+  if (write(sys, processFdPub(), &request, sizeof(request)) !=
+      sizeof(request) ||  // read is only attempted after a full write
+      read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
+    die("Failed to forward ipc() request [sandbox]");
+  }
+  Debug::elapsed(tm, __NR_ipc, call);
+  return rc;
+}
+
+bool Sandbox::process_ipc(int parentMapsFd, int sandboxFd, int threadFdPub,
+                          int threadFd, SecureMem::Args* mem) {
+  // Read request (the IPC payload written by sandbox_ipc())
+  IPC ipc_req;
+  SysCalls sys;
+  if (read(sys, sandboxFd, &ipc_req, sizeof(ipc_req)) != sizeof(ipc_req)) {
+    die("Failed to read parameters for ipc() [process]");
+  }
+
+  // We do not support all of the SysV IPC calls. In fact, we only support
+  // the minimum feature set necessary for Chrome's renderers to share memory
+  // with the X server.
+  switch (ipc_req.call) {
+    case SHMAT: {
+      // We only allow attaching to the shm identifier that was returned by
+      // the most recent call to shmget(IPC_PRIVATE)
+      if (ipc_req.ptr || ipc_req.second || ipc_req.first != mem->shmId) {
+        goto deny;
+      }
+    accept:  // shared exit for every permitted sub-call
+      mem->shmId = -1;
+      SecureMem::sendSystemCall(threadFdPub, false, -1, mem,
+                                __NR_ipc, ipc_req.call, ipc_req.first,
+                                ipc_req.second, ipc_req.third, ipc_req.ptr,
+                                ipc_req.fifth);
+      return true;
+    }
+    case SHMCTL:
+      // The only shmctl() operation that we need to support is removal. This
+      // operation is generally safe.
+      if ((ipc_req.second & ~(IPC_64 | IPC_RMID)) || ipc_req.ptr) {
+        goto deny;
+      } else {
+        goto accept;
+      }
+    case SHMDT: {
+      // Detaching shared memory segments is generally safe, but just in case
+      // of a kernel bug, we make sure that the address does not fall into any
+      // of the reserved memory regions.
+      ProtectedMap::const_iterator iter = protectedMap_.lower_bound(
+          (void *)ipc_req.ptr);
+      if (iter != protectedMap_.begin()) {
+        --iter;  // also inspect the entry that starts before the address
+      }
+      for (; iter != protectedMap_.end() && iter->first <=ipc_req.ptr; ++iter){
+        if (ipc_req.ptr < reinterpret_cast<void *>(
+                reinterpret_cast<char *>(iter->first) + iter->second) &&
+            ipc_req.ptr >= iter->first) {  // inside [first, first+second)
+          goto deny;
+        }
+      }
+      goto accept;
+    }
+    case SHMGET:
+      // We do not want to allow the sandboxed application to access arbitrary
+      // shared memory regions. We only allow it to access regions that it
+      // created itself.
+      if (ipc_req.first != IPC_PRIVATE || ipc_req.third & ~0777) {
+        goto deny;
+      } else {
+        goto accept;
+      }
+    default:
+      // Other than SysV shared memory, we do not actually need to support any
+      // other SysV IPC calls.
+    deny:  // shared exit for every rejected request
+      mem->shmId = -1;
+      SecureMem::abandonSystemCall(threadFd, -EINVAL);
+      return false;
+  }
+}
+#endif
+
+} // namespace
diff --git a/sandbox/linux/seccomp/library.cc b/sandbox/linux/seccomp/library.cc
new file mode 100644
index 0000000..8dd9b93
--- /dev/null
+++ b/sandbox/linux/seccomp/library.cc
@@ -0,0 +1,1208 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#define XOPEN_SOURCE 500
+#include <algorithm>
+#include <elf.h>
+#include <errno.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/unistd.h>
+#include <set>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ptrace.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "allocator.h"
+#include "debug.h"
+#include "library.h"
+#include "sandbox_impl.h"
+#include "syscall.h"
+#include "syscall_table.h"
+#include "x86_decode.h"
+
+#if defined(__x86_64__)  // map the width-neutral Elf_* names to Elf64 types
+typedef Elf64_Phdr    Elf_Phdr;
+typedef Elf64_Rela    Elf_Rel;  // Rela entries here, plain Rel on i386 below
+
+typedef Elf64_Half    Elf_Half;
+typedef Elf64_Word    Elf_Word;
+typedef Elf64_Sword   Elf_Sword;
+typedef Elf64_Xword   Elf_Xword;
+typedef Elf64_Sxword  Elf_Sxword;
+typedef Elf64_Off     Elf_Off;
+typedef Elf64_Section Elf_Section;
+typedef Elf64_Versym  Elf_Versym;
+
+#define ELF_ST_BIND   ELF64_ST_BIND
+#define ELF_ST_TYPE   ELF64_ST_TYPE
+#define ELF_ST_INFO   ELF64_ST_INFO
+#define ELF_R_SYM     ELF64_R_SYM
+#define ELF_R_TYPE    ELF64_R_TYPE
+#define ELF_R_INFO    ELF64_R_INFO
+
+#define ELF_REL_PLT   ".rela.plt"
+#define ELF_JUMP_SLOT R_X86_64_JUMP_SLOT
+#elif defined(__i386__)  // map the width-neutral Elf_* names to Elf32 types
+typedef Elf32_Phdr    Elf_Phdr;
+typedef Elf32_Rel     Elf_Rel;
+
+typedef Elf32_Half    Elf_Half;
+typedef Elf32_Word    Elf_Word;
+typedef Elf32_Sword   Elf_Sword;
+typedef Elf32_Xword   Elf_Xword;
+typedef Elf32_Sxword  Elf_Sxword;
+typedef Elf32_Off     Elf_Off;
+typedef Elf32_Section Elf_Section;
+typedef Elf32_Versym  Elf_Versym;
+
+#define ELF_ST_BIND   ELF32_ST_BIND
+#define ELF_ST_TYPE   ELF32_ST_TYPE
+#define ELF_ST_INFO   ELF32_ST_INFO
+#define ELF_R_SYM     ELF32_R_SYM
+#define ELF_R_TYPE    ELF32_R_TYPE
+#define ELF_R_INFO    ELF32_R_INFO
+
+#define ELF_REL_PLT   ".rel.plt"
+#define ELF_JUMP_SLOT R_386_JMP_SLOT
+#else  // any other architecture is rejected at compile time
+#error Unsupported target platform
+#endif
+
+namespace playground {
+
+char* Library::__kernel_vsyscall;  // definitions of Library's static members
+char* Library::__kernel_sigreturn;
+char* Library::__kernel_rt_sigreturn;
+
+Library::~Library() {
+  if (image_size_) {  // non-zero only once getOriginal() extended the mapping
+    // We no longer need access to a full mapping of the underlying library
+    // file. Move the temporarily extended mapping back to where we originally
+    // found. Make sure to preserve any changes that we might have made since.
+    Sandbox::SysCalls sys;
+    sys.mprotect(image_, 4096, PROT_READ | PROT_WRITE | PROT_EXEC);
+    if (memcmp(image_, memory_ranges_.rbegin()->second.start, 4096)) {
+      // Only copy data, if we made any changes in this data. Otherwise there
+      // is no need to create another modified COW mapping.
+      memcpy(image_, memory_ranges_.rbegin()->second.start, 4096);
+    }
+    sys.mprotect(image_, 4096, PROT_READ | PROT_EXEC);  // drop write access
+    sys.mremap(image_, image_size_, 4096, MREMAP_MAYMOVE | MREMAP_FIXED,
+               memory_ranges_.rbegin()->second.start);  // shrink and move back
+  }
+}
+
+char* Library::getBytes(char* dst, const char* src, ssize_t len) {
+  // Some kernels don't allow accessing the VDSO from write()
+  if (isVDSO_ &&
+      src >= memory_ranges_.begin()->second.start &&
+      src <= memory_ranges_.begin()->second.stop) {
+    ssize_t max =
+      reinterpret_cast<char *>(memory_ranges_.begin()->second.stop) - src;
+    if (len > max) {
+      len = max;  // clamp the copy to the end of the range
+    }
+    memcpy(dst, src, len);
+    return dst;
+  }
+
+  // Read up to "len" bytes from "src" and copy them to "dst". Short
+  // copies are possible, if we are at the end of a mapping. Returns
+  // NULL, if the operation failed completely.
+  static int helper_socket[2];  // created on first use, kept for later calls
+  Sandbox::SysCalls sys;
+  if (!helper_socket[0] && !helper_socket[1]) {
+    // Copy data through a socketpair, as this allows us to access it
+    // without incurring a segmentation fault.
+    sys.socketpair(AF_UNIX, SOCK_STREAM, 0, helper_socket);
+  }
+  char* ptr = dst;
+  int   inc = 4096;  // 4096: copy up to the next 4096-byte boundary; 1: bytewise
+  while (len > 0) {
+    ssize_t l = inc == 1 ? inc : 4096 - (reinterpret_cast<long>(src) & 0xFFF);
+    if (l > len) {
+      l = len;
+    }
+    l = NOINTR_SYS(sys.write(helper_socket[0], src, l));
+    if (l == -1) {
+      if (sys.my_errno == EFAULT) {
+        if (inc == 1) {  // even a single byte is unreadable
+          if (ptr == dst) {
+            return NULL;  // nothing was copied at all
+          }
+          break;  // short copy: keep what was copied so far
+        }
+        inc = 1;  // retry one byte at a time
+        continue;
+      } else {
+        return NULL;
+      }
+    }
+    l = sys.read(helper_socket[1], ptr, l);
+    if (l <= 0) {
+      return NULL;
+    }
+    ptr += l;
+    src += l;
+    len -= l;
+  }
+  return dst;
+}
+
+char *Library::get(Elf_Addr offset, char *buf, size_t len) {
+  // Copies "len" bytes located at "offset" into "buf" and returns "buf".
+  // Returns NULL on failure; "buf" is zeroed before any copy is attempted.
+  memset(buf, 0, len);
+  if (!valid_) {
+    return NULL;
+  }
+  RangeMap::const_iterator iter = memory_ranges_.lower_bound(offset);
+  if (iter == memory_ranges_.end()) {
+    return NULL;
+  }
+  offset -= iter->first;  // now relative to the start of the matched range
+  long size = reinterpret_cast<char *>(iter->second.stop) -
+              reinterpret_cast<char *>(iter->second.start);
+  // Check "len" first: "size - len" is unsigned and wraps if len > size.
+  if (size < 0 || len > static_cast<size_t>(size) || offset > size - len) {
+    return NULL;
+  }
+  char *src = reinterpret_cast<char *>(iter->second.start) + offset;
+  if (!getBytes(buf, src, len)) {
+    return NULL;
+  }
+  return buf;
+}
+
+Library::string Library::get(Elf_Addr offset) {  // NUL-terminated string read
+  if (!valid_) {
+    return "";
+  }
+  RangeMap::const_iterator iter = memory_ranges_.lower_bound(offset);
+  if (iter == memory_ranges_.end()) {
+    return "";
+  }
+  offset -= iter->first;  // now relative to the start of the matched range
+  const char *start = reinterpret_cast<char *>(iter->second.start) + offset;
+  const char *stop  = reinterpret_cast<char *>(iter->second.stop) + offset;
+  char buf[4096]    = { 0 };  // NOTE(review): "stop" is also shifted by offset
+  getBytes(buf, start, stop - start >= (int)sizeof(buf) ?
+           sizeof(buf) - 1 : stop - start);  // result unchecked; buf is zeroed
+  start             = buf;  // from here on, scan the local copy
+  stop              = buf;
+  while (*stop) {
+    ++stop;
+  }
+  string s = stop > start ? string(start, stop - start) : "";
+  return s;
+}
+
+char *Library::getOriginal(Elf_Addr offset, char *buf, size_t len) {
+  if (!valid_) {
+    memset(buf, 0, len);
+    return NULL;
+  }
+  Sandbox::SysCalls sys;
+  if (!image_ && !isVDSO_ && !memory_ranges_.empty() &&
+      memory_ranges_.rbegin()->first == 0) {
+    // Extend the mapping of the very first page of the underlying library
+    // file. This way, we can read the original file contents of the entire
+    // library.
+    // We have to be careful, because doing so temporarily removes the first
+    // 4096 bytes of the library from memory. And we don't want to accidentally
+    // unmap code that we are executing. So, only use functions that can be
+    // inlined.
+    void* start = memory_ranges_.rbegin()->second.start;
+    image_size_ = memory_ranges_.begin()->first +
+      (reinterpret_cast<char *>(memory_ranges_.begin()->second.stop) -
+       reinterpret_cast<char *>(memory_ranges_.begin()->second.start));
+    if (image_size_ < 8192) {
+      // It is possible to create a library that is only a single page in
+      // size. In that case, we have to make sure that we artificially map
+      // one extra page past the end of it, as our code relies on mremap()
+      // actually moving the mapping.
+      image_size_ = 8192;
+    }
+    image_ = reinterpret_cast<char *>(sys.mremap(start, 4096, image_size_,
+                                                 MREMAP_MAYMOVE));
+    if (image_size_ == 8192 && image_ == start) {
+      // We really mean it, when we say we want the memory to be moved.
+      image_ = reinterpret_cast<char *>(sys.mremap(start, 4096, image_size_,
+                                                   MREMAP_MAYMOVE));
+      sys.munmap(reinterpret_cast<char *>(start) + 4096, 4096);
+    }
+    if (image_ == MAP_FAILED) {
+      image_ = NULL;
+    } else {
+      sys.MMAP(start, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
+               MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+      for (int i = 4096 / sizeof(long); i--;  // "i--" so that word 0 is copied
+           reinterpret_cast<long *>(start)[i] =
+             reinterpret_cast<long *>(image_)[i]);
+    }
+  }
+
+  if (image_) {
+    if (offset + len > image_size_) {
+      // It is quite likely that we initially did not map the entire file as
+      // we did not know how large it is. So, if necessary, try to extend the
+      // mapping.
+      size_t new_size = (offset + len + 4095) & ~4095;
+      char* tmp =
+        reinterpret_cast<char *>(sys.mremap(image_, image_size_, new_size,
+                                            MREMAP_MAYMOVE));
+      if (tmp != MAP_FAILED) {
+        image_      = tmp;
+        image_size_ = new_size;
+      }
+    }
+    if (buf && offset + len <= image_size_) {
+      return reinterpret_cast<char *>(memcpy(buf, image_ + offset, len));
+    }
+    return NULL;  // "buf" may be NULL when only extending the mapping
+  }
+  return buf ? get(offset, buf, len) : NULL;  // no image: use in-memory copy
+}
+
+Library::string Library::getOriginal(Elf_Addr offset) {
+  if (!valid_) {
+    return "";
+  }
+  // Make sure we actually have a mapping that we can access. If the string
+  // is located at the end of the image, we might not yet have extended the
+  // mapping sufficiently.
+  if (!image_ || image_size_ <= offset) {
+    getOriginal(offset, NULL, 1);  // NULL buffer: only (re)map, copy nothing
+  }
+
+  if (image_) {
+    if (offset < image_size_) {
+      char* start = image_ + offset;
+      char* stop  = start;
+      while (stop < image_ + image_size_ && *stop) {
+        ++stop;
+        if (stop >= image_ + image_size_) {
+          getOriginal(stop - image_, NULL, 1);  // try to grow the mapping
+        }
+      }
+      return string(start, stop - start);
+    }
+    return "";
+  }
+  return get(offset);  // no image mapping: read from the in-memory ranges
+}
+
+const Elf_Ehdr* Library::getEhdr() {
+  // The stored ELF header is only handed out while valid_ is set;
+  // otherwise callers receive NULL.
+  const Elf_Ehdr* header = valid_ ? &ehdr_ : NULL;
+  return header;
+}
+
+const Elf_Shdr* Library::getSection(const string& section) {
+  // Header of the named section; NULL if valid_ is unset or name is unknown.
+  if (valid_) {
+    SectionTable::const_iterator entry = section_table_.find(section);
+    if (entry != section_table_.end()) {
+      return &entry->second.second;
+    }
+  }
+  return NULL;
+}
+
+int Library::getSectionIndex(const string& section) {
+  // Index of the named section; -1 if valid_ is unset or name is unknown.
+  if (valid_) {
+    SectionTable::const_iterator entry = section_table_.find(section);
+    if (entry != section_table_.end()) {
+      return entry->second.first;
+    }
+  }
+  return -1;
+}
+
+void Library::makeWritable(bool state) const {
+  const int writeBit = state ? PROT_WRITE : 0;  // OR-ed onto each range's prot
+  for (RangeMap::const_iterator it = memory_ranges_.begin();
+       it != memory_ranges_.end(); ++it) {
+    const Range& r = it->second;
+    const long bytes = reinterpret_cast<char *>(r.stop) -
+                       reinterpret_cast<char *>(r.start);
+    Sandbox::SysCalls sys;
+    sys.mprotect(r.start, bytes, r.prot | writeBit);
+  }
+}
+
+bool Library::isSafeInsn(unsigned short insn) {
+  // Returns true if the opcode "insn" is on the list below of instructions
+  // without unexpected side-effects. Such an instruction can be safely
+  // relocated from the function that we are patching into the out-of-line
+  // scratch space; this is often needed to make room for the JMP into it.
+  return ((insn & 0x7) < 0x6 && (insn & 0xF0) < 0x40
+          /* ADD, OR, ADC, SBB, AND, SUB, XOR, CMP */) ||
+         #if defined(__x86_64__)
+         insn == 0x63 /* MOVSXD */ ||
+         #endif
+         (insn >= 0x80 && insn <= 0x8E /* ADD, OR, ADC,
+         SBB, AND, SUB, XOR, CMP, TEST, XCHG, MOV, LEA */) ||
+         (insn == 0x90) || /* NOP */
+         (insn >= 0xA0 && insn <= 0xA9) /* MOV, TEST */ ||
+         (insn >= 0xB0 && insn <= 0xBF /* MOV */) ||
+         (insn >= 0xC0 && insn <= 0xC1) || /* Bit Shift */
+         (insn >= 0xD0 && insn <= 0xD3) || /* Bit Shift */
+         (insn >= 0xC6 && insn <= 0xC7 /* MOV */) ||
+         (insn == 0xF7) /* TEST, NOT, NEG, MUL, IMUL, DIV, IDIV */;
+}
+
+char* Library::getScratchSpace(const Maps* maps, char* near, int needed,
+                               char** extraSpace, int* extraLength) {
+  if (needed > *extraLength ||  // page exhausted, or more than 1536MB away
+      labs(*extraSpace - reinterpret_cast<char *>(near)) > (1536 << 20)) {
+    if (*extraSpace) {
+      // Start a new scratch page and mark any previous page as write-protected
+      Sandbox::SysCalls sys;
+      sys.mprotect(*extraSpace, 4096, PROT_READ|PROT_EXEC);
+    }
+    // Our new scratch space is initially executable and writable.
+    *extraLength = 4096;
+    *extraSpace = maps->allocNearAddr(near, *extraLength,
+                                      PROT_READ|PROT_WRITE|PROT_EXEC);
+  }
+  if (*extraSpace) {
+    *extraLength -= needed;  // space is handed out from the end of the page
+    return *extraSpace + *extraLength;
+  }
+  Sandbox::die("Insufficient space to intercept system call");  // no page
+}
+
+// Disassembles the code in [start, end) and rewrites every system call entry
+// found there (SYSCALL on x86-64; INT $0x80 or a %gs-relative indirect CALL
+// that resolves to __kernel_vsyscall on x86-32; on x86-64 also indirect CALLs
+// while patching the VDSO) so that it jumps to out-of-line code placed in
+// scratch space. That code forwards to syscallWrapper.
+//
+//   maps         - passed to getScratchSpace() when new scratch space is
+//                  needed near the patched code
+//   start, end   - half-open range of code that is decoded and patched in
+//                  place
+//   extraSpace,
+//   extraLength  - current scratch page and the number of bytes still unused
+//                  in it; both are updated by getScratchSpace()
+//
+// A system call that cannot be redirected with a jump is replaced by a
+// trapping INT instruction instead. Sandbox::die() is called if an indirect
+// call cannot be redirected.
+void Library::patchSystemCallsInFunction(const Maps* maps, char *start,
+                                         char *end, char** extraSpace,
+                                         int* extraLength) {
+  // First pass: collect the targets of all direct branches in this range, so
+  // that we never relocate an instruction that something else jumps to.
+  std::set<char *, std::less<char *>, SystemAllocator<char *> > branch_targets;
+  for (char *ptr = start; ptr < end; ) {
+    unsigned short insn = next_inst((const char **)&ptr, __WORDSIZE == 64);
+    char *target;
+    if ((insn >= 0x70 && insn <= 0x7F) /* Jcc */ || insn == 0xEB /* JMP */) {
+      target = ptr + (reinterpret_cast<signed char *>(ptr))[-1];
+    } else if (insn == 0xE8 /* CALL */ || insn == 0xE9 /* JMP */ ||
+               (insn >= 0x0F80 && insn <= 0x0F8F) /* Jcc */) {
+      target = ptr + (reinterpret_cast<int *>(ptr))[-1];
+    } else {
+      continue;
+    }
+    branch_targets.insert(target);
+  }
+  struct Code {
+    char*          addr;
+    int            len;
+    unsigned short insn;
+    bool           is_ip_relative;
+  } code[5] = { { 0 } };
+  int codeIdx = 0;
+  char* ptr = start;
+  while (ptr < end) {
+    // Keep a ring-buffer of the last few instruction in order to find the
+    // correct place to patch the code.
+    char *mod_rm;
+    code[codeIdx].addr = ptr;
+    code[codeIdx].insn = next_inst((const char **)&ptr, __WORDSIZE == 64,
+                                   0, 0, &mod_rm, 0, 0);
+    code[codeIdx].len = ptr - code[codeIdx].addr;
+    code[codeIdx].is_ip_relative =
+      #if defined(__x86_64__)
+        mod_rm && (*mod_rm & 0xC7) == 0x5;
+      #else
+        false;
+      #endif
+
+    // Whenever we find a system call, we patch it with a jump to out-of-line
+    // code that redirects to our system call wrapper.
+    bool is_syscall = true;
+    #if defined(__x86_64__)
+    bool is_indirect_call = false;
+    if (code[codeIdx].insn == 0x0F05 /* SYSCALL */ ||
+        // In addition, on x86-64, we need to redirect all CALLs between the
+        // VDSO and the VSyscalls page. We want these to jump to our own
+        // modified copy of the VSyscalls. As we know that the VSyscalls are
+        // always more than 2GB away from the VDSO, the compiler has to
+        // generate some form of indirect jumps. We can find all indirect
+        // CALLs and redirect them to a separate scratch area, where we can
+        // inspect the destination address. If it indeed points to the
+        // VSyscall area, we then adjust the destination address accordingly.
+        (is_indirect_call =
+         (isVDSO_ && vsys_offset_ && code[codeIdx].insn == 0xFF &&
+          !code[codeIdx].is_ip_relative &&
+          mod_rm && (*mod_rm & 0x38) == 0x10 /* CALL (indirect) */))) {
+      is_syscall = !is_indirect_call;
+    #elif defined(__i386__)
+    bool is_gs_call = false;
+    if (code[codeIdx].len  == 7 &&
+        code[codeIdx].insn == 0xFF &&
+        code[codeIdx].addr[2] == '\x15' /* CALL (indirect) */ &&
+        code[codeIdx].addr[0] == '\x65' /* %gs prefix */) {
+      char* target;
+      asm volatile("mov %%gs:(%1), %0\n"
+                   : "=a"(target)
+                   : "c"(*reinterpret_cast<int *>(code[codeIdx].addr+3)));
+      if (target == __kernel_vsyscall) {
+        is_gs_call = true;
+        // TODO(markus): also handle the other vsyscalls
+      }
+    }
+    if (is_gs_call ||
+        (code[codeIdx].insn == 0xCD &&
+         code[codeIdx].addr[1] == '\x80' /* INT $0x80 */)) {
+    #else
+    #error Unsupported target platform
+    #endif
+      // Found a system call. Search backwards to figure out how to redirect
+      // the code. We will need to overwrite a couple of instructions and,
+      // of course, move these instructions somewhere else.
+      int startIdx = codeIdx;
+      int endIdx = codeIdx;
+      int length = code[codeIdx].len;
+      for (int idx = codeIdx;
+           (idx = (idx + (sizeof(code) / sizeof(struct Code)) - 1) %
+                  (sizeof(code) / sizeof(struct Code))) != codeIdx; ) {
+        // Use the set's own lookup; the generic algorithm would have to walk
+        // the set's iterators one element at a time.
+        std::set<char *>::const_iterator iter =
+            branch_targets.upper_bound(code[idx].addr);
+        if (iter != branch_targets.end() && *iter < ptr) {
+          // Found a branch pointing to somewhere past our instruction. This
+          // instruction cannot be moved safely. Leave it in place.
+          break;
+        }
+        if (code[idx].addr && !code[idx].is_ip_relative &&
+            isSafeInsn(code[idx].insn)) {
+          // These are all benign instructions with no side-effects and no
+          // dependency on the program counter. We should be able to safely
+          // relocate them.
+          startIdx = idx;
+          length   = ptr - code[startIdx].addr;
+        } else {
+          break;
+        }
+      }
+      // Search forward past the system call, too. Sometimes, we can only
+      // find relocatable instructions following the system call.
+      #if defined(__i386__)
+   findEndIdx:
+      #endif
+      char *next = ptr;
+      for (int i = codeIdx;
+           next < end &&
+           (i = (i + 1) % (sizeof(code) / sizeof(struct Code))) != startIdx;
+           ) {
+        std::set<char *>::const_iterator iter =
+            branch_targets.lower_bound(next);
+        if (iter != branch_targets.end() && *iter == next) {
+          // Found branch target pointing to our instruction
+          break;
+        }
+        char *tmp_rm;
+        code[i].addr = next;
+        code[i].insn = next_inst((const char **)&next, __WORDSIZE == 64,
+                                 0, 0, &tmp_rm, 0, 0);
+        code[i].len = next - code[i].addr;
+        // Same rule as for the instructions decoded by the main loop: only
+        // x86-64 treats this ModRM encoding as IP relative.
+        code[i].is_ip_relative =
+          #if defined(__x86_64__)
+            tmp_rm && (*tmp_rm & 0xC7) == 0x5;
+          #else
+            false;
+          #endif
+        if (!code[i].is_ip_relative && isSafeInsn(code[i].insn)) {
+          endIdx = i;
+          length = next - code[startIdx].addr;
+        } else {
+          break;
+        }
+      }
+      // We now know, how many instructions neighboring the system call we
+      // can safely overwrite. On x86-32 we need six bytes, and on x86-64
+      // We need five bytes to insert a JMPQ and a 32bit address. We then
+      // jump to a code fragment that safely forwards to our system call
+      // wrapper.
+      // On x86-64, this is complicated by the fact that the API allows up
+      // to 128 bytes of red-zones below the current stack pointer. So, we
+      // cannot write to the stack until we have adjusted the stack
+      // pointer.
+      // On both x86-32 and x86-64 we take care to leave the stack unchanged
+      // while we are executing the preamble and postamble. This allows us
+      // to treat instructions that reference %esp/%rsp as safe for
+      // relocation.
+      // In particular, this means that on x86-32 we cannot use CALL, but
+      // have to use a PUSH/RET combination to change the instruction pointer.
+      // On x86-64, we can instead use a 32bit JMPQ.
+      //
+      // .. .. .. .. ; any leading instructions copied from original code
+      // 48 81 EC 80 00 00 00        SUB  $0x80, %rsp
+      // 50                          PUSH %rax
+      // 48 8D 05 .. .. .. ..        LEA  ...(%rip), %rax
+      // 50                          PUSH %rax
+      // 48 B8 .. .. .. ..           MOV  $syscallWrapper, %rax
+      // .. .. .. ..
+      // 50                          PUSH %rax
+      // 48 8D 05 06 00 00 00        LEA  6(%rip), %rax
+      // 48 87 44 24 10              XCHG %rax, 16(%rsp)
+      // C3                          RETQ
+      // 48 81 C4 80 00 00 00        ADD  $0x80, %rsp
+      // .. .. .. .. ; any trailing instructions copied from original code
+      // E9 .. .. .. ..              JMPQ ...
+      //
+      // Total: 52 bytes + any bytes that were copied
+      //
+      // On x86-32, the stack is available and we can do:
+      //
+      // TODO(markus): Try to maintain frame pointers on x86-32
+      //
+      // .. .. .. .. ; any leading instructions copied from original code
+      // 68 .. .. .. ..              PUSH return_addr
+      // 68 .. .. .. ..              PUSH $syscallWrapper
+      // C3                          RET
+      // .. .. .. .. ; any trailing instructions copied from original code
+      // 68 .. .. .. ..              PUSH return_addr
+      // C3                          RET
+      //
+      // Total: 17 bytes + any bytes that were copied
+      //
+      // For indirect jumps from the VDSO to the VSyscall page, we instead
+      // replace the following code (this is only necessary on x86-64). This
+      // time, we don't have to worry about red zones:
+      //
+      // .. .. .. .. ; any leading instructions copied from original code
+      // E8 00 00 00 00              CALL .
+      // 48 83 04 24 ..              ADDQ $.., (%rsp)
+      // FF .. .. .. .. ..           PUSH ..  ; from original CALL instruction
+      // 48 81 3C 24 00 00 00 FF     CMPQ $0xFFFFFFFFFF000000, 0(%rsp)
+      // 72 10                       JB   . + 16
+      // 81 2C 24 .. .. .. ..        SUBL ..., 0(%rsp)
+      // C7 44 24 04 00 00 00 00     MOVL $0, 4(%rsp)
+      // C3                          RETQ
+      // 48 87 04 24                 XCHG %rax,(%rsp)
+      // 48 89 44 24 08              MOV  %rax,0x8(%rsp)
+      // 58                          POP  %rax
+      // C3                          RETQ
+      // .. .. .. .. ; any trailing instructions copied from original code
+      // E9 .. .. .. ..              JMPQ ...
+      //
+      // Total: 52 bytes + any bytes that were copied
+
+      if (length < (__WORDSIZE == 32 ? 6 : 5)) {
+        // There are a very small number of instruction sequences that we
+        // cannot easily intercept, and that have been observed in real world
+        // examples. Handle them here:
+        #if defined(__i386__)
+        int diff;
+        if (!memcmp(code[codeIdx].addr, "\xCD\x80\xEB", 3) &&
+            (diff = *reinterpret_cast<signed char *>(
+                 code[codeIdx].addr + 3)) < 0 && diff >= -6) {
+          // We have seen...
+          //   for (;;) {
+          //      _exit(0);
+          //   }
+          // ..get compiled to:
+          //   B8 01 00 00 00      MOV  $__NR_exit, %eax
+          //   66 90               XCHG %ax, %ax
+          //   31 DB             0:XOR  %ebx, %ebx
+          //   CD 80               INT  $0x80
+          //   EB FA               JMP  0b
+          // The JMP is really superfluous as the system call never returns.
+          // And there are in fact no returning system calls that need to be
+          // unconditionally repeated in an infinite loop.
+          // If we replace the JMP with NOPs, the system call can successfully
+          // be intercepted.
+          *reinterpret_cast<unsigned short *>(code[codeIdx].addr + 2) = 0x9090;
+          goto findEndIdx;
+        }
+        #elif defined(__x86_64__)
+        std::set<char *>::const_iterator iter;
+        #endif
+        // If we cannot figure out any other way to intercept this system call,
+        // we replace it with a call to INT0. This causes a SEGV which we then
+        // handle in the signal handler. That's a lot slower than rewriting the
+        // instruction with a jump, but it should only happen very rarely.
+        if (is_syscall) {
+          // Copying two bytes from the one-character literal deliberately
+          // includes its terminating NUL, so the bytes written are CD 00.
+          memcpy(code[codeIdx].addr, "\xCD", 2);
+          if (code[codeIdx].len > 2) {
+            memset(code[codeIdx].addr + 2, 0x90, code[codeIdx].len - 2);
+          }
+          goto replaced;
+        }
+        #if defined(__x86_64__)
+        // On x86-64, we occasionally see code like this in the VDSO:
+        //   48 8B 05 CF FE FF FF  MOV   -0x131(%rip),%rax
+        //   FF 50 20              CALLQ *0x20(%rax)
+        // By default, we would not replace the MOV instruction, as it is
+        // IP relative. But if the following instruction is also IP relative,
+        // we are left with only three bytes which is not enough to insert a
+        // jump.
+        // We recognize this particular situation, and as long as the CALLQ
+        // is not a branch target, we decide to still relocate the entire
+        // sequence. We just have to make sure that we then patch up the
+        // IP relative addressing.
+        else if (is_indirect_call && startIdx == codeIdx &&
+                 code[startIdx = (startIdx + (sizeof(code) /
+                                              sizeof(struct Code)) - 1) %
+                      (sizeof(code) / sizeof(struct Code))].addr &&
+                 ptr - code[startIdx].addr >= 5 &&
+                 code[startIdx].is_ip_relative &&
+                 isSafeInsn(code[startIdx].insn) &&
+                 ((iter = branch_targets.upper_bound(code[startIdx].addr)) ==
+                  branch_targets.end() || *iter >= ptr)) {
+          // We changed startIdx to include the IP relative instruction.
+          // When copying this preamble, we make sure to patch up the
+          // offset.
+        }
+        #endif
+        else {
+          Sandbox::die("Cannot intercept system call");
+        }
+      }
+      // Pick the smallest set of neighboring instructions that frees up
+      // enough bytes for the jump: first walk backwards (but not past
+      // startIdx), then forwards.
+      int needed = (__WORDSIZE == 32 ? 6 : 5) - code[codeIdx].len;
+      int first = codeIdx;
+      while (needed > 0 && first != startIdx) {
+        first = (first + (sizeof(code) / sizeof(struct Code)) - 1) %
+                (sizeof(code) / sizeof(struct Code));
+        needed -= code[first].len;
+      }
+      int second = codeIdx;
+      while (needed > 0) {
+        second = (second + 1) % (sizeof(code) / sizeof(struct Code));
+        needed -= code[second].len;
+      }
+      int preamble = code[codeIdx].addr - code[first].addr;
+      int postamble = code[second].addr + code[second].len -
+                      code[codeIdx].addr - code[codeIdx].len;
+
+      // The following is all the code that construct the various bits of
+      // assembly code.
+      #if defined(__x86_64__)
+      if (is_indirect_call) {
+        needed = 52 + preamble + code[codeIdx].len + postamble;
+      } else {
+        needed = 52 + preamble + postamble;
+      }
+      #elif defined(__i386__)
+      needed = 17 + preamble + postamble;
+      #else
+      #error Unsupported target platform
+      #endif
+
+      // Allocate scratch space and copy the preamble of code that was moved
+      // from the function that we are patching.
+      char* dest = getScratchSpace(maps, code[first].addr, needed,
+                                   extraSpace, extraLength);
+      memcpy(dest, code[first].addr, preamble);
+
+      // For jumps from the VDSO to the VSyscalls we sometimes allow exactly
+      // one IP relative instruction in the preamble.
+      if (code[first].is_ip_relative) {
+        *reinterpret_cast<int *>(dest + (code[codeIdx].addr -
+                                         code[first].addr) - 4)
+          -= dest - code[first].addr;
+      }
+
+      // For indirect calls, we need to copy the actual CALL instruction and
+      // turn it into a PUSH instruction.
+      #if defined(__x86_64__)
+      if (is_indirect_call) {
+        memcpy(dest + preamble, "\xE8\x00\x00\x00\x00\x48\x83\x04\x24", 9);
+        dest[preamble + 9] = code[codeIdx].len + 42;
+        memcpy(dest + preamble + 10, code[codeIdx].addr, code[codeIdx].len);
+
+        // Convert CALL -> PUSH
+        dest[preamble + 10 + (mod_rm - code[codeIdx].addr)] |= 0x20;
+        preamble += 10 + code[codeIdx].len;
+      }
+      #endif
+
+      // Copy the static body of the assembly code.
+      memcpy(dest + preamble,
+           #if defined(__x86_64__)
+           is_indirect_call ?
+           "\x48\x81\x3C\x24\x00\x00\x00\xFF\x72\x10\x81\x2C\x24\x00\x00\x00"
+           "\x00\xC7\x44\x24\x04\x00\x00\x00\x00\xC3\x48\x87\x04\x24\x48\x89"
+           "\x44\x24\x08\x58\xC3" :
+           "\x48\x81\xEC\x80\x00\x00\x00\x50\x48\x8D\x05\x00\x00\x00\x00\x50"
+           "\x48\xB8\x00\x00\x00\x00\x00\x00\x00\x00\x50\x48\x8D\x05\x06\x00"
+           "\x00\x00\x48\x87\x44\x24\x10\xC3\x48\x81\xC4\x80\x00\x00",
+           is_indirect_call ? 37 : 47
+           #elif defined(__i386__)
+           "\x68\x00\x00\x00\x00\x68\x00\x00\x00\x00\xC3", 11
+           #else
+           #error Unsupported target platform
+           #endif
+           );
+
+      // Copy the postamble that was moved from the function that we are
+      // patching.
+      memcpy(dest + preamble +
+             #if defined(__x86_64__)
+             (is_indirect_call ? 37 : 47),
+             #elif defined(__i386__)
+             11,
+             #else
+             #error Unsupported target platform
+             #endif
+             code[codeIdx].addr + code[codeIdx].len,
+             postamble);
+
+      // Patch up the various computed values
+      #if defined(__x86_64__)
+      int post = preamble + (is_indirect_call ? 37 : 47) + postamble;
+      dest[post] = '\xE9';
+      *reinterpret_cast<int *>(dest + post + 1) =
+          (code[second].addr + code[second].len) - (dest + post + 5);
+      if (is_indirect_call) {
+        *reinterpret_cast<int *>(dest + preamble + 13) = vsys_offset_;
+      } else {
+        *reinterpret_cast<int *>(dest + preamble + 11) =
+            (code[second].addr + code[second].len) - (dest + preamble + 15);
+        *reinterpret_cast<void **>(dest + preamble + 18) =
+            reinterpret_cast<void *>(&syscallWrapper);
+      }
+      #elif defined(__i386__)
+      *(dest + preamble + 11 + postamble) = '\x68'; // PUSH
+      *reinterpret_cast<char **>(dest + preamble + 12 + postamble) =
+          code[second].addr + code[second].len;
+      *(dest + preamble + 16 + postamble) = '\xC3'; // RET
+      *reinterpret_cast<char **>(dest + preamble + 1) =
+          dest + preamble + 11;
+      *reinterpret_cast<void (**)()>(dest + preamble + 6) = syscallWrapper;
+      #else
+      #error Unsupported target platform
+      #endif
+
+      // Pad unused space in the original function with NOPs
+      memset(code[first].addr, 0x90 /* NOP */,
+             code[second].addr + code[second].len - code[first].addr);
+
+      // Replace the system call with an unconditional jump to our new code.
+      #if defined(__x86_64__)
+      *code[first].addr = '\xE9';   // JMPQ
+      *reinterpret_cast<int *>(code[first].addr + 1) =
+          dest - (code[first].addr + 5);
+      #elif defined(__i386__)
+      code[first].addr[0] = '\x68'; // PUSH
+      *reinterpret_cast<char **>(code[first].addr + 1) = dest;
+      code[first].addr[5] = '\xC3'; // RET
+      #else
+      #error Unsupported target platform
+      #endif
+    }
+   replaced:
+    codeIdx = (codeIdx + 1) % (sizeof(code) / sizeof(struct Code));
+  }
+}
+
+// Redirects the well-known x86-32 VDSO entry points (__kernel_vsyscall,
+// __kernel_sigreturn and __kernel_rt_sigreturn) to syscallWrapper. Stubs that
+// are needed for the redirection are placed in scratch space obtained through
+// extraSpace/extraLength. Does nothing on other architectures.
+void Library::patchVDSO(char** extraSpace, int* extraLength){
+  #if defined(__i386__)
+  Sandbox::SysCalls sys;
+
+  // Nothing can be patched without a known kernel entry point.
+  if (!__kernel_vsyscall) {
+    return;
+  }
+
+  // Make the page that holds the kernel entry point writable. Give up
+  // quietly if the protection cannot be changed.
+  void* const entry_page = reinterpret_cast<void *>(
+      reinterpret_cast<long>(__kernel_vsyscall) & ~0xFFF);
+  if (sys.mprotect(entry_page, 4096, PROT_READ|PROT_WRITE|PROT_EXEC) != 0) {
+    return;
+  }
+
+  // x86-32 has a small number of well-defined functions in the VDSO library.
+  // The automatic rewriting code does not cope well with them, so each one
+  // is given an explicit replacement below.
+  //
+  // No attempt is made to use the more modern system call instructions that
+  // the hardware might support; everything goes through the wrapper.
+  //
+  // TODO(markus): Investigate whether it is worthwhile to optimize this
+  // code path and use the platform-specific entry code.
+  char* const wrapper = reinterpret_cast<char *>(&syscallWrapper);
+
+  // Kernel entry point becomes:
+  //
+  // E9 .. .. .. ..    JMP syscallWrapper
+  __kernel_vsyscall[0] = '\xE9';
+  *reinterpret_cast<long *>(__kernel_vsyscall + 1) =
+      wrapper - reinterpret_cast<char *>(__kernel_vsyscall + 5);
+
+  if (__kernel_sigreturn) {
+    // sigreturn() becomes a jump to a stub that does:
+    //
+    // 58                POP %eax
+    // B8 77 00 00 00    MOV $0x77, %eax
+    // E8 .. .. .. ..    CALL syscallWrapper
+    char* stub = getScratchSpace(maps_, __kernel_sigreturn, 11, extraSpace,
+                                 extraLength);
+    memcpy(stub, "\x58\xB8\x77\x00\x00\x00\xE8", 7);
+    *reinterpret_cast<long *>(stub + 7) = wrapper - stub - 11;
+    __kernel_sigreturn[0] = '\xE9';
+    *reinterpret_cast<long *>(__kernel_sigreturn + 1) =
+        stub - reinterpret_cast<char *>(__kernel_sigreturn) - 5;
+  }
+
+  if (__kernel_rt_sigreturn) {
+    // rt_sigreturn() becomes a jump to a stub that does:
+    //
+    // B8 AD 00 00 00    MOV $0xAD, %eax
+    // E8 .. .. .. ..    CALL syscallWrapper
+    char* stub = getScratchSpace(maps_, __kernel_rt_sigreturn, 10, extraSpace,
+                                 extraLength);
+    memcpy(stub, "\xB8\xAD\x00\x00\x00\xE8", 6);
+    *reinterpret_cast<long *>(stub + 6) = wrapper - stub - 10;
+    __kernel_rt_sigreturn[0] = '\xE9';
+    *reinterpret_cast<long *>(__kernel_rt_sigreturn + 1) =
+        stub - reinterpret_cast<char *>(__kernel_rt_sigreturn) - 5;
+  }
+  #endif
+}
+
+// On x86-64, creates a patched copy of the VSyscall page close to the
+// original, redirects IP-relative accesses in the copy back to the original
+// page's variables, and rewrites the system calls in each function of the
+// copy. Returns the distance from the copy to the original page, or 0 if
+// there is no VSyscall page (or on other architectures).
+int Library::patchVSystemCalls() {
+  #if defined(__x86_64__)
+  // VSyscalls live in a shared 4kB page at the top of the address space. This
+  // page cannot be unmapped nor remapped. We have to create a copy within
+  // 2GB of the page, and rewrite all IP-relative accesses to shared variables.
+  // As the top of the address space is not accessible by mmap(), this means
+  // that we need to wrap around addresses to the bottom 2GB of the address
+  // space.
+  // Only x86-64 has VSyscalls.
+  if (maps_->vsyscall()) {
+    char* copy = maps_->allocNearAddr(maps_->vsyscall(), 0x1000,
+                                      PROT_READ|PROT_WRITE|PROT_EXEC);
+    if (!copy) {
+      // Without a copy there is nothing we could patch, and continuing would
+      // only crash in the memcpy() below.
+      Sandbox::die("Cannot patch [vsystemcall]");
+    }
+    // The copy doubles as scratch space for patchSystemCallsInFunction().
+    char* extraSpace = copy;
+    int extraLength = 0x1000;
+    memcpy(copy, maps_->vsyscall(), 0x1000);
+    long adjust = (long)maps_->vsyscall() - (long)copy;
+    for (int vsys = 0; vsys < 0x1000; vsys += 0x400) {
+      char* start = copy + vsys;
+      char* end   = start + 0x400;
+
+      // There can only be up to four VSyscalls starting at an offset of
+      // n*0x400, each. VSyscalls are invoked by functions in the VDSO
+      // and provide fast implementations of a time source. We don't exactly
+      // know where the code and where the data is in the VSyscalls page.
+      // So, we disassemble the code for each function and find all branch
+      // targets within the function in order to find the last address of
+      // function.
+      for (char *last = start, *vars = end, *ptr = start; ptr < end; ) {
+     new_function:
+        char* mod_rm;
+        unsigned short insn = next_inst((const char **)&ptr, true, 0, 0,
+                                        &mod_rm, 0, 0);
+        if (mod_rm && (*mod_rm & 0xC7) == 0x5) {
+          // Instruction has IP relative addressing mode. Adjust to reference
+          // the variables in the original VSyscall segment.
+          long offset = *reinterpret_cast<int *>(mod_rm + 1);
+          char* var = ptr + offset;
+          if (var >= ptr && var < vars) {
+            // Variables are stored somewhere past all the functions. Remember
+            // the first variable in the VSyscall slot, so that we stop
+            // scanning for instructions once we reach that address.
+            vars = var;
+          }
+          offset += adjust;
+          // The adjusted displacement must still fit into 32 bits.
+          if ((offset >> 32) && (offset >> 32) != -1) {
+            Sandbox::die("Cannot patch [vsystemcall]");
+          }
+          *reinterpret_cast<int *>(mod_rm + 1) = offset;
+        }
+
+        // Check for jump targets to higher addresses (but within our own
+        // VSyscall slot). They extend the possible end-address of this
+        // function.
+        char *target = 0;
+        if ((insn >= 0x70 && insn <= 0x7F) /* Jcc */ ||
+            insn == 0xEB /* JMP */) {
+          target = ptr + (reinterpret_cast<signed char *>(ptr))[-1];
+        } else if (insn == 0xE8 /* CALL */ || insn == 0xE9 /* JMP */ ||
+                   (insn >= 0x0F80 && insn <= 0x0F8F) /* Jcc */) {
+          target = ptr + (reinterpret_cast<int *>(ptr))[-1];
+        }
+
+        // The function end is found, once the loop reaches the last valid
+        // address in the VSyscall slot, or once it finds a RET instruction
+        // that is not followed by any jump targets. Unconditional jumps that
+        // point backwards are treated the same as a RET instruction.
+        if (insn == 0xC3 /* RET */ ||
+            (target < ptr &&
+             (insn == 0xEB /* JMP */ || insn == 0xE9 /* JMP */))) {
+          if (last >= ptr) {
+            continue;
+          } else {
+            // The function can optionally be followed by more functions in
+            // the same VSyscall slot. Allow for alignment to a 16 byte
+            // boundary. If we then find more non-zero bytes, and if this is
+            // not the known start of the variables, assume a new function
+            // started.
+            for (; ptr < vars; ++ptr) {
+              if ((long)ptr & 0xF) {
+                if (*ptr && *ptr != '\x90' /* NOP */) {
+                  goto new_function;
+                }
+                *ptr = '\x90'; // NOP
+              } else {
+                if (*ptr && *ptr != '\x90' /* NOP */) {
+                  goto new_function;
+                }
+                break;
+              }
+            }
+
+            // Translate all SYSCALLs to jumps into our system call handler.
+            patchSystemCallsInFunction(NULL, start, ptr,
+                                       &extraSpace, &extraLength);
+            break;
+          }
+        }
+
+        // Adjust assumed end address for this function, if a valid jump
+        // target has been found that originates from the current instruction.
+        // NOTE(review): the upper bound here is start + 0x100 although each
+        // slot spans 0x400 bytes -- confirm that this is intended.
+        if (target > last && target < start + 0x100) {
+          last = target;
+        }
+      }
+    }
+
+    // We are done. Write-protect our code and make it executable.
+    Sandbox::SysCalls sys;
+    sys.mprotect(copy, 0x1000, PROT_READ|PROT_EXEC);
+    return maps_->vsyscall() - copy;
+  }
+  #endif
+  return 0;
+}
+
+// Patches the system call entry points found in this library's ".text"
+// section. For the VDSO, the VSyscall page is patched first; on x86-32 the
+// VDSO is then handled by patchVDSO() and nothing else is scanned. Does
+// nothing if the library has not been parsed successfully.
+void Library::patchSystemCalls() {
+  if (!valid_) {
+    return;
+  }
+  int extraLength = 0;
+  char* extraSpace = NULL;
+  if (isVDSO_) {
+    // patchVDSO() calls patchSystemCallsInFunction() which needs vsys_offset_
+    // iff processing the VDSO library. So, make sure we call
+    // patchVSystemCalls() first.
+    vsys_offset_ = patchVSystemCalls();
+    #if defined(__i386__)
+    patchVDSO(&extraSpace, &extraLength);
+    return;
+    #endif
+  }
+  SectionTable::const_iterator iter;
+  if ((iter = section_table_.find(".text")) == section_table_.end()) {
+    return;
+  }
+  const Elf_Shdr& shdr = iter->second.second;
+  char* start = reinterpret_cast<char *>(shdr.sh_addr + asr_offset_);
+  char* stop = start + shdr.sh_size;
+  char* func = start;
+  int nopcount = 0;
+  bool has_syscall = false;
+  for (char *ptr = start; ptr < stop; ptr++) {
+    // Quick byte-wise check for anything that might be a system call. The
+    // look-ahead bytes are only inspected while they are still inside the
+    // section, so that we never read past "stop".
+    #if defined(__x86_64__)
+    if ((*ptr == '\x0F' && ptr + 1 < stop &&
+         ptr[1] == '\x05' /* SYSCALL */) ||
+        (isVDSO_ && *ptr == '\xFF')) {
+    #elif defined(__i386__)
+    if ((*ptr   == '\xCD' && ptr + 1 < stop &&
+         ptr[1] == '\x80' /* INT $0x80 */) ||
+        (*ptr   == '\x65' && ptr + 2 < stop && ptr[1] == '\xFF' &&
+         ptr[2] == '\x15' /* CALL %gs:.. */)) {
+    #else
+    #error Unsupported target platform
+    #endif
+      ptr++;
+      has_syscall = true;
+      nopcount    = 0;
+    } else if (*ptr == '\x90' /* NOP */) {
+      nopcount++;
+    } else if (!(reinterpret_cast<long>(ptr) & 0xF)) {
+      if (nopcount > 2) {
+        // This is very likely the beginning of a new function. Functions
+        // are aligned on 16 byte boundaries and the preceding function is
+        // padded out with NOPs.
+        //
+        // For performance reasons, we quickly scan the entire text segment
+        // for potential SYSCALLs, and then patch the code in increments of
+        // individual functions.
+        if (has_syscall) {
+          has_syscall = false;
+          // Our quick scan of the function found a potential system call.
+          // Do a more thorough scan, now.
+          patchSystemCallsInFunction(maps_, func, ptr, &extraSpace,
+                                     &extraLength);
+        }
+        func = ptr;
+      }
+      nopcount = 0;
+    } else {
+      nopcount = 0;
+    }
+  }
+  if (has_syscall) {
+    // Patch any remaining system calls that were in the last function before
+    // the loop terminated.
+    patchSystemCallsInFunction(maps_, func, stop, &extraSpace, &extraLength);
+  }
+
+  // Mark our scratch space as write-protected and executable.
+  if (extraSpace) {
+    Sandbox::SysCalls sys;
+    sys.mprotect(extraSpace, 4096, PROT_READ|PROT_EXEC);
+  }
+}
+
+// Reads the ELF header and section table of this mapping, records every
+// section by name and computes asr_offset_ from the location of the text
+// section. Sets valid_ and returns false if the mapping cannot be parsed.
+// For the VDSO, the result additionally depends on parseSymbols().
+bool Library::parseElf() {
+  valid_ = true;
+
+  // Check the ELF header and fetch the header of the section-name string
+  // table. Not all memory mappings are necessarily ELF files; mappings that
+  // we cannot identify are simply marked as invalid.
+  Elf_Shdr str_shdr;
+  const bool header_ok =
+      getOriginal(0, &ehdr_) &&
+      ehdr_.e_ehsize >= sizeof(Elf_Ehdr) &&
+      ehdr_.e_phentsize >= sizeof(Elf_Phdr) &&
+      ehdr_.e_shentsize >= sizeof(Elf_Shdr) &&
+      getOriginal(ehdr_.e_shoff + ehdr_.e_shstrndx * ehdr_.e_shentsize,
+                  &str_shdr);
+  if (!header_ok) {
+    valid_ = false;
+    return false;
+  }
+
+  // Walk the section table and remember each readable section under its
+  // name, together with its index.
+  for (int idx = 0; idx < ehdr_.e_shnum; idx++) {
+    Elf_Shdr shdr;
+    if (getOriginal(ehdr_.e_shoff + idx*ehdr_.e_shentsize, &shdr)) {
+      section_table_.insert(
+         std::make_pair(getOriginal(str_shdr.sh_offset + shdr.sh_name),
+                        std::make_pair(idx, shdr)));
+    }
+  }
+
+  // Find the section that tells us where the code lives.
+  const Elf_Shdr* text = getSection(".text");
+  if (!text) {
+    // On x86-32, the VDSO is unusual in as much as it does not have a single
+    // ".text" section. Instead, it has one section per function, each with a
+    // name that starts with ".text". Any one of them is good enough to
+    // determine the asr_offset_ -- which would typically be zero for the
+    // VDSO.
+    SectionTable::const_iterator section = section_table_.begin();
+    while (section != section_table_.end()) {
+      if (strncmp(section->first.c_str(), ".text", 5) == 0) {
+        text = &section->second.second;
+        break;
+      }
+      ++section;
+    }
+  }
+  if (!text) {
+    valid_ = false;
+    return false;
+  }
+
+  // With the text section known, derive the asr_offset_ from the memory
+  // range that maps it.
+  RangeMap::const_iterator range = memory_ranges_.lower_bound(text->sh_offset);
+  if (range == memory_ranges_.end()) {
+    valid_ = false;
+    return false;
+  }
+  asr_offset_ = reinterpret_cast<char *>(range->second.start) -
+      (text->sh_addr - (text->sh_offset - range->first));
+
+  if (!isVDSO_) {
+    return true;
+  }
+  return parseSymbols();
+}
+
+// Reads the PLT relocations and the dynamic symbol table of this library,
+// filling plt_entries_ and symbols_, and records the addresses of the
+// __kernel_* entry points if they are present. Returns false (and clears
+// valid_) when an invalid table entry is encountered.
+bool Library::parseSymbols() {
+  if (!valid_) {
+    return false;
+  }
+
+  Elf_Shdr str_shdr;
+  getOriginal(ehdr_.e_shoff + ehdr_.e_shstrndx * ehdr_.e_shentsize, &str_shdr);
+
+  // Locate the PLT relocations and the dynamic symbol table, along with the
+  // string table that the symbol table links to.
+  const Elf_Shdr* plt = getSection(ELF_REL_PLT);
+  const Elf_Shdr* symtab = getSection(".dynsym");
+  Elf_Shdr strtab = { 0 };
+  if (symtab &&
+      (symtab->sh_link >= ehdr_.e_shnum ||
+       !getOriginal(ehdr_.e_shoff + symtab->sh_link * ehdr_.e_shentsize,
+                    &strtab))) {
+    Debug::message("Cannot find valid symbol table\n");
+    valid_ = false;
+    return false;
+  }
+
+  if (plt && symtab) {
+    // Walk the PLT relocations from the last entry to the first and record
+    // the named jump slots.
+    const int num_relocs = plt->sh_size/sizeof(Elf_Rel);
+    for (int idx = num_relocs - 1; idx >= 0; --idx) {
+      Elf_Rel rel;
+      if (!getOriginal(plt->sh_offset + idx * sizeof(Elf_Rel), &rel) ||
+          ELF_R_SYM(rel.r_info)*sizeof(Elf_Sym) >= symtab->sh_size) {
+        Debug::message("Encountered invalid plt entry\n");
+        valid_ = false;
+        return false;
+      }
+
+      if (ELF_R_TYPE(rel.r_info) == ELF_JUMP_SLOT) {
+        Elf_Sym sym;
+        if (!getOriginal(symtab->sh_offset +
+                         ELF_R_SYM(rel.r_info)*sizeof(Elf_Sym), &sym) ||
+            sym.st_shndx >= ehdr_.e_shnum) {
+          Debug::message("Encountered invalid symbol for plt entry\n");
+          valid_ = false;
+          return false;
+        }
+        string name = getOriginal(strtab.sh_offset + sym.st_name);
+        if (!name.empty()) {
+          plt_entries_.insert(std::make_pair(name, rel.r_offset));
+        }
+      }
+    }
+  }
+
+  if (symtab) {
+    // Record every named entry of the symbol table.
+    Elf_Addr pos = 0;
+    while (pos < symtab->sh_size) {
+      Elf_Sym sym;
+      if (!getOriginal(symtab->sh_offset + pos, &sym) ||
+          (sym.st_shndx >= ehdr_.e_shnum &&
+           sym.st_shndx < SHN_LORESERVE)) {
+        Debug::message("Encountered invalid symbol\n");
+        valid_ = false;
+        return false;
+      }
+      string name = getOriginal(strtab.sh_offset + sym.st_name);
+      if (!name.empty()) {
+        symbols_.insert(std::make_pair(name, sym));
+      }
+      pos += sizeof(Elf_Sym);
+    }
+  }
+
+  // Remember where the kernel entry points live, for those that are defined
+  // with a non-zero value.
+  SymbolTable::const_iterator vsyscall_sym = symbols_.find("__kernel_vsyscall");
+  if (vsyscall_sym != symbols_.end() && vsyscall_sym->second.st_value) {
+    __kernel_vsyscall = asr_offset_ + vsyscall_sym->second.st_value;
+  }
+  SymbolTable::const_iterator sigreturn_sym =
+      symbols_.find("__kernel_sigreturn");
+  if (sigreturn_sym != symbols_.end() && sigreturn_sym->second.st_value) {
+    __kernel_sigreturn = asr_offset_ + sigreturn_sym->second.st_value;
+  }
+  SymbolTable::const_iterator rt_sigreturn_sym =
+      symbols_.find("__kernel_rt_sigreturn");
+  if (rt_sigreturn_sym != symbols_.end() &&
+      rt_sigreturn_sym->second.st_value) {
+    __kernel_rt_sigreturn = asr_offset_ + rt_sigreturn_sym->second.st_value;
+  }
+
+  return true;
+}
+
+} // namespace
diff --git a/sandbox/linux/seccomp/library.h b/sandbox/linux/seccomp/library.h
new file mode 100644
index 0000000..e27bfde
--- /dev/null
+++ b/sandbox/linux/seccomp/library.h
@@ -0,0 +1,199 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef LIBRARY_H__
+#define LIBRARY_H__
+
+#include <elf.h>
+#include <functional>
+#include <map>
+#include <set>
+#include <string>
+#include <string.h>
+#include <sys/mman.h>
+
+#include "maps.h"
+
+#if defined(__x86_64__)    // x86-64: alias the ELF64 structures from <elf.h>
+typedef Elf64_Ehdr Elf_Ehdr;
+typedef Elf64_Shdr Elf_Shdr;
+typedef Elf64_Sym  Elf_Sym;
+typedef Elf64_Addr Elf_Addr;
+#elif defined(__i386__)    // i386: alias the ELF32 structures from <elf.h>
+typedef Elf32_Ehdr Elf_Ehdr;
+typedef Elf32_Shdr Elf_Shdr;
+typedef Elf32_Sym  Elf_Sym;
+typedef Elf32_Addr Elf_Addr;
+#else                      // any other target is rejected at compile time
+#error Unsupported target platform
+#endif
+
+struct SyscallTable;
+namespace playground {
+
+// Describes one mapped ELF object (or the VDSO): its memory ranges, parsed
+// section/symbol/PLT tables, and typed accessors for reading and patching it.
+class Library {
+  friend class Maps;
+ public:
+  typedef Maps::string string;
+
+  Library()
+      : valid_(false), isVDSO_(false), asr_offset_(0), vsys_offset_(0),
+        maps_(0), image_(0), image_size_(0) {
+  }
+
+  ~Library();
+
+  // Remembers |maps|, unless a non-null Maps pointer was recorded earlier.
+  void setLibraryInfo(Maps* maps) {
+    if (!maps_) {
+      maps_ = maps;
+    }
+  }
+
+  // Records [start, stop) under key |offset|. An entry that already exists
+  // for |offset| is only replaced if it is not executable and |prot| is.
+  void addMemoryRange(void* start, void* stop, Elf_Addr offset,
+                      int prot, int isVDSO) {
+    isVDSO_ = isVDSO;
+    RangeMap::iterator iter = memory_ranges_.find(offset);
+    if (iter != memory_ranges_.end()) {
+      // Overlapping mappings happen, particularly with very small programs
+      // or libraries. We then only care about the text segment.
+      if ((prot & PROT_EXEC) == 0 || (iter->second.prot & PROT_EXEC) != 0) {
+        return;
+      }
+      // std::map::insert() never overwrites an existing key; erase first.
+      memory_ranges_.erase(iter);
+    }
+    memory_ranges_.insert(std::make_pair(offset, Range(start, stop, prot)));
+  }
+
+  char *get(Elf_Addr offset, char *buf, size_t len);
+  string get(Elf_Addr offset);
+  char *getOriginal(Elf_Addr offset, char *buf, size_t len);
+  string getOriginal(Elf_Addr offset);
+
+  // Typed get(); zero-fills |*t| and returns NULL when !valid_.
+  template<class T>T* get(Elf_Addr offset, T* t) {
+    if (!valid_) {
+      memset(t, 0, sizeof(T));
+      return NULL;
+    }
+    return reinterpret_cast<T *>(get(offset, reinterpret_cast<char *>(t),
+                                     sizeof(T)));
+  }
+
+  // Typed getOriginal(); zero-fills |*t| and returns NULL when !valid_.
+  template<class T>T* getOriginal(Elf_Addr offset, T* t) {
+    if (!valid_) {
+      memset(t, 0, sizeof(T));
+      return NULL;
+    }
+    char* raw = getOriginal(offset, reinterpret_cast<char *>(t), sizeof(T));
+    return reinterpret_cast<T *>(raw);
+  }
+
+  // Stores |*value| at |addr|; refuses (returns false) when !valid_.
+  template<class T>bool set(void *addr, T* value) {
+    if (!valid_) {
+      return false;
+    }
+    *reinterpret_cast<T *>(addr) = *value;
+    return true;
+  }
+
+  // Stores |*value| at |offset| if a known memory range can hold it there.
+  template<class T>bool set(Elf_Addr offset, T* value) {
+    if (!valid_) {
+      return false;
+    }
+    RangeMap::const_iterator iter = memory_ranges_.lower_bound(offset);
+    if (iter == memory_ranges_.end()) {
+      return false;
+    }
+    char* base = reinterpret_cast<char *>(iter->second.start);
+    size_t size = reinterpret_cast<char *>(iter->second.stop) - base;
+    offset -= iter->first;
+    // Checked this way round so a range shorter than T cannot wrap around.
+    if (size < sizeof(T) || offset > size - sizeof(T)) {
+      return false;
+    }
+    *reinterpret_cast<T *>(base + offset) = *value;
+    return true;
+  }
+
+  bool parseElf();
+  const Elf_Ehdr* getEhdr();
+  const Elf_Shdr* getSection(const string& section);
+  int getSectionIndex(const string& section);
+  void makeWritable(bool state) const;
+  void patchSystemCalls();
+  bool isVDSO() const { return isVDSO_; }
+
+ protected:
+  bool parseSymbols();
+
+ private:
+  class GreaterThan : public std::binary_function<Elf_Addr, Elf_Addr, bool> {
+    // Orders the RangeMap by descending key. With that ordering,
+    // lower_bound(x) yields the entry with the largest key <= x, which is
+    // how memory mappings are looked up.
+   public:
+    bool operator() (Elf_Addr s1, Elf_Addr s2) const { return s1 > s2; }
+  };
+
+  struct Range {
+    Range(void* start, void* stop, int prot) :
+        start(start), stop(stop), prot(prot) { }
+    void* start;
+    void* stop;
+    int   prot;
+  };
+
+  typedef std::map<Elf_Addr, Range, GreaterThan,
+                   SystemAllocator<std::pair<const Elf_Addr,
+                                             Range> > > RangeMap;
+  typedef std::map<string, std::pair<int, Elf_Shdr>, std::less<string>,
+                   SystemAllocator<std::pair<const string,
+                                             std::pair<int, Elf_Shdr> > > >
+                   SectionTable;
+  typedef std::map<string, Elf_Sym, std::less<string>,
+                   SystemAllocator<std::pair<const string,
+                                             Elf_Sym> > > SymbolTable;
+  typedef std::map<string, Elf_Addr, std::less<string>,
+                   SystemAllocator<std::pair<const string,
+                                             Elf_Addr> > > PltTable;
+
+  char* getBytes(char* dst, const char* src, ssize_t len);
+  static bool isSafeInsn(unsigned short insn);
+  static int isSimpleSystemCall(char *start, char *end);
+  static char* getScratchSpace(const Maps* maps, char* near, int needed,
+                               char** extraSpace, int* extraLength);
+  void patchSystemCallsInFunction(const Maps* maps, char *start, char *end,
+                                  char** extraSpace, int* extraLength);
+  int  patchVSystemCalls();
+  void patchVDSO(char** extraSpace, int* extraLength);
+
+  RangeMap        memory_ranges_;
+  bool            valid_;
+  bool            isVDSO_;
+  char*           asr_offset_;
+  int             vsys_offset_;
+  Maps*           maps_;
+  Elf_Ehdr        ehdr_;
+  SectionTable    section_table_;
+  SymbolTable     symbols_;
+  PltTable        plt_entries_;
+  char*           image_;
+  size_t          image_size_;
+  static char*    __kernel_vsyscall;
+  static char*    __kernel_sigreturn;
+  static char*    __kernel_rt_sigreturn;
+};
+
+} // namespace
+
+#endif // LIBRARY_H__
diff --git a/sandbox/linux/seccomp/linux_syscall_support.h b/sandbox/linux/seccomp/linux_syscall_support.h
new file mode 100644
index 0000000..2ee0426
--- /dev/null
+++ b/sandbox/linux/seccomp/linux_syscall_support.h
@@ -0,0 +1,3208 @@
+/* Copyright (c) 2005-2010, Google Inc.
+ * Author: Markus Gutschke
+ *
+ * All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the Chromium LICENSE file.
+ */
+
+/* This file includes Linux-specific support functions common to the
+ * coredumper and the thread lister; primarily, this is a collection
+ * of direct system calls, and a couple of symbols missing from
+ * standard header files.
+ * There are a few options that the including file can set to control
+ * the behavior of this file:
+ *
+ * SYS_CPLUSPLUS:
+ *   The entire header file will normally be wrapped in 'extern "C" { }',
+ *   making it suitable for compilation as both C and C++ source. If you
+ *   do not want to do this, you can set the SYS_CPLUSPLUS macro to inhibit
+ *   the wrapping. N.B. doing so will suppress inclusion of all prerequisite
+ *   system header files, too. It is the caller's responsibility to provide
+ *   the necessary definitions.
+ *
+ * SYS_ERRNO:
+ *   All system calls will update "errno" unless overridden by setting the
+ *   SYS_ERRNO macro prior to including this file. SYS_ERRNO should be
+ *   an l-value.
+ *
+ * SYS_INLINE:
+ *   New symbols will be defined "static inline", unless overridden by
+ *   the SYS_INLINE macro.
+ *
+ * SYS_LINUX_SYSCALL_SUPPORT_H
+ *   This macro is used to avoid multiple inclusions of this header file.
+ *   If you need to include this file more than once, make sure to
+ *   unset SYS_LINUX_SYSCALL_SUPPORT_H before each inclusion.
+ *
+ * SYS_PREFIX:
+ *   New system calls will have a prefix of "sys_" unless overridden by
+ *   the SYS_PREFIX macro. Valid values for this macro are [0..9] which
+ *   results in prefixes "sys[0..9]_". It is also possible to set this
+ *   macro to -1, which avoids all prefixes.
+ *
+ * This file defines a few internal symbols that all start with "LSS_".
+ * Do not access these symbols from outside this file. They are not part
+ * of the supported API.
+ */
+#ifndef SYS_LINUX_SYSCALL_SUPPORT_H
+#define SYS_LINUX_SYSCALL_SUPPORT_H
+
+/* We currently only support x86-32, x86-64, ARM, MIPS, and PPC on Linux.
+ * Porting to other related platforms should not be difficult.
+ */
+#if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) ||   \
+     defined(__mips__) || defined(__PPC__)) && defined(__linux)
+
+#ifndef SYS_CPLUSPLUS
+#ifdef __cplusplus
+/* Some system header files in older versions of gcc neglect to properly
+ * handle being included from C++. As it appears to be harmless to have
+ * multiple nested 'extern "C"' blocks, just add another one here.
+ */
+extern "C" {
+#endif
+
+#include <errno.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/ptrace.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <linux/unistd.h>
+#include <endian.h>
+
+#ifdef __mips__
+/* Include definitions of the ABI currently in use.                          */
+#include <sgidefs.h>
+#endif
+
+#endif
+
+/* As glibc often provides subtly incompatible data structures (and implicit
+ * wrapper functions that convert them), we provide our own kernel data
+ * structures for use by the system calls.
+ * These structures have been developed by using Linux 2.6.23 headers for
+ * reference. Note though, we do not care about exact API compatibility
+ * with the kernel, and in fact the kernel often does not have a single
+ * API that works across architectures. Instead, we try to mimic the glibc
+ * API where reasonable, and only guarantee ABI compatibility with the
+ * kernel headers.
+ * Most notably, here are a few changes that were made to the structures
+ * defined by kernel headers:
+ *
+ * - we only define structures, but not symbolic names for kernel data
+ *   types. For the latter, we directly use the native C datatype
+ *   (i.e. "unsigned" instead of "mode_t").
+ * - in a few cases, it is possible to define identical structures for
+ *   both 32bit (e.g. i386) and 64bit (e.g. x86-64) platforms by
+ *   standardizing on the 64bit version of the data types. In particular,
+ *   this means that we use "unsigned" where the 32bit headers say
+ *   "unsigned long".
+ * - overall, we try to minimize the number of cases where we need to
+ *   conditionally define different structures.
+ * - the "struct kernel_sigaction" class of structures have been
+ *   modified to more closely mimic glibc's API by introducing an
+ *   anonymous union for the function pointer.
+ * - a small number of field names had to have an underscore appended to
+ *   them, because glibc defines a global macro by the same name.
+ */
+
+/* include/linux/dirent.h: long long d_ino/d_off, and a d_type byte          */
+struct kernel_dirent64 {
+  unsigned long long d_ino;
+  long long          d_off;
+  unsigned short     d_reclen;
+  unsigned char      d_type;
+  char               d_name[256];
+};
+
+/* include/linux/dirent.h: plain long d_ino/d_off, and no d_type             */
+struct kernel_dirent {
+  long               d_ino;
+  long               d_off;
+  unsigned short     d_reclen;
+  char               d_name[256];
+};
+
+/* include/linux/uio.h: a base pointer plus a length                         */
+struct kernel_iovec {
+  void               *iov_base;
+  unsigned long      iov_len;
+};
+
+/* include/linux/socket.h: message header; msg_iov is a kernel_iovec pointer */
+struct kernel_msghdr {
+  void               *msg_name;
+  int                msg_namelen;
+  struct kernel_iovec*msg_iov;
+  unsigned long      msg_iovlen;
+  void               *msg_control;
+  unsigned long      msg_controllen;
+  unsigned           msg_flags;
+};
+
+/* include/asm-generic/poll.h: an fd and two short event fields              */
+struct kernel_pollfd {
+  int                fd;
+  short              events;
+  short              revents;
+};
+
+/* include/linux/resource.h: two unsigned long limits, rlim_cur and rlim_max */
+struct kernel_rlimit {
+  unsigned long      rlim_cur;
+  unsigned long      rlim_max;
+};
+
+/* include/linux/time.h: tv_sec and tv_nsec, both plain long                 */
+struct kernel_timespec {
+  long               tv_sec;
+  long               tv_nsec;
+};
+
+/* include/linux/time.h: tv_sec and tv_usec, both plain long                 */
+struct kernel_timeval {
+  long               tv_sec;
+  long               tv_usec;
+};
+
+/* include/linux/resource.h: two kernel_timeval members, then 14 long fields */
+struct kernel_rusage {
+  struct kernel_timeval ru_utime;
+  struct kernel_timeval ru_stime;
+  long               ru_maxrss;
+  long               ru_ixrss;
+  long               ru_idrss;
+  long               ru_isrss;
+  long               ru_minflt;
+  long               ru_majflt;
+  long               ru_nswap;
+  long               ru_inblock;
+  long               ru_oublock;
+  long               ru_msgsnd;
+  long               ru_msgrcv;
+  long               ru_nsignals;
+  long               ru_nvcsw;
+  long               ru_nivcsw;
+};
+
+struct siginfo;  /* forward declaration; only used through pointers below    */
+#if defined(__i386__) || defined(__ARM_ARCH_3__) || defined(__PPC__)
+
+/* include/asm-{arm,i386,mips,ppc}/signal.h; sa_mask is a single long here   */
+struct kernel_old_sigaction {
+  union {
+    void             (*sa_handler_)(int);
+    void             (*sa_sigaction_)(int, struct siginfo *, void *);
+  };
+  unsigned long      sa_mask;
+  unsigned long      sa_flags;
+  void               (*sa_restorer)(void);
+} __attribute__((packed,aligned(4)));
+#elif (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32)
+  #define kernel_old_sigaction kernel_sigaction
+#endif  /* otherwise kernel_old_sigaction is not provided at all             */
+
+/* Some kernel functions (e.g. sigaction() in 2.6.23) require that the size
+ * argument exactly match the size of the signal set, even though the API was
+ * intended to be extensible. We define our own KERNEL_NSIG to deal with
+ * this.
+ * Please note that glibc provides signals [1.._NSIG-1], whereas the
+ * kernel (and this header) provides the range [1..KERNEL_NSIG]. The
+ * actual number of signals is obviously the same, but the constants
+ * differ by one.
+ */
+#ifdef __mips__
+#define KERNEL_NSIG 128
+#else
+#define KERNEL_NSIG  64
+#endif
+
+/* include/asm-{arm,i386,mips,x86_64}/signal.h: KERNEL_NSIG bits, in longs   */
+struct kernel_sigset_t {
+  unsigned long sig[(KERNEL_NSIG + 8*sizeof(unsigned long) - 1)/
+                    (8*sizeof(unsigned long))];  /* bits/word, rounded up    */
+};
+
+/* include/asm-{arm,i386,mips,x86_64,ppc}/signal.h; MIPS has its own layout  */
+struct kernel_sigaction {
+#ifdef __mips__  /* flags come first and there is no sa_restorer member      */
+  unsigned long      sa_flags;
+  union {
+    void             (*sa_handler_)(int);
+    void             (*sa_sigaction_)(int, struct siginfo *, void *);
+  };
+  struct kernel_sigset_t sa_mask;
+#else            /* handler union, flags, restorer, then the full mask       */
+  union {
+    void             (*sa_handler_)(int);
+    void             (*sa_sigaction_)(int, struct siginfo *, void *);
+  };
+  unsigned long      sa_flags;
+  void               (*sa_restorer)(void);
+  struct kernel_sigset_t sa_mask;
+#endif
+};
+
+/* include/linux/socket.h: sa_family followed by 14 bytes of sa_data         */
+struct kernel_sockaddr {
+  unsigned short     sa_family;
+  char               sa_data[14];
+};
+
+/* include/asm-{arm,i386,mips,ppc}/stat.h                                    */
+#ifdef __mips__
+#if _MIPS_SIM == _MIPS_SIM_ABI64
+struct kernel_stat {       /* ABI64: these fields are named kernel_stat      */
+#else
+struct kernel_stat64 {     /* other MIPS ABIs: same fields, as kernel_stat64 */
+#endif
+  unsigned           st_dev;
+  unsigned           __pad0[3];
+  unsigned long long st_ino;
+  unsigned           st_mode;
+  unsigned           st_nlink;
+  unsigned           st_uid;
+  unsigned           st_gid;
+  unsigned           st_rdev;
+  unsigned           __pad1[3];
+  long long          st_size;
+  unsigned           st_atime_;
+  unsigned           st_atime_nsec_;
+  unsigned           st_mtime_;
+  unsigned           st_mtime_nsec_;
+  unsigned           st_ctime_;
+  unsigned           st_ctime_nsec_;
+  unsigned           st_blksize;
+  unsigned           __pad2;
+  unsigned long long st_blocks;
+};
+#elif defined __PPC__
+struct kernel_stat64 {
+  unsigned long long st_dev;
+  unsigned long long st_ino;
+  unsigned           st_mode;
+  unsigned           st_nlink;
+  unsigned           st_uid;
+  unsigned           st_gid;
+  unsigned long long st_rdev;
+  unsigned short int __pad2;
+  long long          st_size;
+  long               st_blksize;
+  long long          st_blocks;
+  long               st_atime_;
+  unsigned long      st_atime_nsec_;
+  long               st_mtime_;
+  unsigned long      st_mtime_nsec_;
+  long               st_ctime_;
+  unsigned long      st_ctime_nsec_;
+  unsigned long      __unused4;
+  unsigned long      __unused5;
+};
+#else  /* neither MIPS nor PPC                                               */
+struct kernel_stat64 {
+  unsigned long long st_dev;
+  unsigned char      __pad0[4];
+  unsigned           __st_ino;
+  unsigned           st_mode;
+  unsigned           st_nlink;
+  unsigned           st_uid;
+  unsigned           st_gid;
+  unsigned long long st_rdev;
+  unsigned char      __pad3[4];
+  long long          st_size;
+  unsigned           st_blksize;
+  unsigned long long st_blocks;
+  unsigned           st_atime_;
+  unsigned           st_atime_nsec_;
+  unsigned           st_mtime_;
+  unsigned           st_mtime_nsec_;
+  unsigned           st_ctime_;
+  unsigned           st_ctime_nsec_;
+  unsigned long long st_ino;
+};
+#endif
+
+/* include/asm-{arm,i386,mips,x86_64,ppc}/stat.h                             */
+#if defined(__i386__) || defined(__ARM_ARCH_3__)
+struct kernel_stat {
+  /* The kernel headers suggest that st_dev and st_rdev should be 32bit
+   * quantities encoding 12bit major and 20bit minor numbers in an interleaved
+   * format. In reality, we do not see useful data in the top bits. So,
+   * we'll leave the padding in here, until we find a better solution.
+   */
+  unsigned short     st_dev;
+  short              pad1;
+  unsigned           st_ino;
+  unsigned short     st_mode;
+  unsigned short     st_nlink;
+  unsigned short     st_uid;
+  unsigned short     st_gid;
+  unsigned short     st_rdev;
+  short              pad2;
+  unsigned           st_size;
+  unsigned           st_blksize;
+  unsigned           st_blocks;
+  unsigned           st_atime_;
+  unsigned           st_atime_nsec_;
+  unsigned           st_mtime_;
+  unsigned           st_mtime_nsec_;
+  unsigned           st_ctime_;
+  unsigned           st_ctime_nsec_;
+  unsigned           __unused4;
+  unsigned           __unused5;
+};
+#elif defined(__x86_64__)
+struct kernel_stat {
+  unsigned long      st_dev;
+  unsigned long      st_ino;
+  unsigned long      st_nlink;
+  unsigned           st_mode;
+  unsigned           st_uid;
+  unsigned           st_gid;
+  unsigned           __pad0;
+  unsigned long      st_rdev;
+  long               st_size;
+  long               st_blksize;
+  long               st_blocks;
+  unsigned long      st_atime_;
+  unsigned long      st_atime_nsec_;
+  unsigned long      st_mtime_;
+  unsigned long      st_mtime_nsec_;
+  unsigned long      st_ctime_;
+  unsigned long      st_ctime_nsec_;
+  long               __unused[3];
+};
+#elif defined(__PPC__)
+struct kernel_stat {
+  unsigned           st_dev;
+  unsigned long      st_ino;      /* ino_t   */
+  unsigned long      st_mode;     /* mode_t  */
+  unsigned short     st_nlink;    /* nlink_t */
+  unsigned           st_uid;      /* uid_t   */
+  unsigned           st_gid;      /* gid_t   */
+  unsigned           st_rdev;
+  long               st_size;     /* off_t   */
+  unsigned long      st_blksize;
+  unsigned long      st_blocks;
+  unsigned long      st_atime_;
+  unsigned long      st_atime_nsec_;
+  unsigned long      st_mtime_;
+  unsigned long      st_mtime_nsec_;
+  unsigned long      st_ctime_;
+  unsigned long      st_ctime_nsec_;
+  unsigned long      __unused4;
+  unsigned long      __unused5;
+};
+#elif (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64)
+struct kernel_stat {
+  unsigned           st_dev;
+  int                st_pad1[3];
+  unsigned           st_ino;
+  unsigned           st_mode;
+  unsigned           st_nlink;
+  unsigned           st_uid;
+  unsigned           st_gid;
+  unsigned           st_rdev;
+  int                st_pad2[2];
+  long               st_size;
+  int                st_pad3;
+  long               st_atime_;
+  long               st_atime_nsec_;
+  long               st_mtime_;
+  long               st_mtime_nsec_;
+  long               st_ctime_;
+  long               st_ctime_nsec_;
+  int                st_blksize;
+  int                st_blocks;
+  int                st_pad4[14];
+};
+#endif
+
+/* include/asm-{arm,i386,mips,x86_64,ppc}/statfs.h                           */
+#ifdef __mips__
+#if _MIPS_SIM != _MIPS_SIM_ABI64
+struct kernel_statfs64 {
+  unsigned long      f_type;
+  unsigned long      f_bsize;
+  unsigned long      f_frsize;
+  unsigned long      __pad;
+  unsigned long long f_blocks;
+  unsigned long long f_bfree;
+  unsigned long long f_files;
+  unsigned long long f_ffree;
+  unsigned long long f_bavail;
+  struct { int val[2]; } f_fsid;
+  unsigned long      f_namelen;
+  unsigned long      f_spare[6];
+};
+#endif  /* _MIPS_SIM != _MIPS_SIM_ABI64                                      */
+#elif !defined(__x86_64__)
+struct kernel_statfs64 {
+  unsigned long      f_type;
+  unsigned long      f_bsize;
+  unsigned long long f_blocks;
+  unsigned long long f_bfree;
+  unsigned long long f_bavail;
+  unsigned long long f_files;
+  unsigned long long f_ffree;
+  struct { int val[2]; } f_fsid;
+  unsigned long      f_namelen;
+  unsigned long      f_frsize;
+  unsigned long      f_spare[5];
+};
+#endif  /* kernel_statfs64 is not defined for x86_64 or for MIPS ABI64       */
+
+/* include/asm-{arm,i386,mips,x86_64,ppc,generic}/statfs.h                   */
+#ifdef __mips__  /* signed fields; f_frsize sits right after f_bsize         */
+struct kernel_statfs {
+  long               f_type;
+  long               f_bsize;
+  long               f_frsize;
+  long               f_blocks;
+  long               f_bfree;
+  long               f_files;
+  long               f_ffree;
+  long               f_bavail;
+  struct { int val[2]; } f_fsid;
+  long               f_namelen;
+  long               f_spare[6];
+};
+#else            /* every platform other than MIPS                           */
+struct kernel_statfs {
+  /* x86_64 actually defines all these fields as signed, whereas all other  */
+  /* platforms define them as unsigned. Leaving them at unsigned should not */
+  /* cause any problems.                                                    */
+  unsigned long      f_type;
+  unsigned long      f_bsize;
+  unsigned long      f_blocks;
+  unsigned long      f_bfree;
+  unsigned long      f_bavail;
+  unsigned long      f_files;
+  unsigned long      f_ffree;
+  struct { int val[2]; } f_fsid;
+  unsigned long      f_namelen;
+  unsigned long      f_frsize;
+  unsigned long      f_spare[5];
+};
+#endif
+
+
+/* Definitions missing from the standard header files                        */
+#ifndef O_DIRECTORY
+#if defined(__ARM_ARCH_3__)
+#define O_DIRECTORY             0040000
+#else
+#define O_DIRECTORY             0200000
+#endif
+#endif
+#ifndef NT_PRXFPREG
+#define NT_PRXFPREG             0x46e62b7f
+#endif
+#ifndef PTRACE_GETFPXREGS
+#define PTRACE_GETFPXREGS       ((enum __ptrace_request)18)
+#endif
+#ifndef PR_GET_DUMPABLE
+#define PR_GET_DUMPABLE         3
+#endif
+#ifndef PR_SET_DUMPABLE
+#define PR_SET_DUMPABLE         4
+#endif
+#ifndef PR_GET_SECCOMP
+#define PR_GET_SECCOMP          21
+#endif
+#ifndef PR_SET_SECCOMP
+#define PR_SET_SECCOMP          22
+#endif
+#ifndef AT_FDCWD
+#define AT_FDCWD                (-100)
+#endif
+#ifndef AT_SYMLINK_NOFOLLOW
+#define AT_SYMLINK_NOFOLLOW     0x100
+#endif
+#ifndef AT_REMOVEDIR
+#define AT_REMOVEDIR            0x200
+#endif
+#ifndef MREMAP_FIXED
+#define MREMAP_FIXED            2
+#endif
+#ifndef SA_RESTORER
+#define SA_RESTORER             0x04000000
+#endif
+#ifndef CPUCLOCK_PROF
+#define CPUCLOCK_PROF           0
+#endif
+#ifndef CPUCLOCK_VIRT
+#define CPUCLOCK_VIRT           1
+#endif
+#ifndef CPUCLOCK_SCHED
+#define CPUCLOCK_SCHED          2
+#endif
+#ifndef CPUCLOCK_PERTHREAD_MASK
+#define CPUCLOCK_PERTHREAD_MASK 4
+#endif
+#ifndef MAKE_PROCESS_CPUCLOCK
+#define MAKE_PROCESS_CPUCLOCK(pid, clock)                                     \
+        ((~(int)(pid) << 3) | (int)(clock))
+#endif
+#ifndef MAKE_THREAD_CPUCLOCK
+#define MAKE_THREAD_CPUCLOCK(tid, clock)                                      \
+        ((~(int)(tid) << 3) | (int)((clock) | CPUCLOCK_PERTHREAD_MASK))
+#endif
+
+#ifndef FUTEX_WAIT
+#define FUTEX_WAIT                0
+#endif
+#ifndef FUTEX_WAKE
+#define FUTEX_WAKE                1
+#endif
+#ifndef FUTEX_FD
+#define FUTEX_FD                  2
+#endif
+#ifndef FUTEX_REQUEUE
+#define FUTEX_REQUEUE             3
+#endif
+#ifndef FUTEX_CMP_REQUEUE
+#define FUTEX_CMP_REQUEUE         4
+#endif
+#ifndef FUTEX_WAKE_OP
+#define FUTEX_WAKE_OP             5
+#endif
+#ifndef FUTEX_LOCK_PI
+#define FUTEX_LOCK_PI             6
+#endif
+#ifndef FUTEX_UNLOCK_PI
+#define FUTEX_UNLOCK_PI           7
+#endif
+#ifndef FUTEX_TRYLOCK_PI
+#define FUTEX_TRYLOCK_PI          8
+#endif
+#ifndef FUTEX_PRIVATE_FLAG
+#define FUTEX_PRIVATE_FLAG        128
+#endif
+#ifndef FUTEX_CMD_MASK
+#define FUTEX_CMD_MASK            ~FUTEX_PRIVATE_FLAG
+#endif
+#ifndef FUTEX_WAIT_PRIVATE
+#define FUTEX_WAIT_PRIVATE        (FUTEX_WAIT | FUTEX_PRIVATE_FLAG)
+#endif
+#ifndef FUTEX_WAKE_PRIVATE
+#define FUTEX_WAKE_PRIVATE        (FUTEX_WAKE | FUTEX_PRIVATE_FLAG)
+#endif
+#ifndef FUTEX_REQUEUE_PRIVATE
+#define FUTEX_REQUEUE_PRIVATE     (FUTEX_REQUEUE | FUTEX_PRIVATE_FLAG)
+#endif
+#ifndef FUTEX_CMP_REQUEUE_PRIVATE
+#define FUTEX_CMP_REQUEUE_PRIVATE (FUTEX_CMP_REQUEUE | FUTEX_PRIVATE_FLAG)
+#endif
+#ifndef FUTEX_WAKE_OP_PRIVATE
+#define FUTEX_WAKE_OP_PRIVATE     (FUTEX_WAKE_OP | FUTEX_PRIVATE_FLAG)
+#endif
+#ifndef FUTEX_LOCK_PI_PRIVATE
+#define FUTEX_LOCK_PI_PRIVATE     (FUTEX_LOCK_PI | FUTEX_PRIVATE_FLAG)
+#endif
+#ifndef FUTEX_UNLOCK_PI_PRIVATE
+#define FUTEX_UNLOCK_PI_PRIVATE   (FUTEX_UNLOCK_PI | FUTEX_PRIVATE_FLAG)
+#endif
+#ifndef FUTEX_TRYLOCK_PI_PRIVATE
+#define FUTEX_TRYLOCK_PI_PRIVATE  (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG)
+#endif
+
+
+#if defined(__x86_64__)
+#ifndef ARCH_SET_GS
+#define ARCH_SET_GS             0x1001
+#endif
+#ifndef ARCH_GET_GS
+#define ARCH_GET_GS             0x1004
+#endif
+#endif
+
+#if defined(__i386__)
+#ifndef __NR_quotactl
+#define __NR_quotactl           131
+#endif
+#ifndef __NR_setresuid
+#define __NR_setresuid          164
+#define __NR_getresuid          165
+#define __NR_setresgid          170
+#define __NR_getresgid          171
+#endif
+#ifndef __NR_rt_sigaction
+#define __NR_rt_sigreturn       173
+#define __NR_rt_sigaction       174
+#define __NR_rt_sigprocmask     175
+#define __NR_rt_sigpending      176
+#define __NR_rt_sigsuspend      179
+#endif
+#ifndef __NR_pread64
+#define __NR_pread64            180
+#endif
+#ifndef __NR_pwrite64
+#define __NR_pwrite64           181
+#endif
+#ifndef __NR_ugetrlimit
+#define __NR_ugetrlimit         191
+#endif
+#ifndef __NR_stat64
+#define __NR_stat64             195
+#endif
+#ifndef __NR_fstat64
+#define __NR_fstat64            197
+#endif
+#ifndef __NR_setresuid32
+#define __NR_setresuid32        208
+#define __NR_getresuid32        209
+#define __NR_setresgid32        210
+#define __NR_getresgid32        211
+#endif
+#ifndef __NR_setfsuid32
+#define __NR_setfsuid32         215
+#define __NR_setfsgid32         216
+#endif
+#ifndef __NR_getdents64
+#define __NR_getdents64         220
+#endif
+#ifndef __NR_gettid
+#define __NR_gettid             224
+#endif
+#ifndef __NR_readahead
+#define __NR_readahead          225
+#endif
+#ifndef __NR_setxattr
+#define __NR_setxattr           226
+#endif
+#ifndef __NR_lsetxattr
+#define __NR_lsetxattr          227
+#endif
+#ifndef __NR_getxattr
+#define __NR_getxattr           229
+#endif
+#ifndef __NR_lgetxattr
+#define __NR_lgetxattr          230
+#endif
+#ifndef __NR_listxattr
+#define __NR_listxattr          232
+#endif
+#ifndef __NR_llistxattr
+#define __NR_llistxattr         233
+#endif
+#ifndef __NR_tkill
+#define __NR_tkill              238
+#endif
+#ifndef __NR_futex
+#define __NR_futex              240
+#endif
+#ifndef __NR_sched_setaffinity
+#define __NR_sched_setaffinity  241
+#define __NR_sched_getaffinity  242
+#endif
+#ifndef __NR_set_tid_address
+#define __NR_set_tid_address    258
+#endif
+#ifndef __NR_clock_gettime
+#define __NR_clock_gettime      265
+#endif
+#ifndef __NR_clock_getres
+#define __NR_clock_getres       266
+#endif
+#ifndef __NR_statfs64
+#define __NR_statfs64           268
+#endif
+#ifndef __NR_fstatfs64
+#define __NR_fstatfs64          269
+#endif
+#ifndef __NR_fadvise64_64
+#define __NR_fadvise64_64       272
+#endif
+#ifndef __NR_ioprio_set
+#define __NR_ioprio_set         289
+#endif
+#ifndef __NR_ioprio_get
+#define __NR_ioprio_get         290
+#endif
+#ifndef __NR_openat
+#define __NR_openat             295
+#endif
+#ifndef __NR_fstatat64
+#define __NR_fstatat64          300
+#endif
+#ifndef __NR_unlinkat
+#define __NR_unlinkat           301
+#endif
+#ifndef __NR_move_pages
+#define __NR_move_pages         317
+#endif
+#ifndef __NR_getcpu
+#define __NR_getcpu             318
+#endif
+#ifndef __NR_fallocate
+#define __NR_fallocate          324
+#endif
+/* End of i386 definitions                                                   */
+#elif defined(__ARM_ARCH_3__)
+#ifndef __NR_setresuid
+#define __NR_setresuid          (__NR_SYSCALL_BASE + 164)
+#define __NR_getresuid          (__NR_SYSCALL_BASE + 165)
+#define __NR_setresgid          (__NR_SYSCALL_BASE + 170)
+#define __NR_getresgid          (__NR_SYSCALL_BASE + 171)
+#endif
+#ifndef __NR_rt_sigaction
+#define __NR_rt_sigreturn       (__NR_SYSCALL_BASE + 173)
+#define __NR_rt_sigaction       (__NR_SYSCALL_BASE + 174)
+#define __NR_rt_sigprocmask     (__NR_SYSCALL_BASE + 175)
+#define __NR_rt_sigpending      (__NR_SYSCALL_BASE + 176)
+#define __NR_rt_sigsuspend      (__NR_SYSCALL_BASE + 179)
+#endif
+#ifndef __NR_pread64
+#define __NR_pread64            (__NR_SYSCALL_BASE + 180)
+#endif
+#ifndef __NR_pwrite64
+#define __NR_pwrite64           (__NR_SYSCALL_BASE + 181)
+#endif
+#ifndef __NR_ugetrlimit
+#define __NR_ugetrlimit         (__NR_SYSCALL_BASE + 191)
+#endif
+#ifndef __NR_stat64
+#define __NR_stat64             (__NR_SYSCALL_BASE + 195)
+#endif
+#ifndef __NR_fstat64
+#define __NR_fstat64            (__NR_SYSCALL_BASE + 197)
+#endif
+#ifndef __NR_setresuid32
+#define __NR_setresuid32        (__NR_SYSCALL_BASE + 208)
+#define __NR_getresuid32        (__NR_SYSCALL_BASE + 209)
+#define __NR_setresgid32        (__NR_SYSCALL_BASE + 210)
+#define __NR_getresgid32        (__NR_SYSCALL_BASE + 211)
+#endif
+#ifndef __NR_setfsuid32
+#define __NR_setfsuid32         (__NR_SYSCALL_BASE + 215)
+#define __NR_setfsgid32         (__NR_SYSCALL_BASE + 216)
+#endif
+#ifndef __NR_getdents64
+#define __NR_getdents64         (__NR_SYSCALL_BASE + 217)
+#endif
+#ifndef __NR_gettid
+#define __NR_gettid             (__NR_SYSCALL_BASE + 224)
+#endif
+#ifndef __NR_readahead
+#define __NR_readahead          (__NR_SYSCALL_BASE + 225)
+#endif
+#ifndef __NR_setxattr
+#define __NR_setxattr           (__NR_SYSCALL_BASE + 226)
+#endif
+#ifndef __NR_lsetxattr
+#define __NR_lsetxattr          (__NR_SYSCALL_BASE + 227)
+#endif
+#ifndef __NR_getxattr
+#define __NR_getxattr           (__NR_SYSCALL_BASE + 229)
+#endif
+#ifndef __NR_lgetxattr
+#define __NR_lgetxattr          (__NR_SYSCALL_BASE + 230)
+#endif
+#ifndef __NR_listxattr
+#define __NR_listxattr          (__NR_SYSCALL_BASE + 232)
+#endif
+#ifndef __NR_llistxattr
+#define __NR_llistxattr         (__NR_SYSCALL_BASE + 233)
+#endif
+#ifndef __NR_tkill
+#define __NR_tkill              (__NR_SYSCALL_BASE + 238)
+#endif
+#ifndef __NR_futex
+#define __NR_futex              (__NR_SYSCALL_BASE + 240)
+#endif
+#ifndef __NR_sched_setaffinity
+#define __NR_sched_setaffinity  (__NR_SYSCALL_BASE + 241)
+#define __NR_sched_getaffinity  (__NR_SYSCALL_BASE + 242)
+#endif
+#ifndef __NR_set_tid_address
+#define __NR_set_tid_address    (__NR_SYSCALL_BASE + 256)
+#endif
+#ifndef __NR_clock_gettime
+#define __NR_clock_gettime      (__NR_SYSCALL_BASE + 263)
+#endif
+#ifndef __NR_clock_getres
+#define __NR_clock_getres       (__NR_SYSCALL_BASE + 264)
+#endif
+#ifndef __NR_statfs64
+#define __NR_statfs64           (__NR_SYSCALL_BASE + 266)
+#endif
+#ifndef __NR_fstatfs64
+#define __NR_fstatfs64          (__NR_SYSCALL_BASE + 267)
+#endif
+#ifndef __NR_ioprio_set
+#define __NR_ioprio_set         (__NR_SYSCALL_BASE + 314)
+#endif
+#ifndef __NR_ioprio_get
+#define __NR_ioprio_get         (__NR_SYSCALL_BASE + 315)
+#endif
+#ifndef __NR_move_pages
+#define __NR_move_pages         (__NR_SYSCALL_BASE + 344)
+#endif
+#ifndef __NR_getcpu
+#define __NR_getcpu             (__NR_SYSCALL_BASE + 345)
+#endif
+/* End of ARM 3 definitions                                                  */
+#elif defined(__x86_64__)
+#ifndef __NR_pread64
+#define __NR_pread64             17
+#endif
+#ifndef __NR_pwrite64
+#define __NR_pwrite64            18
+#endif
+#ifndef __NR_setresuid
+#define __NR_setresuid          117
+#define __NR_getresuid          118
+#define __NR_setresgid          119
+#define __NR_getresgid          120
+#endif
+#ifndef __NR_quotactl
+#define __NR_quotactl           179
+#endif
+#ifndef __NR_gettid
+#define __NR_gettid             186
+#endif
+#ifndef __NR_readahead
+#define __NR_readahead          187
+#endif
+#ifndef __NR_setxattr
+#define __NR_setxattr           188
+#endif
+#ifndef __NR_lsetxattr
+#define __NR_lsetxattr          189
+#endif
+#ifndef __NR_getxattr
+#define __NR_getxattr           191
+#endif
+#ifndef __NR_lgetxattr
+#define __NR_lgetxattr          192
+#endif
+#ifndef __NR_listxattr
+#define __NR_listxattr          194
+#endif
+#ifndef __NR_llistxattr
+#define __NR_llistxattr         195
+#endif
+#ifndef __NR_tkill
+#define __NR_tkill              200
+#endif
+#ifndef __NR_futex
+#define __NR_futex              202
+#endif
+#ifndef __NR_sched_setaffinity
+#define __NR_sched_setaffinity  203
+#define __NR_sched_getaffinity  204
+#endif
+#ifndef __NR_getdents64
+#define __NR_getdents64         217
+#endif
+#ifndef __NR_set_tid_address
+#define __NR_set_tid_address    218
+#endif
+#ifndef __NR_fadvise64
+#define __NR_fadvise64          221
+#endif
+#ifndef __NR_clock_gettime
+#define __NR_clock_gettime      228
+#endif
+#ifndef __NR_clock_getres
+#define __NR_clock_getres       229
+#endif
+#ifndef __NR_ioprio_set
+#define __NR_ioprio_set         251
+#endif
+#ifndef __NR_ioprio_get
+#define __NR_ioprio_get         252
+#endif
+#ifndef __NR_openat
+#define __NR_openat             257
+#endif
+#ifndef __NR_newfstatat
+#define __NR_newfstatat         262
+#endif
+#ifndef __NR_unlinkat
+#define __NR_unlinkat           263
+#endif
+#ifndef __NR_move_pages
+#define __NR_move_pages         279
+#endif
+#ifndef __NR_fallocate
+#define __NR_fallocate          285
+#endif
+/* End of x86-64 definitions                                                 */
+#elif defined(__mips__)
+#if _MIPS_SIM == _MIPS_SIM_ABI32
+#ifndef __NR_setresuid
+#define __NR_setresuid          (__NR_Linux + 185)
+#define __NR_getresuid          (__NR_Linux + 186)
+#define __NR_setresgid          (__NR_Linux + 190)
+#define __NR_getresgid          (__NR_Linux + 191)
+#endif
+#ifndef __NR_rt_sigaction
+#define __NR_rt_sigreturn       (__NR_Linux + 193)
+#define __NR_rt_sigaction       (__NR_Linux + 194)
+#define __NR_rt_sigprocmask     (__NR_Linux + 195)
+#define __NR_rt_sigpending      (__NR_Linux + 196)
+#define __NR_rt_sigsuspend      (__NR_Linux + 199)
+#endif
+#ifndef __NR_pread64
+#define __NR_pread64            (__NR_Linux + 200)
+#endif
+#ifndef __NR_pwrite64
+#define __NR_pwrite64           (__NR_Linux + 201)
+#endif
+#ifndef __NR_stat64
+#define __NR_stat64             (__NR_Linux + 213)
+#endif
+#ifndef __NR_fstat64
+#define __NR_fstat64            (__NR_Linux + 215)
+#endif
+#ifndef __NR_getdents64
+#define __NR_getdents64         (__NR_Linux + 219)
+#endif
+#ifndef __NR_gettid
+#define __NR_gettid             (__NR_Linux + 222)
+#endif
+#ifndef __NR_readahead
+#define __NR_readahead          (__NR_Linux + 223)
+#endif
+#ifndef __NR_setxattr
+#define __NR_setxattr           (__NR_Linux + 224)
+#endif
+#ifndef __NR_lsetxattr
+#define __NR_lsetxattr          (__NR_Linux + 225)
+#endif
+#ifndef __NR_getxattr
+#define __NR_getxattr           (__NR_Linux + 227)
+#endif
+#ifndef __NR_lgetxattr
+#define __NR_lgetxattr          (__NR_Linux + 228)
+#endif
+#ifndef __NR_listxattr
+#define __NR_listxattr          (__NR_Linux + 230)
+#endif
+#ifndef __NR_llistxattr
+#define __NR_llistxattr         (__NR_Linux + 231)
+#endif
+#ifndef __NR_tkill
+#define __NR_tkill              (__NR_Linux + 236)
+#endif
+#ifndef __NR_futex
+#define __NR_futex              (__NR_Linux + 238)
+#endif
+#ifndef __NR_sched_setaffinity
+#define __NR_sched_setaffinity  (__NR_Linux + 239)
+#define __NR_sched_getaffinity  (__NR_Linux + 240)
+#endif
+#ifndef __NR_set_tid_address
+#define __NR_set_tid_address    (__NR_Linux + 252)
+#endif
+#ifndef __NR_statfs64
+#define __NR_statfs64           (__NR_Linux + 255)
+#endif
+#ifndef __NR_fstatfs64
+#define __NR_fstatfs64          (__NR_Linux + 256)
+#endif
+#ifndef __NR_clock_gettime
+#define __NR_clock_gettime      (__NR_Linux + 263)
+#endif
+#ifndef __NR_clock_getres
+#define __NR_clock_getres       (__NR_Linux + 264)
+#endif
+#ifndef __NR_openat
+#define __NR_openat             (__NR_Linux + 288)
+#endif
+#ifndef __NR_fstatat
+#define __NR_fstatat            (__NR_Linux + 293)
+#endif
+#ifndef __NR_unlinkat
+#define __NR_unlinkat           (__NR_Linux + 294)
+#endif
+#ifndef __NR_move_pages
+#define __NR_move_pages         (__NR_Linux + 308)
+#endif
+#ifndef __NR_getcpu
+#define __NR_getcpu             (__NR_Linux + 312)
+#endif
+#ifndef __NR_ioprio_set
+#define __NR_ioprio_set         (__NR_Linux + 314)
+#endif
+#ifndef __NR_ioprio_get
+#define __NR_ioprio_get         (__NR_Linux + 315)
+#endif
+/* End of MIPS (old 32bit ABI) definitions */
+#elif  _MIPS_SIM == _MIPS_SIM_ABI64
+#ifndef __NR_pread64
+#define __NR_pread64            (__NR_Linux +  16)
+#endif
+#ifndef __NR_pwrite64
+#define __NR_pwrite64           (__NR_Linux +  17)
+#endif
+#ifndef __NR_setresuid
+#define __NR_setresuid          (__NR_Linux + 115)
+#define __NR_getresuid          (__NR_Linux + 116)
+#define __NR_setresgid          (__NR_Linux + 117)
+#define __NR_getresgid          (__NR_Linux + 118)
+#endif
+#ifndef __NR_gettid
+#define __NR_gettid             (__NR_Linux + 178)
+#endif
+#ifndef __NR_readahead
+#define __NR_readahead          (__NR_Linux + 179)
+#endif
+#ifndef __NR_setxattr
+#define __NR_setxattr           (__NR_Linux + 180)
+#endif
+#ifndef __NR_lsetxattr
+#define __NR_lsetxattr          (__NR_Linux + 181)
+#endif
+#ifndef __NR_getxattr
+#define __NR_getxattr           (__NR_Linux + 183)
+#endif
+#ifndef __NR_lgetxattr
+#define __NR_lgetxattr          (__NR_Linux + 184)
+#endif
+#ifndef __NR_listxattr
+#define __NR_listxattr          (__NR_Linux + 186)
+#endif
+#ifndef __NR_llistxattr
+#define __NR_llistxattr         (__NR_Linux + 187)
+#endif
+#ifndef __NR_tkill
+#define __NR_tkill              (__NR_Linux + 192)
+#endif
+#ifndef __NR_futex
+#define __NR_futex              (__NR_Linux + 194)
+#endif
+#ifndef __NR_sched_setaffinity
+#define __NR_sched_setaffinity  (__NR_Linux + 195)
+#define __NR_sched_getaffinity  (__NR_Linux + 196)
+#endif
+#ifndef __NR_set_tid_address
+#define __NR_set_tid_address    (__NR_Linux + 212)
+#endif
+#ifndef __NR_clock_gettime
+#define __NR_clock_gettime      (__NR_Linux + 222)
+#endif
+#ifndef __NR_clock_getres
+#define __NR_clock_getres       (__NR_Linux + 223)
+#endif
+#ifndef __NR_openat
+#define __NR_openat             (__NR_Linux + 247)
+#endif
+#ifndef __NR_fstatat
+#define __NR_fstatat            (__NR_Linux + 252)
+#endif
+#ifndef __NR_unlinkat
+#define __NR_unlinkat           (__NR_Linux + 253)
+#endif
+#ifndef __NR_move_pages
+#define __NR_move_pages         (__NR_Linux + 267)
+#endif
+#ifndef __NR_getcpu
+#define __NR_getcpu             (__NR_Linux + 271)
+#endif
+#ifndef __NR_ioprio_set
+#define __NR_ioprio_set         (__NR_Linux + 273)
+#endif
+#ifndef __NR_ioprio_get
+#define __NR_ioprio_get         (__NR_Linux + 274)
+#endif
+/* End of MIPS (64bit ABI) definitions */
+#else
+#ifndef __NR_setresuid
+#define __NR_setresuid          (__NR_Linux + 115)
+#define __NR_getresuid          (__NR_Linux + 116)
+#define __NR_setresgid          (__NR_Linux + 117)
+#define __NR_getresgid          (__NR_Linux + 118)
+#endif
+#ifndef __NR_gettid
+#define __NR_gettid             (__NR_Linux + 178)
+#endif
+#ifndef __NR_readahead
+#define __NR_readahead          (__NR_Linux + 179)
+#endif
+#ifndef __NR_setxattr
+#define __NR_setxattr           (__NR_Linux + 180)
+#endif
+#ifndef __NR_lsetxattr
+#define __NR_lsetxattr          (__NR_Linux + 181)
+#endif
+#ifndef __NR_getxattr
+#define __NR_getxattr           (__NR_Linux + 183)
+#endif
+#ifndef __NR_lgetxattr
+#define __NR_lgetxattr          (__NR_Linux + 184)
+#endif
+#ifndef __NR_listxattr
+#define __NR_listxattr          (__NR_Linux + 186)
+#endif
+#ifndef __NR_llistxattr
+#define __NR_llistxattr         (__NR_Linux + 187)
+#endif
+#ifndef __NR_tkill
+#define __NR_tkill              (__NR_Linux + 192)
+#endif
+#ifndef __NR_futex
+#define __NR_futex              (__NR_Linux + 194)
+#endif
+#ifndef __NR_sched_setaffinity
+#define __NR_sched_setaffinity  (__NR_Linux + 195)
+#define __NR_sched_getaffinity  (__NR_Linux + 196)
+#endif
+#ifndef __NR_set_tid_address
+#define __NR_set_tid_address    (__NR_Linux + 213)
+#endif
+#ifndef __NR_statfs64
+#define __NR_statfs64           (__NR_Linux + 217)
+#endif
+#ifndef __NR_fstatfs64
+#define __NR_fstatfs64          (__NR_Linux + 218)
+#endif
+#ifndef __NR_clock_gettime
+#define __NR_clock_gettime      (__NR_Linux + 226)
+#endif
+#ifndef __NR_clock_getres
+#define __NR_clock_getres       (__NR_Linux + 227)
+#endif
+#ifndef __NR_openat
+#define __NR_openat             (__NR_Linux + 251)
+#endif
+#ifndef __NR_fstatat
+#define __NR_fstatat            (__NR_Linux + 256)
+#endif
+#ifndef __NR_unlinkat
+#define __NR_unlinkat           (__NR_Linux + 257)
+#endif
+#ifndef __NR_move_pages
+#define __NR_move_pages         (__NR_Linux + 271)
+#endif
+#ifndef __NR_getcpu
+#define __NR_getcpu             (__NR_Linux + 275)
+#endif
+#ifndef __NR_ioprio_set
+#define __NR_ioprio_set         (__NR_Linux + 277)
+#endif
+#ifndef __NR_ioprio_get
+#define __NR_ioprio_get         (__NR_Linux + 278)
+#endif
+/* End of MIPS (new 32bit ABI) definitions                                   */
+#endif
+/* End of MIPS definitions                                                   */
+#elif defined(__PPC__)
+#ifndef __NR_setfsuid
+#define __NR_setfsuid           138
+#define __NR_setfsgid           139
+#endif
+#ifndef __NR_setresuid
+#define __NR_setresuid          164
+#define __NR_getresuid          165
+#define __NR_setresgid          169
+#define __NR_getresgid          170
+#endif
+#ifndef __NR_rt_sigaction
+#define __NR_rt_sigreturn       172
+#define __NR_rt_sigaction       173
+#define __NR_rt_sigprocmask     174
+#define __NR_rt_sigpending      175
+#define __NR_rt_sigsuspend      178
+#endif
+#ifndef __NR_pread64
+#define __NR_pread64            179
+#endif
+#ifndef __NR_pwrite64
+#define __NR_pwrite64           180
+#endif
+#ifndef __NR_ugetrlimit
+#define __NR_ugetrlimit         190
+#endif
+#ifndef __NR_readahead
+#define __NR_readahead          191
+#endif
+#ifndef __NR_stat64
+#define __NR_stat64             195
+#endif
+#ifndef __NR_fstat64
+#define __NR_fstat64            197
+#endif
+#ifndef __NR_getdents64
+#define __NR_getdents64         202
+#endif
+#ifndef __NR_gettid
+#define __NR_gettid             207
+#endif
+#ifndef __NR_tkill
+#define __NR_tkill              208
+#endif
+#ifndef __NR_setxattr
+#define __NR_setxattr           209
+#endif
+#ifndef __NR_lsetxattr
+#define __NR_lsetxattr          210
+#endif
+#ifndef __NR_getxattr
+#define __NR_getxattr           212
+#endif
+#ifndef __NR_lgetxattr
+#define __NR_lgetxattr          213
+#endif
+#ifndef __NR_listxattr
+#define __NR_listxattr          215
+#endif
+#ifndef __NR_llistxattr
+#define __NR_llistxattr         216
+#endif
+#ifndef __NR_futex
+#define __NR_futex              221
+#endif
+#ifndef __NR_sched_setaffinity
+#define __NR_sched_setaffinity  222
+#define __NR_sched_getaffinity  223
+#endif
+#ifndef __NR_set_tid_address
+#define __NR_set_tid_address    232
+#endif
+#ifndef __NR_clock_gettime
+#define __NR_clock_gettime      246
+#endif
+#ifndef __NR_clock_getres
+#define __NR_clock_getres       247
+#endif
+#ifndef __NR_statfs64
+#define __NR_statfs64           252
+#endif
+#ifndef __NR_fstatfs64
+#define __NR_fstatfs64          253
+#endif
+#ifndef __NR_fadvise64_64
+#define __NR_fadvise64_64       254
+#endif
+#ifndef __NR_ioprio_set
+#define __NR_ioprio_set         273
+#endif
+#ifndef __NR_ioprio_get
+#define __NR_ioprio_get         274
+#endif
+#ifndef __NR_openat
+#define __NR_openat             286
+#endif
+#ifndef __NR_fstatat64
+#define __NR_fstatat64          291
+#endif
+#ifndef __NR_unlinkat
+#define __NR_unlinkat           292
+#endif
+#ifndef __NR_move_pages
+#define __NR_move_pages         301
+#endif
+#ifndef __NR_getcpu
+#define __NR_getcpu             302
+#endif
+/* End of powerpc definitions                                                */
+#endif
+
+
+/* After forking, we must make sure to only call system calls.               */
+#if __BOUNDED_POINTERS__
+  #error "Need to port invocations of syscalls for bounded ptrs"
+#else
+  /* The core dumper and the thread lister get executed after threads
+   * have been suspended. As a consequence, we cannot call any functions
+   * that acquire locks. Unfortunately, libc wraps most system calls
+   * (e.g. in order to implement pthread_atfork, and to make calls
+   * cancellable), which means we cannot call these functions. Instead,
+   * we have to call syscall() directly.
+   */
+  #undef LSS_ERRNO
+  #ifdef SYS_ERRNO
+    /* Allow the including file to override the location of errno. This can
+     * be useful when using clone() with the CLONE_VM option.
+     */
+    #define LSS_ERRNO SYS_ERRNO
+  #else
+    #define LSS_ERRNO errno
+  #endif
+
+  #undef LSS_INLINE
+  #ifdef SYS_INLINE
+    #define LSS_INLINE SYS_INLINE
+  #else
+    #define LSS_INLINE static inline
+  #endif
+
+  /* Allow the including file to override the prefix used for all new
+   * system calls. By default, it will be set to "sys_".
+   */
+  #undef LSS_NAME
+  #ifndef SYS_PREFIX
+    #define LSS_NAME(name) sys_##name
+  #elif SYS_PREFIX < 0
+    #define LSS_NAME(name) name
+  #elif SYS_PREFIX == 0
+    #define LSS_NAME(name) sys0_##name
+  #elif SYS_PREFIX == 1
+    #define LSS_NAME(name) sys1_##name
+  #elif SYS_PREFIX == 2
+    #define LSS_NAME(name) sys2_##name
+  #elif SYS_PREFIX == 3
+    #define LSS_NAME(name) sys3_##name
+  #elif SYS_PREFIX == 4
+    #define LSS_NAME(name) sys4_##name
+  #elif SYS_PREFIX == 5
+    #define LSS_NAME(name) sys5_##name
+  #elif SYS_PREFIX == 6
+    #define LSS_NAME(name) sys6_##name
+  #elif SYS_PREFIX == 7
+    #define LSS_NAME(name) sys7_##name
+  #elif SYS_PREFIX == 8
+    #define LSS_NAME(name) sys8_##name
+  #elif SYS_PREFIX == 9
+    #define LSS_NAME(name) sys9_##name
+  #endif
+
+  #undef  LSS_RETURN
+  #if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__))
+  /* Failing system calls return a negative result in the range of
+   * -1..-4095. These are "errno" values with the sign inverted.
+   */
+  #define LSS_RETURN(type, res)                                               \
+    do {                                                                      \
+      if ((unsigned long)(res) >= (unsigned long)(-4095)) {                   \
+        LSS_ERRNO = -(res);                                                   \
+        res = -1;                                                             \
+      }                                                                       \
+      return (type) (res);                                                    \
+    } while (0)
+  #elif defined(__mips__)
+  /* On MIPS, failing system calls set an error flag ("err") in a separate
+   * CPU register; the result ("res") then carries the errno value.
+   */
+  #define LSS_RETURN(type, res, err)                                          \
+    do {                                                                      \
+      if (err) {                                                              \
+        LSS_ERRNO = (res);                                                    \
+        res = -1;                                                             \
+      }                                                                       \
+      return (type) (res);                                                    \
+    } while (0)
+  #elif defined(__PPC__)
+  /* On PPC, failing system calls set bit 0x10000000 in a separate CPU register
+   * ("err"); "res" then carries the errno value. See linux/unistd.h.
+   */
+  #define LSS_RETURN(type, res, err)                                          \
+   do {                                                                       \
+     if (err & 0x10000000 ) {                                                 \
+       LSS_ERRNO = (res);                                                     \
+       res = -1;                                                              \
+     }                                                                        \
+     return (type) (res);                                                     \
+   } while (0)
+  #endif
+  #if defined(__i386__)
+    /* In PIC mode (e.g. when building shared libraries), gcc for i386
+     * reserves ebx. Unfortunately, most distributions ship with implementations
+     * of _syscallX() which clobber ebx.
+     * Also, most definitions of _syscallX() neglect to mark "memory" as being
+     * clobbered. This causes problems with compilers that do a better job
+     * at optimizing across __asm__ calls.
+     * So, we just have to redefine all of the _syscallX() macros.
+     */
+    #undef  LSS_BODY
+    #define LSS_BODY(type,args...)                                            \
+      long __res;                                                             \
+      __asm__ __volatile__("push %%ebx\n"                                     \
+                           "movl %2,%%ebx\n"                                  \
+                           "int $0x80\n"                                      \
+                           "pop %%ebx"                                        \
+                           args                                               \
+                           : "esp", "memory");                                \
+      LSS_RETURN(type,__res)
+    #undef  _syscall0
+    #define _syscall0(type,name)                                              \
+      type LSS_NAME(name)(void) {                                             \
+        long __res;                                                           \
+        __asm__ volatile("int $0x80"                                          \
+                         : "=a" (__res)                                       \
+                         : "0" (__NR_##name)                                  \
+                         : "memory");                                         \
+        LSS_RETURN(type,__res);                                               \
+      }
+    #undef  _syscall1
+    #define _syscall1(type,name,type1,arg1)                                   \
+      type LSS_NAME(name)(type1 arg1) {                                       \
+        LSS_BODY(type,                                                        \
+             : "=a" (__res)                                                   \
+             : "0" (__NR_##name), "ri" ((long)(arg1)));                       \
+      }
+    #undef  _syscall2
+    #define _syscall2(type,name,type1,arg1,type2,arg2)                        \
+      type LSS_NAME(name)(type1 arg1,type2 arg2) {                            \
+        LSS_BODY(type,                                                        \
+             : "=a" (__res)                                                   \
+             : "0" (__NR_##name),"ri" ((long)(arg1)), "c" ((long)(arg2)));    \
+      }
+    #undef  _syscall3
+    #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3)             \
+      type LSS_NAME(name)(type1 arg1,type2 arg2,type3 arg3) {                 \
+        LSS_BODY(type,                                                        \
+             : "=a" (__res)                                                   \
+             : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)),    \
+               "d" ((long)(arg3)));                                           \
+      }
+    #undef  _syscall4
+    #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4)  \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) {   \
+        LSS_BODY(type,                                                        \
+             : "=a" (__res)                                                   \
+             : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)),    \
+               "d" ((long)(arg3)),"S" ((long)(arg4)));                        \
+      }
+    #undef  _syscall5
+    #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
+                      type5,arg5)                                             \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
+                          type5 arg5) {                                       \
+        long __res;                                                           \
+        __asm__ __volatile__("push %%ebx\n"                                   \
+                             "movl %2,%%ebx\n"                                \
+                             "movl %1,%%eax\n"                                \
+                             "int  $0x80\n"                                   \
+                             "pop  %%ebx"                                     \
+                             : "=a" (__res)                                   \
+                             : "i" (__NR_##name), "ri" ((long)(arg1)),        \
+                               "c" ((long)(arg2)), "d" ((long)(arg3)),        \
+                               "S" ((long)(arg4)), "D" ((long)(arg5))         \
+                             : "esp", "memory");                              \
+        LSS_RETURN(type,__res);                                               \
+      }
+    #undef  _syscall6
+    #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
+                      type5,arg5,type6,arg6)                                  \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
+                          type5 arg5, type6 arg6) {                           \
+        long __res;                                                           \
+        struct { long __a1; long __a6; } __s = { (long)arg1, (long) arg6 };   \
+        __asm__ __volatile__("push %%ebp\n"                                   \
+                             "push %%ebx\n"                                   \
+                             "movl 4(%2),%%ebp\n"                             \
+                             "movl 0(%2), %%ebx\n"                            \
+                             "movl %1,%%eax\n"                                \
+                             "int  $0x80\n"                                   \
+                             "pop  %%ebx\n"                                   \
+                             "pop  %%ebp"                                     \
+                             : "=a" (__res)                                   \
+                             : "i" (__NR_##name),  "0" ((long)(&__s)),        \
+                               "c" ((long)(arg2)), "d" ((long)(arg3)),        \
+                               "S" ((long)(arg4)), "D" ((long)(arg5))         \
+                             : "esp", "memory");                              \
+        LSS_RETURN(type,__res);                                               \
+      }
+    LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
+                                   int flags, void *arg, int *parent_tidptr,
+                                   void *newtls, int *child_tidptr) {
+      long __res;
+      __asm__ __volatile__(/* if (fn == NULL)
+                            *   return -EINVAL;
+                            */
+                           "movl   %3,%%ecx\n"
+                           "jecxz  1f\n"
+
+                           /* if (child_stack == NULL)
+                            *   return -EINVAL;
+                            */
+                           "movl   %4,%%ecx\n"
+                           "jecxz  1f\n"
+
+                           /* Set up alignment of the child stack:
+                            * child_stack = (child_stack & ~0xF) - 20;
+                            */
+                           "andl   $-16,%%ecx\n"
+                           "subl   $20,%%ecx\n"
+
+                           /* Push "arg" and "fn" onto the stack that will be
+                            * used by the child.
+                            */
+                           "movl   %6,%%eax\n"
+                           "movl   %%eax,4(%%ecx)\n"
+                           "movl   %3,%%eax\n"
+                           "movl   %%eax,(%%ecx)\n"
+
+                           /* %eax = syscall(%eax = __NR_clone,
+                            *                %ebx = flags,
+                            *                %ecx = child_stack,
+                            *                %edx = parent_tidptr,
+                            *                %esi = newtls,
+                            *                %edi = child_tidptr)
+                            * Also, make sure that %ebx gets preserved as it is
+                            * used in PIC mode.
+                            */
+                           "movl   %8,%%esi\n"
+                           "movl   %7,%%edx\n"
+                           "movl   %5,%%eax\n"
+                           "movl   %9,%%edi\n"
+                           "pushl  %%ebx\n"
+                           "movl   %%eax,%%ebx\n"
+                           "movl   %2,%%eax\n"
+                           "int    $0x80\n"
+
+                           /* In the parent: restore %ebx
+                            * In the child:  move "fn" into %ebx
+                            */
+                           "popl   %%ebx\n"
+
+                           /* if (%eax != 0)
+                            *   return %eax;
+                            */
+                           "test   %%eax,%%eax\n"
+                           "jnz    1f\n"
+
+                           /* In the child, now. Terminate frame pointer chain.
+                            */
+                           "movl   $0,%%ebp\n"
+
+                           /* Call "fn". "arg" is already on the stack.
+                            */
+                           "call   *%%ebx\n"
+
+                           /* Call _exit(%ebx). Unfortunately older versions
+                            * of gcc restrict the number of arguments that can
+                            * be passed to asm(). So, we need to hard-code the
+                            * system call number.
+                            */
+                           "movl   %%eax,%%ebx\n"
+                           "movl   $1,%%eax\n"
+                           "int    $0x80\n"
+
+                           /* Return to parent.
+                            */
+                         "1:\n"
+                           : "=a" (__res)
+                           : "0"(-EINVAL), "i"(__NR_clone),
+                             "m"(fn), "m"(child_stack), "m"(flags), "m"(arg),
+                             "m"(parent_tidptr), "m"(newtls), "m"(child_tidptr)
+                           : "esp", "memory", "ecx", "edx", "esi", "edi");
+      LSS_RETURN(int, __res);
+    }
+
+    #define __NR__fadvise64_64 __NR_fadvise64_64
+    LSS_INLINE _syscall6(int, _fadvise64_64, int, fd,
+                         unsigned, offset_lo, unsigned, offset_hi,
+                         unsigned, len_lo, unsigned, len_hi,
+                         int, advice)
+
+    LSS_INLINE int LSS_NAME(fadvise64)(int fd, loff_t offset,
+                                       loff_t len, int advice) {
+      return LSS_NAME(_fadvise64_64)(fd,
+                                     (unsigned)offset, (unsigned)(offset >>32),
+                                     (unsigned)len, (unsigned)(len >> 32),
+                                     advice);
+    }
+
+    #define __NR__fallocate __NR_fallocate
+    LSS_INLINE _syscall6(int, _fallocate, int, fd,
+                         int, mode,
+                         unsigned, offset_lo, unsigned, offset_hi,
+                         unsigned, len_lo, unsigned, len_hi)
+
+    LSS_INLINE int LSS_NAME(fallocate)(int fd, int mode,
+                                       loff_t offset, loff_t len) {
+      union { loff_t off; unsigned w[2]; } o = { offset }, l = { len };
+      return LSS_NAME(_fallocate)(fd, mode, o.w[0], o.w[1], l.w[0], l.w[1]);
+    }
+
+    LSS_INLINE _syscall1(int, set_thread_area, void *, u)
+    LSS_INLINE _syscall1(int, get_thread_area, void *, u)
+
+    LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) {
+      /* On i386, the kernel does not know how to return from a signal
+       * handler. Instead, it relies on user space to provide a
+       * restorer function that calls the {rt_,}sigreturn() system call.
+       * Unfortunately, we cannot just reference the glibc version of this
+       * function, as glibc goes out of its way to make it inaccessible.
+       */
+      void (*res)(void);
+      __asm__ __volatile__("call   2f\n"
+                         "0:.align 16\n"
+                         "1:movl   %1,%%eax\n"
+                           "int    $0x80\n"
+                         "2:popl   %0\n"
+                           "addl   $(1b-0b),%0\n"
+                           : "=a" (res)
+                           : "i"  (__NR_rt_sigreturn));
+      return res;
+    }
+    LSS_INLINE void (*LSS_NAME(restore)(void))(void) {
+      /* On i386, the kernel does not know how to return from a signal
+       * handler. Instead, it relies on user space to provide a
+       * restorer function that calls the {rt_,}sigreturn() system call.
+       * Unfortunately, we cannot just reference the glibc version of this
+       * function, as glibc goes out of its way to make it inaccessible.
+       */
+      void (*res)(void);
+      __asm__ __volatile__("call   2f\n"
+                         "0:.align 16\n"
+                         "1:pop    %%eax\n"
+                           "movl   %1,%%eax\n"
+                           "int    $0x80\n"
+                         "2:popl   %0\n"
+                           "addl   $(1b-0b),%0\n"
+                           : "=a" (res)
+                           : "i"  (__NR_sigreturn));
+      return res;
+    }
+  #elif defined(__x86_64__)
+    /* There are no known problems with any of the _syscallX() macros
+     * currently shipping for x86_64, but we still need to be able to define
+     * our own version so that we can override the location of errno
+     * (e.g. when using the clone() system call with the CLONE_VM
+     * option).
+     */
+    #undef  LSS_BODY
+    #define LSS_BODY(type,name, ...)                                          \
+          long __res;                                                         \
+          __asm__ __volatile__("syscall" : "=a" (__res) : "0" (__NR_##name),  \
+            ##__VA_ARGS__ : "r11", "rcx", "memory");                          \
+          LSS_RETURN(type, __res)
+    #undef _syscall0  /* x86_64 wrapper for a system call without arguments */
+    #define _syscall0(type,name)                                              \
+      type LSS_NAME(name)() {                                                 \
+        LSS_BODY(type, name);                                                 \
+      }
+    #undef _syscall1  /* one argument, bound to %rdi ("D") */
+    #define _syscall1(type,name,type1,arg1)                                   \
+      type LSS_NAME(name)(type1 arg1) {                                       \
+        LSS_BODY(type, name, "D" ((long)(arg1)));                             \
+      }
+    #undef _syscall2  /* two arguments, bound to %rdi ("D") and %rsi ("S") */
+    #define _syscall2(type,name,type1,arg1,type2,arg2)                        \
+      type LSS_NAME(name)(type1 arg1, type2 arg2) {                           \
+        LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2)));         \
+      }
+    #undef _syscall3  /* three arguments, bound to %rdi, %rsi and %rdx */
+    #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3)             \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) {               \
+        LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2)),          \
+                             "d" ((long)(arg3)));                             \
+      }
+    #undef _syscall4  /* Four arguments: %rdi, %rsi, %rdx, plus a fourth that
+                       * the asm template itself copies into %r10 (operand
+                       * %5) right before "syscall"; %r10 is therefore listed
+                       * as clobbered.
+                       */
+    #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4)  \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) {   \
+          long __res;                                                         \
+          __asm__ __volatile__("movq %5,%%r10; syscall" :                     \
+            "=a" (__res) : "0" (__NR_##name),                                 \
+            "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)),       \
+            "r" ((long)(arg4)) : "r10", "r11", "rcx", "memory");              \
+          LSS_RETURN(type, __res);                                            \
+      }
+    #undef _syscall5  /* Five arguments: %rdi, %rsi, %rdx as constraints; the
+                       * template copies operand %5 (arg4) into %r10 and
+                       * operand %6 (arg5) into %r8 before "syscall".
+                       */
+    #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
+                      type5,arg5)                                             \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
+                          type5 arg5) {                                       \
+          long __res;                                                         \
+          __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; syscall" :       \
+            "=a" (__res) : "0" (__NR_##name),                                 \
+            "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)),       \
+            "r" ((long)(arg4)), "r" ((long)(arg5)) :                          \
+            "r8", "r10", "r11", "rcx", "memory");                             \
+          LSS_RETURN(type, __res);                                            \
+      }
+    #undef _syscall6  /* Six arguments: %rdi, %rsi, %rdx as constraints; the
+                       * template copies operands %5, %6 and %7 (arg4..arg6)
+                       * into %r10, %r8 and %r9 before "syscall".
+                       */
+    #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
+                      type5,arg5,type6,arg6)                                  \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
+                          type5 arg5, type6 arg6) {                           \
+          long __res;                                                         \
+          __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; movq %7,%%r9;"   \
+                               "syscall" :                                    \
+            "=a" (__res) : "0" (__NR_##name),                                 \
+            "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)),       \
+            "r" ((long)(arg4)), "r" ((long)(arg5)), "r" ((long)(arg6)) :      \
+            "r8", "r9", "r10", "r11", "rcx", "memory");                       \
+          LSS_RETURN(type, __res);                                            \
+      }
+    LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
+                                   int flags, void *arg, int *parent_tidptr,
+                                   void *newtls, int *child_tidptr) {
+      long __res;  /* raw result; stays -EINVAL if fn or child_stack is NULL */
+      {
+        register void *__tls  __asm__("r8")  = newtls;
+        register int  *__ctid __asm__("r10") = child_tidptr;
+        __asm__ __volatile__(/* Operands: %2 = __NR_clone, %3 = __NR_exit,
+                              * %4 = fn, %5 = child_stack (%rsi), %7 = arg.
+                              * %rax is preloaded with -EINVAL.
+                              *
+                              * if (fn == NULL)
+                              *   return -EINVAL;
+                              */
+                             "testq  %4,%4\n"
+                             "jz     1f\n"
+
+                             /* if (child_stack == NULL)
+                              *   return -EINVAL;
+                              */
+                             "testq  %5,%5\n"
+                             "jz     1f\n"
+
+                             /* childstack -= 2*sizeof(void *);
+                              */
+                             "subq   $16,%5\n"
+
+                             /* Push "arg" and "fn" onto the stack that will be
+                              * used by the child.
+                              */
+                             "movq   %7,8(%5)\n"
+                             "movq   %4,0(%5)\n"
+
+                             /* %rax = syscall(%rax = __NR_clone,
+                              *                %rdi = flags,
+                              *                %rsi = child_stack,
+                              *                %rdx = parent_tidptr,
+                              *                %r8  = new_tls,
+                              *                %r10 = child_tidptr)
+                              */
+                             "movq   %2,%%rax\n"
+                             "syscall\n"
+
+                             /* if (%rax != 0)
+                              *   return;
+                              */
+                             "testq  %%rax,%%rax\n"
+                             "jnz    1f\n"
+
+                             /* In the child. Terminate frame pointer chain.
+                              */
+                             "xorq   %%rbp,%%rbp\n"
+
+                             /* Call "fn(arg)". Both values are popped from the
+                              * child stack, where they were stored above.
+                              */
+                             "popq   %%rax\n"
+                             "popq   %%rdi\n"
+                             "call   *%%rax\n"
+
+                             /* Call _exit(%rax): the value left in %rax by fn
+                              * is moved to %rdi as the exit status.
+                              */
+                             "movq   %%rax,%%rdi\n"
+                             "movq   %3,%%rax\n"
+                             "syscall\n"
+
+                             /* Return to parent.
+                              */
+                           "1:\n"
+                             : "=a" (__res)
+                             : "0"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit),
+                               "r"(fn), "S"(child_stack), "D"(flags), "r"(arg),
+                               "d"(parent_tidptr), "r"(__tls), "r"(__ctid)
+                             : "rsp", "memory", "r11", "rcx");
+      }
+      LSS_RETURN(int, __res);
+    }
+    LSS_INLINE _syscall2(int, arch_prctl, int, c, void *, a) /* x86_64 branch */
+    LSS_INLINE _syscall4(int, fadvise64, int, fd, loff_t, offset, loff_t, len,
+                         int,  advice)  /* "advice" is moved into %r10 */
+
+    LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) {
+      /* On x86-64, the kernel does not know how to return from
+       * a signal handler. Instead, it relies on user space to provide a
+       * restorer function that calls the rt_sigreturn() system call.
+       * Unfortunately, we cannot just reference the glibc version of this
+       * function, as glibc goes out of its way to make it inaccessible.
+       *
+       * This function does not run the restorer; it returns its address.
+       * "call 2f" skips over the embedded stub and pushes the address of
+       * label 0 as return address. At label 2 that address is popped into
+       * "res" and advanced by (1b-0b), i.e. by the padding that ".align"
+       * inserted, so that "res" ends up pointing at label 1.
+       * The stub at label 1 loads __NR_rt_sigreturn into %rax and executes
+       * "syscall".
+       */
+      void (*res)(void);
+      __asm__ __volatile__("call   2f\n"
+                         "0:.align 16\n"
+                         "1:movq   %1,%%rax\n"
+                           "syscall\n"
+                         "2:popq   %0\n"
+                           "addq   $(1b-0b),%0\n"
+                           : "=a" (res)
+                           : "i"  (__NR_rt_sigreturn));
+      return res;
+    }
+  #elif defined(__ARM_ARCH_3__)
+    /* Most definitions of _syscallX() neglect to mark "memory" as being
+     * clobbered. This causes problems with compilers that do a better job
+     * at optimizing across __asm__ calls.
+     * So, we just have to redefine all of the _syscallX() macros.
+     *
+     * LSS_REG(r, a) declares a local "long __r<r>" that is bound to ARM
+     * register r<r> and initialized with (long)a.
+     */
+    #undef LSS_REG
+    #define LSS_REG(r,a) register long __r##r __asm__("r"#r) = (long)a
+    #undef  LSS_BODY  /* Function body: emits the instruction text produced by
+                       * __syscall(name) with the given input operands, reads
+                       * the result from r0, marks "lr" and memory as
+                       * clobbered, and passes the result to LSS_RETURN().
+                       */
+    #define LSS_BODY(type,name,args...)                                       \
+          register long __res_r0 __asm__("r0");                               \
+          long __res;                                                         \
+          __asm__ __volatile__ (__syscall(name)                               \
+                                : "=r"(__res_r0) : args : "lr", "memory");    \
+          __res = __res_r0;                                                   \
+          LSS_RETURN(type, __res)
+    #undef _syscall0  /* ARM wrapper for a system call without arguments */
+    #define _syscall0(type, name)                                             \
+      type LSS_NAME(name)() {                                                 \
+        LSS_BODY(type, name);                                                 \
+      }
+    #undef _syscall1  /* one argument, placed in r0 */
+    #define _syscall1(type, name, type1, arg1)                                \
+      type LSS_NAME(name)(type1 arg1) {                                       \
+        LSS_REG(0, arg1); LSS_BODY(type, name, "r"(__r0));                    \
+      }
+    #undef _syscall2  /* two arguments, placed in r0 and r1 */
+    #define _syscall2(type, name, type1, arg1, type2, arg2)                   \
+      type LSS_NAME(name)(type1 arg1, type2 arg2) {                           \
+        LSS_REG(0, arg1); LSS_REG(1, arg2);                                   \
+        LSS_BODY(type, name, "r"(__r0), "r"(__r1));                           \
+      }
+    #undef _syscall3  /* three arguments, placed in r0..r2 */
+    #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3)      \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) {               \
+        LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3);                 \
+        LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2));                \
+      }
+    #undef _syscall4  /* four arguments, placed in r0..r3 */
+    #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4)  \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) {   \
+        LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3);                 \
+        LSS_REG(3, arg4);                                                     \
+        LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3));     \
+      }
+    #undef _syscall5  /* five arguments, placed in r0..r4 */
+    #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
+                      type5,arg5)                                             \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
+                          type5 arg5) {                                       \
+        LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3);                 \
+        LSS_REG(3, arg4); LSS_REG(4, arg5);                                   \
+        LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3),      \
+                             "r"(__r4));                                      \
+      }
+    #undef _syscall6  /* six arguments, placed in r0..r5 */
+    #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
+                      type5,arg5,type6,arg6)                                  \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
+                          type5 arg5, type6 arg6) {                           \
+        LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3);                 \
+        LSS_REG(3, arg4); LSS_REG(4, arg5); LSS_REG(5, arg6);                 \
+        LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3),      \
+                             "r"(__r4), "r"(__r5));                           \
+      }
+    LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
+                                   int flags, void *arg, int *parent_tidptr,
+                                   void *newtls, int *child_tidptr) {
+      long __res;
+      {
+        register int   __flags __asm__("r0") = flags;
+        register void *__stack __asm__("r1") = child_stack;
+        register void *__ptid  __asm__("r2") = parent_tidptr;
+        register void *__tls   __asm__("r3") = newtls;
+        register int  *__ctid  __asm__("r4") = child_tidptr;
+        __asm__ __volatile__(/* Operands: %0 = result, %1 = -EINVAL, %2 = fn,
+                              * %3 = child_stack (r1), %5 = arg.
+                              * NOTE(review): "cmp"/"movs" change the condition
+                              * flags, yet "cc" is not listed as clobbered;
+                              * confirm whether that is intentional.
+                              *
+                              * if (fn == NULL || child_stack == NULL)
+                              *   return -EINVAL;
+                              */
+                             "cmp   %2,#0\n"
+                             "cmpne %3,#0\n"
+                             "moveq %0,%1\n"
+                             "beq   1f\n"
+
+                             /* Push "arg" and "fn" onto the stack that will be
+                              * used by the child.
+                              */
+                             "str   %5,[%3,#-4]!\n"
+                             "str   %2,[%3,#-4]!\n"
+
+                             /* %r0 = syscall(%r0 = flags,
+                              *               %r1 = child_stack,
+                              *               %r2 = parent_tidptr,
+                              *               %r3 = newtls,
+                              *               %r4 = child_tidptr)
+                              */
+                             __syscall(clone)"\n"
+
+                             /* if (%r0 != 0)
+                              *   return %r0;
+                              */
+                             "movs  %0,r0\n"
+                             "bne   1f\n"
+
+                             /* In the child, now. Call "fn(arg)": "arg" is
+                              * loaded from [sp, #4] and "fn" from [sp], where
+                              * they were stored above.
+                              */
+                             "ldr   r0,[sp, #4]\n"
+                             "mov   lr,pc\n"
+                             "ldr   pc,[sp]\n"
+
+                             /* Call _exit(%r0).
+                              */
+                             __syscall(exit)"\n"
+                           "1:\n"
+                             : "=r" (__res)
+                             : "i"(-EINVAL),
+                               "r"(fn), "r"(__stack), "r"(__flags), "r"(arg),
+                               "r"(__ptid), "r"(__tls), "r"(__ctid)
+                             : "lr", "memory");
+      }
+      LSS_RETURN(int, __res);
+    }
+  #elif defined(__mips__)
+    #undef LSS_REG  /* LSS_REG(r, a) declares a local "unsigned long __r<r>"
+                     * bound to MIPS register $<r>, holding (unsigned long)(a).
+                     */
+    #define LSS_REG(r,a) register unsigned long __r##r __asm__("$"#r) =       \
+                                 (unsigned long)(a)
+    #undef  LSS_BODY  /* LSS_BODY(type, name, r7, inputs...): puts __NR_<name>
+                       * into $2 (__v0), executes "syscall" and returns through
+                       * LSS_RETURN(type, __v0, __r7). "r7" is the constraint
+                       * string applied to __r7 ($7), which the enclosing
+                       * macro must have declared.
+                       */
+    #define LSS_BODY(type,name,r7,...)                                        \
+          register unsigned long __v0 __asm__("$2") = __NR_##name;            \
+          __asm__ __volatile__ ("syscall\n"                                   \
+                                : "=&r"(__v0), r7 (__r7)                      \
+                                : "0"(__v0), ##__VA_ARGS__                    \
+                                : "$8", "$9", "$10", "$11", "$12",            \
+                                  "$13", "$14", "$15", "$24", "memory");      \
+          LSS_RETURN(type, __v0, __r7)
+    #undef _syscall0  /* no arguments; $7 is an output only ("=r") */
+    #define _syscall0(type, name)                                             \
+      type LSS_NAME(name)() {                                                 \
+        register unsigned long __r7 __asm__("$7");                            \
+        LSS_BODY(type, name, "=r");                                           \
+      }
+    #undef _syscall1  /* one argument in $4; $7 is an output only */
+    #define _syscall1(type, name, type1, arg1)                                \
+      type LSS_NAME(name)(type1 arg1) {                                       \
+        register unsigned long __r7 __asm__("$7");                            \
+        LSS_REG(4, arg1); LSS_BODY(type, name, "=r", "r"(__r4));              \
+      }
+    #undef _syscall2  /* two arguments in $4 and $5; $7 is an output only */
+    #define _syscall2(type, name, type1, arg1, type2, arg2)                   \
+      type LSS_NAME(name)(type1 arg1, type2 arg2) {                           \
+        register unsigned long __r7 __asm__("$7");                            \
+        LSS_REG(4, arg1); LSS_REG(5, arg2);                                   \
+        LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5));                     \
+      }
+    #undef _syscall3  /* three arguments in $4..$6; $7 is an output only */
+    #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3)      \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) {               \
+        register unsigned long __r7 __asm__("$7");                            \
+        LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3);                 \
+        LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5), "r"(__r6));          \
+      }
+    #undef _syscall4  /* four arguments in $4..$7; $7 is in/out ("+r") */
+    #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4)  \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) {   \
+        LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3);                 \
+        LSS_REG(7, arg4);                                                     \
+        LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6));          \
+      }
+    #undef _syscall5
+    #if _MIPS_SIM == _MIPS_SIM_ABI32
+    /* The old 32bit MIPS system call API passes the fifth and sixth argument
+     * on the stack, whereas the new APIs use registers "r8" and "r9".
+     *
+     * In this variant arg5 is a memory operand (%6). It is loaded into $2
+     * before $29 is lowered by 32 bytes, then stored at 16($29); only after
+     * that is $2 overwritten with the system call number (%2). $29 is
+     * raised again by 32 bytes once "syscall" has returned.
+     */
+    #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
+                      type5,arg5)                                             \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
+                          type5 arg5) {                                       \
+        LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3);                 \
+        LSS_REG(7, arg4);                                                     \
+        register unsigned long __v0 __asm__("$2");                            \
+        __asm__ __volatile__ (".set noreorder\n"                              \
+                              "lw    $2, %6\n"                                \
+                              "subu  $29, 32\n"                               \
+                              "sw    $2, 16($29)\n"                           \
+                              "li    $2, %2\n"                                \
+                              "syscall\n"                                     \
+                              "addiu $29, 32\n"                               \
+                              ".set reorder\n"                                \
+                              : "=&r"(__v0), "+r" (__r7)                      \
+                              : "i" (__NR_##name), "r"(__r4), "r"(__r5),      \
+                                "r"(__r6), "m" ((unsigned long)arg5)          \
+                              : "$8", "$9", "$10", "$11", "$12",              \
+                                "$13", "$14", "$15", "$24", "memory");        \
+        LSS_RETURN(type, __v0, __r7);                                         \
+      }
+    #else
+    #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
+                      type5,arg5)                                             \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
+                          type5 arg5) {                                       \
+        LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3);                 \
+        LSS_REG(7, arg4); LSS_REG(8, arg5);                                   \
+        LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6),           \
+                 "r"(__r8));                                                  \
+      }
+    #endif
+    #undef _syscall6
+    #if _MIPS_SIM == _MIPS_SIM_ABI32
+    /* The old 32bit MIPS system call API passes the fifth and sixth argument
+     * on the stack, whereas the new APIs use registers "r8" and "r9".
+     *
+     * arg5 and arg6 are handed to the asm in registers (operands %6 and %7)
+     * and are stored straight into the outgoing argument area at 16($29) and
+     * 20($29) once $29 has been lowered by 32 bytes. A register operand
+     * cannot be the source of "lw", so no load is attempted. Neither operand
+     * can be allocated to $2: that register is the early-clobbered output
+     * __v0, which receives the system call number (%2) just before
+     * "syscall".
+     */
+    #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
+                      type5,arg5,type6,arg6)                                  \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
+                          type5 arg5, type6 arg6) {                           \
+        LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3);                 \
+        LSS_REG(7, arg4);                                                     \
+        register unsigned long __v0 __asm__("$2");                            \
+        __asm__ __volatile__ (".set noreorder\n"                              \
+                              "subu  $29, 32\n"                               \
+                              "sw    %6, 16($29)\n"                           \
+                              "sw    %7, 20($29)\n"                           \
+                              "li    $2, %2\n"                                \
+                              "syscall\n"                                     \
+                              "addiu $29, 32\n"                               \
+                              ".set reorder\n"                                \
+                              : "=&r"(__v0), "+r" (__r7)                      \
+                              : "i" (__NR_##name), "r"(__r4), "r"(__r5),      \
+                                "r"(__r6), "r" ((unsigned long)arg5),         \
+                                "r" ((unsigned long)arg6)                     \
+                              : "$8", "$9", "$10", "$11", "$12",              \
+                                "$13", "$14", "$15", "$24", "memory");        \
+        LSS_RETURN(type, __v0, __r7);                                         \
+      }
+    #else
+    /* Newer ABIs: all six arguments travel in registers $4..$9. */
+    #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
+                      type5,arg5,type6,arg6)                                  \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
+                          type5 arg5,type6 arg6) {                            \
+        LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3);                 \
+        LSS_REG(7, arg4); LSS_REG(8, arg5); LSS_REG(9, arg6);                 \
+        LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6),           \
+                 "r"(__r8), "r"(__r9));                                       \
+      }
+    #endif
+    LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
+                                   int flags, void *arg, int *parent_tidptr,
+                                   void *newtls, int *child_tidptr) {
+      register unsigned long __v0 __asm__("$2");
+      register unsigned long __r7 __asm__("$7") = (unsigned long)newtls;
+      {
+        register int   __flags __asm__("$4") = flags;
+        register void *__stack __asm__("$5") = child_stack;
+        register void *__ptid  __asm__("$6") = parent_tidptr;
+        register int  *__ctid  __asm__("$8") = child_tidptr;
+        __asm__ __volatile__(
+          #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32
+                             "subu  $29,24\n"
+          #elif _MIPS_SIM == _MIPS_SIM_NABI32
+                             "sub   $29,16\n"
+          #else
+                             "dsubu $29,16\n"
+          #endif
+
+                             /* Operands: %0 = $2 (result), %2 = -EINVAL,
+                              * %3 = __NR_clone, %4 = __NR_exit, %5 = fn,
+                              * %6 = child_stack ($5), %8 = arg.
+                              * The stack pointer $29 was lowered above and is
+                              * raised again by the same amount at label 1.
+                              *
+                              * if (fn == NULL || child_stack == NULL)
+                              *   return -EINVAL;
+                              */
+                             "li    %0,%2\n"
+                             "beqz  %5,1f\n"
+                             "beqz  %6,1f\n"
+
+                             /* Push "arg" and "fn" onto the stack that will be
+                              * used by the child.
+                              */
+          #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32
+                             "subu  %6,32\n"
+                             "sw    %5,0(%6)\n"
+                             "sw    %8,4(%6)\n"
+          #elif _MIPS_SIM == _MIPS_SIM_NABI32
+                             "sub   %6,32\n"
+                             "sw    %5,0(%6)\n"
+                             "sw    %8,8(%6)\n"
+          #else
+                             "dsubu %6,32\n"
+                             "sd    %5,0(%6)\n"
+                             "sd    %8,8(%6)\n"
+          #endif
+
+                             /* $7 = syscall($4 = flags,
+                              *              $5 = child_stack,
+                              *              $6 = parent_tidptr,
+                              *              $7 = newtls,
+                              *              $8 = child_tidptr)
+                              */
+                             "li    $2,%3\n"
+                             "syscall\n"
+
+                             /* if ($7 != 0 || $2 != 0)
+                              *   return $2;
+                              */
+                             "bnez  $7,1f\n"
+                             "bnez  $2,1f\n"
+
+                             /* In the child, now. Call "fn(arg)".
+                              */
+          #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32
+                            "lw    $25,0($29)\n"
+                            "lw    $4,4($29)\n"
+          #elif _MIPS_SIM == _MIPS_SIM_NABI32
+                            "lw    $25,0($29)\n"
+                            "lw    $4,8($29)\n"
+          #else
+                            "ld    $25,0($29)\n"
+                            "ld    $4,8($29)\n"
+          #endif
+                            "jalr  $25\n"
+
+                             /* Call _exit($2)
+                              */
+                            "move  $4,$2\n"
+                            "li    $2,%4\n"
+                            "syscall\n"
+
+                           "1:\n"
+          #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32
+                             "addu  $29, 24\n"
+          #elif _MIPS_SIM == _MIPS_SIM_NABI32
+                             "add   $29, 16\n"
+          #else
+                             "daddu $29,16\n"
+          #endif
+                             : "=&r" (__v0), "=r" (__r7)
+                             : "i"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit),
+                               "r"(fn), "r"(__stack), "r"(__flags), "r"(arg),
+                               "r"(__ptid), "r"(__r7), "r"(__ctid)
+                             : "$9", "$10", "$11", "$12", "$13", "$14", "$15",
+                               "$24", "memory");
+      }
+      LSS_RETURN(int, __v0, __r7);
+    }
+  #elif defined (__PPC__)
+    #undef  LSS_LOADARGS_0  /* LSS_LOADARGS_<n>(name, arg1..argn) assigns
+                             * __NR_<name> to __sc_0 and the n arguments to
+                             * __sc_3 .. __sc_8 (cast to unsigned long).
+                             * Each level expands the level below it and adds
+                             * one more assignment.
+                             */
+    #define LSS_LOADARGS_0(name, dummy...)                                    \
+        __sc_0 = __NR_##name
+    #undef  LSS_LOADARGS_1
+    #define LSS_LOADARGS_1(name, arg1)                                        \
+            LSS_LOADARGS_0(name);                                             \
+            __sc_3 = (unsigned long) (arg1)
+    #undef  LSS_LOADARGS_2
+    #define LSS_LOADARGS_2(name, arg1, arg2)                                  \
+            LSS_LOADARGS_1(name, arg1);                                       \
+            __sc_4 = (unsigned long) (arg2)
+    #undef  LSS_LOADARGS_3
+    #define LSS_LOADARGS_3(name, arg1, arg2, arg3)                            \
+            LSS_LOADARGS_2(name, arg1, arg2);                                 \
+            __sc_5 = (unsigned long) (arg3)
+    #undef  LSS_LOADARGS_4
+    #define LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4)                      \
+            LSS_LOADARGS_3(name, arg1, arg2, arg3);                           \
+            __sc_6 = (unsigned long) (arg4)
+    #undef  LSS_LOADARGS_5
+    #define LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5)                \
+            LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4);                     \
+            __sc_7 = (unsigned long) (arg5)
+    #undef  LSS_LOADARGS_6
+    #define LSS_LOADARGS_6(name, arg1, arg2, arg3, arg4, arg5, arg6)          \
+            LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5);               \
+            __sc_8 = (unsigned long) (arg6)
+    #undef  LSS_ASMINPUT_0  /* LSS_ASMINPUT_<n> is the asm input operand list
+                             * for a call with n arguments. Every entry uses a
+                             * digit constraint ("0".."6") and thereby shares
+                             * the register of the output operand with that
+                             * index in LSS_BODY.
+                             */
+    #define LSS_ASMINPUT_0 "0" (__sc_0)
+    #undef  LSS_ASMINPUT_1
+    #define LSS_ASMINPUT_1 LSS_ASMINPUT_0, "1" (__sc_3)
+    #undef  LSS_ASMINPUT_2
+    #define LSS_ASMINPUT_2 LSS_ASMINPUT_1, "2" (__sc_4)
+    #undef  LSS_ASMINPUT_3
+    #define LSS_ASMINPUT_3 LSS_ASMINPUT_2, "3" (__sc_5)
+    #undef  LSS_ASMINPUT_4
+    #define LSS_ASMINPUT_4 LSS_ASMINPUT_3, "4" (__sc_6)
+    #undef  LSS_ASMINPUT_5
+    #define LSS_ASMINPUT_5 LSS_ASMINPUT_4, "5" (__sc_7)
+    #undef  LSS_ASMINPUT_6
+    #define LSS_ASMINPUT_6 LSS_ASMINPUT_5, "6" (__sc_8)
+    #undef  LSS_BODY  /* LSS_BODY(nr, type, name, args...): "nr" is the number
+                       * of arguments and selects LSS_LOADARGS_<nr> and
+                       * LSS_ASMINPUT_<nr>. The body declares __sc_0 (r0) and
+                       * __sc_3..__sc_8 (r3..r8), loads them, executes "sc"
+                       * and then "mfcr" into operand 0 (__sc_0). r3 becomes
+                       * __sc_ret and r0 becomes __sc_err; both are passed to
+                       * LSS_RETURN().
+                       */
+    #define LSS_BODY(nr, type, name, args...)                                 \
+        long __sc_ret, __sc_err;                                              \
+        {                                                                     \
+                        register unsigned long __sc_0 __asm__ ("r0");         \
+                        register unsigned long __sc_3 __asm__ ("r3");         \
+                        register unsigned long __sc_4 __asm__ ("r4");         \
+                        register unsigned long __sc_5 __asm__ ("r5");         \
+                        register unsigned long __sc_6 __asm__ ("r6");         \
+                        register unsigned long __sc_7 __asm__ ("r7");         \
+                        register unsigned long __sc_8 __asm__ ("r8");         \
+                                                                              \
+            LSS_LOADARGS_##nr(name, args);                                    \
+            __asm__ __volatile__                                              \
+                ("sc\n\t"                                                     \
+                 "mfcr %0"                                                    \
+                 : "=&r" (__sc_0),                                            \
+                   "=&r" (__sc_3), "=&r" (__sc_4),                            \
+                   "=&r" (__sc_5), "=&r" (__sc_6),                            \
+                   "=&r" (__sc_7), "=&r" (__sc_8)                             \
+                 : LSS_ASMINPUT_##nr                                          \
+                 : "cr0", "ctr", "memory",                                    \
+                   "r9", "r10", "r11", "r12");                                \
+            __sc_ret = __sc_3;                                                \
+            __sc_err = __sc_0;                                                \
+        }                                                                     \
+        LSS_RETURN(type, __sc_ret, __sc_err)
+    #undef _syscall0  /* PPC wrapper for a system call without arguments */
+    #define _syscall0(type, name)                                             \
+       type LSS_NAME(name)(void) {                                            \
+          LSS_BODY(0, type, name);                                            \
+       }
+    #undef _syscall1  /* one argument, loaded into r3 by LSS_BODY */
+    #define _syscall1(type, name, type1, arg1)                                \
+       type LSS_NAME(name)(type1 arg1) {                                      \
+          LSS_BODY(1, type, name, arg1);                                      \
+       }
+    #undef _syscall2
+    #define _syscall2(type, name, type1, arg1, type2, arg2)                   \
+       type LSS_NAME(name)(type1 arg1, type2 arg2) {                          \
+          LSS_BODY(2, type, name, arg1, arg2);                                \
+       }
+    #undef _syscall3
+    #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3)      \
+       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) {              \
+          LSS_BODY(3, type, name, arg1, arg2, arg3);                          \
+       }
+    #undef _syscall4
+    #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3,      \
+                                  type4, arg4)                                \
+       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) {  \
+          LSS_BODY(4, type, name, arg1, arg2, arg3, arg4);                    \
+       }
+    #undef _syscall5
+    #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3,      \
+                                  type4, arg4, type5, arg5)                   \
+       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,    \
+                                               type5 arg5) {                  \
+          LSS_BODY(5, type, name, arg1, arg2, arg3, arg4, arg5);              \
+       }
+    #undef _syscall6
+    #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3,      \
+                                  type4, arg4, type5, arg5, type6, arg6)      \
+       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,    \
+                                               type5 arg5, type6 arg6) {      \
+          LSS_BODY(6, type, name, arg1, arg2, arg3, arg4, arg5, arg6);        \
+       }
+    /* clone function adapted from glibc 2.3.6 clone.S                       */
+    /* TODO(csilvers): consider wrapping some args up in a struct, like we
+     * do for i386's _syscall6, so we can compile successfully on gcc 2.95
+     *
+     * Flow of the asm below: branch to label 1 if fn or child_stack is 0;
+     * otherwise align child_stack, issue __NR_clone and, on the path where
+     * r3 is 0 and no error was flagged, call fn(arg) and pass its result to
+     * __NR_exit. Label 1 copies CR and r3 into __err/__ret for LSS_RETURN.
+     * NOTE(review): label 1 also runs on the fn/child_stack == 0 path, so
+     * the -1/EINVAL preloaded through the "0"/"1" constraints looks like it
+     * gets overwritten there -- confirm.
+     */
+    LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
+                                   int flags, void *arg, int *parent_tidptr,
+                                   void *newtls, int *child_tidptr) {
+      long __ret, __err;
+      {
+        register int (*__fn)(void *)    __asm__ ("r8")  = fn;
+        register void *__cstack                 __asm__ ("r4")  = child_stack;
+        register int __flags                    __asm__ ("r3")  = flags;
+        register void * __arg                   __asm__ ("r9")  = arg;
+        register int * __ptidptr                __asm__ ("r5")  = parent_tidptr;
+        register void * __newtls                __asm__ ("r6")  = newtls;
+        register int * __ctidptr                __asm__ ("r7")  = child_tidptr;
+        __asm__ __volatile__(
+            /* check for fn == NULL
+             * and child_stack == NULL
+             */
+            "cmpwi cr0, %6, 0\n\t"
+            "cmpwi cr1, %7, 0\n\t"
+            "cror cr0*4+eq, cr1*4+eq, cr0*4+eq\n\t"
+            "beq- cr0, 1f\n\t"
+
+            /* set up stack frame for child                                  */
+            "clrrwi %7, %7, 4\n\t"
+            "li 0, 0\n\t"
+            "stwu 0, -16(%7)\n\t"
+
+            /* fn, child_stack, arg are kept in r28, r29, r27 across the
+             * syscall (matching the three "mr" instructions below)
+             */
+            "mr 28, %6\n\t"
+            "mr 29, %7\n\t"
+            "mr 27, %9\n\t"
+
+            /* syscall                                                       */
+            "li 0, %4\n\t"
+            /* flags already in r3
+             * child_stack already in r4
+             * ptidptr already in r5
+             * newtls already in r6
+             * ctidptr already in r7
+             */
+            "sc\n\t"
+
+            /* Test if syscall was successful                                */
+            "cmpwi cr1, 3, 0\n\t"
+            "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t"
+            "bne- cr1, 1f\n\t"
+
+            /* Do the function call                                          */
+            "mtctr 28\n\t"
+            "mr 3, 27\n\t"
+            "bctrl\n\t"
+
+            /* Call _exit(r3)                                                */
+            "li 0, %5\n\t"
+            "sc\n\t"
+
+            /* Return to parent                                              */
+            "1:\n"
+            "mfcr %1\n\t"
+            "mr %0, 3\n\t"
+              : "=r" (__ret), "=r" (__err)
+              : "0" (-1), "1" (EINVAL),
+                "i" (__NR_clone), "i" (__NR_exit),
+                "r" (__fn), "r" (__cstack), "r" (__flags),
+                "r" (__arg), "r" (__ptidptr), "r" (__newtls),
+                "r" (__ctidptr)
+              : "cr0", "cr1", "memory", "ctr",
+                "r0", "r29", "r27", "r28");
+      }
+      LSS_RETURN(int, __ret, __err);
+    }
+  #endif
+  #define __NR__exit   __NR_exit /* Aliases for the underscore-prefixed
+   * wrappers _exit, _gettid and _mremap declared below; presumably the
+   * _syscallN macros look the number up as __NR_<name> -- TODO confirm.
+   * Each LSS_INLINE _syscallN(...) entry in this list defines one inline
+   * wrapper named LSS_NAME(<name>).
+   */
+  #define __NR__gettid __NR_gettid
+  #define __NR__mremap __NR_mremap
+  LSS_INLINE _syscall1(int,     brk,             void *,      e)
+  LSS_INLINE _syscall1(int,     chdir,           const char *,p)
+  LSS_INLINE _syscall1(int,     close,           int,         f)
+  LSS_INLINE _syscall2(int,     clock_getres,    int,         c,
+                       struct kernel_timespec*, t)
+  LSS_INLINE _syscall2(int,     clock_gettime,   int,         c,
+                       struct kernel_timespec*, t)
+  LSS_INLINE _syscall1(int,     dup,             int,         f)
+  LSS_INLINE _syscall2(int,     dup2,            int,         s,
+                       int,            d)
+  LSS_INLINE _syscall3(int,     execve,          const char*, f,
+                       const char*const*,a,const char*const*, e)
+  LSS_INLINE _syscall1(int,     _exit,           int,         e)
+  LSS_INLINE _syscall1(int,     exit_group,      int,         e)
+  LSS_INLINE _syscall3(int,     fcntl,           int,         f,
+                       int,            c, long,   a)
+  LSS_INLINE _syscall0(pid_t,   fork)
+  LSS_INLINE _syscall2(int,     fstat,           int,         f,
+                      struct kernel_stat*,   b)
+  LSS_INLINE _syscall2(int,     fstatfs,         int,         f,
+                      struct kernel_statfs*, b)
+  LSS_INLINE _syscall2(int, ftruncate,           int,         f,
+                       off_t,          l)
+  LSS_INLINE _syscall4(int,     futex,           int*,        a,
+                       int,            o, int,    v,
+                      struct kernel_timespec*, t)
+  LSS_INLINE _syscall3(int,     getdents,        int,         f,
+                      struct kernel_dirent*, d, int,    c)
+  LSS_INLINE _syscall3(int,     getdents64,      int,         f,
+                      struct kernel_dirent64*, d, int,    c)
+  LSS_INLINE _syscall0(gid_t,   getegid)
+  LSS_INLINE _syscall0(uid_t,   geteuid)
+  LSS_INLINE _syscall0(pid_t,   getpgrp)
+  LSS_INLINE _syscall0(pid_t,   getpid)
+  LSS_INLINE _syscall0(pid_t,   getppid)
+  LSS_INLINE _syscall2(int,     getpriority,     int,         a,
+                       int,            b)
+  LSS_INLINE _syscall3(int,     getresgid,       gid_t *,     r,
+                       gid_t *,         e,       gid_t *,     s)
+  LSS_INLINE _syscall3(int,     getresuid,       uid_t *,     r,
+                       uid_t *,         e,       uid_t *,     s)
+  LSS_INLINE _syscall2(int,     getrlimit,       int,         r,
+                      struct kernel_rlimit*, l)
+  LSS_INLINE _syscall1(pid_t,   getsid,          pid_t,       p)
+  LSS_INLINE _syscall0(pid_t,   _gettid)
+  LSS_INLINE _syscall2(int,     gettimeofday,    struct timeval *, v,
+                       struct timezone *, z)
+  LSS_INLINE _syscall5(int,     setxattr,        const char *,p,
+                       const char *,   n,        const void *,v,
+                       size_t,         s,        int,         f)
+  LSS_INLINE _syscall5(int,     lsetxattr,       const char *,p,
+                       const char *,   n,        const void *,v,
+                       size_t,         s,        int,         f)
+  LSS_INLINE _syscall4(ssize_t, getxattr,        const char *,p,
+                       const char *,   n,        void *,      v, size_t, s)
+  LSS_INLINE _syscall4(ssize_t, lgetxattr,       const char *,p,
+                       const char *,   n,        void *,      v, size_t, s)
+  LSS_INLINE _syscall3(ssize_t, listxattr,       const char *,p,
+                       char *,   l,              size_t,      s)
+  LSS_INLINE _syscall3(ssize_t, llistxattr,      const char *,p,
+                       char *,   l,              size_t,      s)
+  LSS_INLINE _syscall3(int,     ioctl,           int,         d,
+                       int,     r,               void *,      a)
+  LSS_INLINE _syscall2(int,     ioprio_get,      int,         which,
+                       int,     who)
+  LSS_INLINE _syscall3(int,     ioprio_set,      int,         which,
+                       int,     who,             int,         ioprio)
+  LSS_INLINE _syscall2(int,     kill,            pid_t,       p,
+                       int,            s)
+  LSS_INLINE _syscall3(off_t,   lseek,           int,         f,
+                       off_t,          o, int,    w)
+  LSS_INLINE _syscall2(int,     munmap,          void*,       s,
+                       size_t,         l)
+  LSS_INLINE _syscall6(long,    move_pages,      pid_t,       p,
+                       unsigned long,  n, void **,g, int *,   d,
+                       int *,          s, int,    f)
+  LSS_INLINE _syscall3(int,     mprotect,        const void *,a,
+                       size_t,         l,        int,         p)
+  LSS_INLINE _syscall5(void*,   _mremap,         void*,       o,
+                       size_t,         os,       size_t,      ns,
+                       unsigned long,  f, void *, a)
+  LSS_INLINE _syscall3(int,     open,            const char*, p,
+                       int,            f, int,    m)
+  LSS_INLINE _syscall3(int,     poll,           struct kernel_pollfd*, u,
+                       unsigned int,   n, int,    t)
+  LSS_INLINE _syscall2(int,     prctl,           int,         o,
+                       long,           a)
+  LSS_INLINE _syscall4(long,    ptrace,          int,         r,
+                       pid_t,          p, void *, a, void *, d)
+  #if defined(__NR_quotactl)
+    // Defined on x86_64 / i386 only
+    LSS_INLINE _syscall4(int,  quotactl,  int,  cmd,  const char *, special,
+                         int, id, caddr_t, addr)
+  #endif
+  LSS_INLINE _syscall3(ssize_t, read,            int,         f,
+                       void *,         b, size_t, c)
+  LSS_INLINE _syscall3(int,     readlink,        const char*, p,
+                       char*,          b, size_t, s)
+  LSS_INLINE _syscall4(int,     rt_sigaction,    int,         s,
+                       const struct kernel_sigaction*, a,
+                       struct kernel_sigaction*, o, size_t,   c)
+  LSS_INLINE _syscall2(int, rt_sigpending, struct kernel_sigset_t *, s,
+                       size_t,         c)
+  /* Wrappers for the rt_ signal-mask syscalls; c is the size argument passed
+   * along with the signal set. There is deliberately no ';' after these
+   * invocations: like every other entry in this list, each one already
+   * expands to a complete function definition.
+   */
+  LSS_INLINE _syscall4(int, rt_sigprocmask,      int,         h,
+                       const struct kernel_sigset_t*,  s,
+                       struct kernel_sigset_t*,        o, size_t, c)
+  LSS_INLINE _syscall1(int, rt_sigreturn,        unsigned long, u)
+  LSS_INLINE _syscall2(int, rt_sigsuspend,
+                       const struct kernel_sigset_t*, s,  size_t, c)
+  LSS_INLINE _syscall3(int,     sched_getaffinity,pid_t,      p,
+                       unsigned int,   l, unsigned long *, m)
+  LSS_INLINE _syscall3(int,     sched_setaffinity,pid_t,      p,
+                       unsigned int,   l, unsigned long *, m)
+  LSS_INLINE _syscall0(int,     sched_yield)
+  LSS_INLINE _syscall1(long,    set_tid_address, int *,       t)
+  LSS_INLINE _syscall1(int,     setfsgid,        gid_t,       g)
+  LSS_INLINE _syscall1(int,     setfsuid,        uid_t,       u)
+  LSS_INLINE _syscall1(int,     setuid,          uid_t,       u)
+  LSS_INLINE _syscall1(int,     setgid,          gid_t,       g)
+  LSS_INLINE _syscall2(int,     setpgid,         pid_t,       p,
+                       pid_t,          g)
+  LSS_INLINE _syscall3(int,     setpriority,     int,         a,
+                       int,            b, int,    p)
+  LSS_INLINE _syscall3(int,     setresgid,       gid_t,       r,
+                       gid_t,          e, gid_t,  s)
+  LSS_INLINE _syscall3(int,     setresuid,       uid_t,       r,
+                       uid_t,          e, uid_t,  s)
+  LSS_INLINE _syscall2(int,     setrlimit,       int,         r,
+                       const struct kernel_rlimit*, l)
+  LSS_INLINE _syscall0(pid_t,    setsid)
+  LSS_INLINE _syscall2(int,     sigaltstack,     const stack_t*, s,
+                       const stack_t*, o) /* NOTE(review): o is declared
+   * const here, although sigaltstack(2) documents its second argument as
+   * the place where the previous stack is returned -- confirm whether the
+   * const is intentional before relying on it.
+   */
+  /* sigreturn and getcpu are only declared when the kernel headers provide
+   * the syscall number. As with the unconditional entries in between, no ';'
+   * follows a _syscallN invocation: each one already expands to a complete
+   * function definition.
+   */
+  #if defined(__NR_sigreturn)
+  LSS_INLINE _syscall1(int,     sigreturn,       unsigned long, u)
+  #endif
+  LSS_INLINE _syscall2(int,     stat,            const char*, f,
+                      struct kernel_stat*,   b)
+  LSS_INLINE _syscall2(int,     statfs,          const char*, f,
+                      struct kernel_statfs*, b)
+  LSS_INLINE _syscall3(int,     tgkill,          pid_t,       p,
+                       pid_t,          t, int,            s)
+  LSS_INLINE _syscall2(int,     tkill,           pid_t,       p,
+                       int,            s)
+  LSS_INLINE _syscall3(ssize_t, write,            int,        f,
+                       const void *,   b, size_t, c)
+  LSS_INLINE _syscall3(ssize_t, writev,           int,        f,
+                       const struct kernel_iovec*, v, size_t, c)
+  LSS_INLINE _syscall1(int,     unlink,          const char*, f)
+  #if defined(__NR_getcpu)
+    LSS_INLINE _syscall3(long, getcpu, unsigned *, cpu,
+                         unsigned *, node, void *, unused)
+  #endif
+  #if defined(__x86_64__) ||                                                  \
+     (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32) /* On these targets
+      * the socket calls are declared as individual syscalls via _syscallN.
+      */
+    LSS_INLINE _syscall3(int, recvmsg,            int,   s,
+                        struct kernel_msghdr*,     m, int, f)
+    LSS_INLINE _syscall3(int, sendmsg,            int,   s,
+                         const struct kernel_msghdr*, m, int, f)
+    LSS_INLINE _syscall6(int, sendto,             int,   s,
+                         const void*,             m, size_t, l,
+                         int,                     f,
+                         const struct kernel_sockaddr*, a, int, t)
+    LSS_INLINE _syscall2(int, shutdown,           int,   s,
+                         int,                     h)
+    LSS_INLINE _syscall3(int, socket,             int,   d,
+                         int,                     t, int,       p)
+    LSS_INLINE _syscall4(int, socketpair,         int,   d,
+                         int,                     t, int,       p, int*, s)
+  #endif /* __x86_64__ || (__mips__ && _MIPS_SIM != _MIPS_SIM_ABI32) */
+  #if defined(__x86_64__)
+    LSS_INLINE _syscall4(int, fallocate, int, fd, int, mode,
+                         loff_t, offset, loff_t, len)
+
+    LSS_INLINE int LSS_NAME(getresgid32)(gid_t *rgid,
+                                         gid_t *egid,
+                                         gid_t *sgid) { /*
+       * On x86_64 the "32" name is only an alias: the call is forwarded
+       * unchanged to LSS_NAME(getresgid).
+       */
+      return LSS_NAME(getresgid)(rgid, egid, sgid);
+    }
+
+    LSS_INLINE int LSS_NAME(getresuid32)(uid_t *ruid,
+                                         uid_t *euid,
+                                         uid_t *suid) { /*
+       * Alias as well: forwarded unchanged to LSS_NAME(getresuid).
+       */
+      return LSS_NAME(getresuid)(ruid, euid, suid);
+    }
+
+    LSS_INLINE _syscall6(void*, mmap,              void*, s,
+                         size_t,                   l, int,               p,
+                         int,                      f, int,               d,
+                         __off64_t,                o)
+
+    LSS_INLINE _syscall4(int, newfstatat,         int,   d,
+                         const char *,            p,
+                        struct kernel_stat*,       b, int, f)
+
+    LSS_INLINE int LSS_NAME(setfsgid32)(gid_t gid) { /*
+       * The four *32 setters in this group are x86_64 aliases: each one
+       * forwards its arguments unchanged to the wrapper without the "32"
+       * suffix and returns that wrapper's result.
+       */
+      return LSS_NAME(setfsgid)(gid);
+    }
+
+    LSS_INLINE int LSS_NAME(setfsuid32)(uid_t uid) {
+      return LSS_NAME(setfsuid)(uid);
+    }
+
+    LSS_INLINE int LSS_NAME(setresgid32)(gid_t rgid, gid_t egid, gid_t sgid) {
+      return LSS_NAME(setresgid)(rgid, egid, sgid);
+    }
+
+    LSS_INLINE int LSS_NAME(setresuid32)(uid_t ruid, uid_t euid, uid_t suid) {
+      return LSS_NAME(setresuid)(ruid, euid, suid);
+    }
+
+    LSS_INLINE int LSS_NAME(sigaction)(int signum,
+                                       const struct kernel_sigaction *act,
+                                       struct kernel_sigaction *oldact) {
+      /* x86_64 kernels need the caller to supply SA_RESTORER, otherwise a
+       * signal handler cannot return. The restorer has to be a function with
+       * the "magic" signature that gdb (and maybe the kernel?) recognizes.
+       * When the caller passed an action without one, ours is installed on a
+       * private copy so that *act itself is never modified.
+       */
+      struct kernel_sigaction patched;
+      const struct kernel_sigaction *request = act;
+      if (act != NULL && !(act->sa_flags & SA_RESTORER)) {
+        patched             = *act;
+        patched.sa_flags   |= SA_RESTORER;
+        patched.sa_restorer = LSS_NAME(restore_rt)();
+        request             = &patched;
+      }
+      return LSS_NAME(rt_sigaction)(signum, request, oldact,
+                                    (KERNEL_NSIG+7)/8);
+    }
+
+    LSS_INLINE int LSS_NAME(sigpending)(struct kernel_sigset_t *set) { /*
+       * The three wrappers in this group forward to the matching rt_
+       * syscall and pass (KERNEL_NSIG+7)/8 as its size argument
+       * (presumably the signal-set size in bytes -- TODO confirm).
+       */
+      return LSS_NAME(rt_sigpending)(set, (KERNEL_NSIG+7)/8);
+    }
+
+    LSS_INLINE int LSS_NAME(sigprocmask)(int how,
+                                         const struct kernel_sigset_t *set,
+                                         struct kernel_sigset_t *oldset) {
+      return LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8);
+    }
+
+    LSS_INLINE int LSS_NAME(sigsuspend)(const struct kernel_sigset_t *set) {
+      return LSS_NAME(rt_sigsuspend)(set, (KERNEL_NSIG+7)/8);
+    }
+  #endif
+  #if defined(__x86_64__) || defined(__ARM_ARCH_3__) ||                       \
+     (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32)
+    LSS_INLINE _syscall4(pid_t, wait4,            pid_t, p,
+                         int*,                    s, int,       o,
+                        struct kernel_rusage*,     r)
+
+    LSS_INLINE pid_t LSS_NAME(waitpid)(pid_t pid, int *status, int options){ /*
+       * waitpid is expressed through wait4, passing 0 for the rusage
+       * pointer.
+       */
+      return LSS_NAME(wait4)(pid, status, options, 0);
+    }
+  #endif
+  #if defined(__i386__) || defined(__x86_64__) /* the *at() wrappers are
+   * only declared for these two architectures here */
+    LSS_INLINE _syscall4(int, openat, int, d, const char *, p, int, f, int, m)
+    LSS_INLINE _syscall3(int, unlinkat, int, d, const char *, p, int, f)
+  #endif /* __i386__ || __x86_64__ */
+  #if defined(__i386__) || defined(__ARM_ARCH_3__)
+    #define __NR__getresgid32 __NR_getresgid32 /* The raw 32-bit-id syscalls
+     * are declared with a leading underscore (_getresgid32, ...) so that
+     * the public LSS_NAME(getresgid32) etc. wrappers defined after this
+     * list can add a fallback for kernels that report ENOSYS.
+     */
+    #define __NR__getresuid32 __NR_getresuid32
+    #define __NR__setfsgid32  __NR_setfsgid32
+    #define __NR__setfsuid32  __NR_setfsuid32
+    #define __NR__setresgid32 __NR_setresgid32
+    #define __NR__setresuid32 __NR_setresuid32
+    LSS_INLINE _syscall2(int,   ugetrlimit,        int,          r,
+                        struct kernel_rlimit*, l)
+    LSS_INLINE _syscall3(int,     _getresgid32,    gid_t *,      r,
+                         gid_t *,            e,    gid_t *,      s)
+    LSS_INLINE _syscall3(int,     _getresuid32,    uid_t *,      r,
+                         uid_t *,            e,    uid_t *,      s)
+    LSS_INLINE _syscall1(int,     _setfsgid32,      gid_t,       f)
+    LSS_INLINE _syscall1(int,     _setfsuid32,      uid_t,       f)
+    LSS_INLINE _syscall3(int,     _setresgid32,     gid_t,       r,
+                         gid_t,              e,     gid_t,       s)
+    LSS_INLINE _syscall3(int,     _setresuid32,     uid_t,       r,
+                         uid_t,              e,     uid_t,       s)
+
+    /* Fetches the real, effective and saved group IDs via the getresgid32
+     * syscall. If the kernel reports ENOSYS for it, falls back to the older
+     * getresgid syscall, which only fills in 16 bits of each ID.
+     * Returns the syscall result; failures are reported as -1 with the
+     * error code in LSS_ERRNO.
+     */
+    LSS_INLINE int LSS_NAME(getresgid32)(gid_t *rgid,
+                                         gid_t *egid,
+                                         gid_t *sgid) {
+      int rc;
+      if ((rc = LSS_NAME(_getresgid32)(rgid, egid, sgid)) < 0 &&
+          LSS_ERRNO == ENOSYS) {
+        if ((rgid == NULL) || (egid == NULL) || (sgid == NULL)) {
+          /* The outputs are written below, so reject NULL up front. Report
+           * it as -1/errno; a positive EFAULT return value would look like
+           * success to callers that test for "< 0".
+           */
+          LSS_ERRNO = EFAULT;
+          return -1;
+        }
+        // Clear the high bits first, since getresgid only sets 16 bits
+        *rgid = *egid = *sgid = 0;
+        rc = LSS_NAME(getresgid)(rgid, egid, sgid);
+      }
+      return rc;
+    }
+
+    /* Fetches the real, effective and saved user IDs via the getresuid32
+     * syscall. If the kernel reports ENOSYS for it, falls back to the older
+     * getresuid syscall, which only fills in 16 bits of each ID.
+     * Returns the syscall result; failures are reported as -1 with the
+     * error code in LSS_ERRNO.
+     */
+    LSS_INLINE int LSS_NAME(getresuid32)(uid_t *ruid,
+                                         uid_t *euid,
+                                         uid_t *suid) {
+      int rc;
+      if ((rc = LSS_NAME(_getresuid32)(ruid, euid, suid)) < 0 &&
+          LSS_ERRNO == ENOSYS) {
+        if ((ruid == NULL) || (euid == NULL) || (suid == NULL)) {
+          /* The outputs are written below, so reject NULL up front. Report
+           * it as -1/errno; a positive EFAULT return value would look like
+           * success to callers that test for "< 0".
+           */
+          LSS_ERRNO = EFAULT;
+          return -1;
+        }
+        // Clear the high bits first, since getresuid only sets 16 bits
+        *ruid = *euid = *suid = 0;
+        rc = LSS_NAME(getresuid)(ruid, euid, suid);
+      }
+      return rc;
+    }
+
+    /* Sets the filesystem group ID via the setfsgid32 syscall, falling back
+     * to the older setfsgid syscall when the kernel reports ENOSYS.
+     * The fallback is only attempted for IDs that fit in 16 bits; larger
+     * values fail with -1 and LSS_ERRNO set to EINVAL.
+     */
+    LSS_INLINE int LSS_NAME(setfsgid32)(gid_t gid) {
+      int rc;
+      if ((rc = LSS_NAME(_setfsgid32)(gid)) < 0 &&
+          LSS_ERRNO == ENOSYS) {
+        if ((unsigned int)gid & ~0xFFFFu) {
+          /* Must be -1/errno: a positive EINVAL return value cannot be told
+           * apart from a successful result.
+           */
+          LSS_ERRNO = EINVAL;
+          rc = -1;
+        } else {
+          rc = LSS_NAME(setfsgid)(gid);
+        }
+      }
+      return rc;
+    }
+
+    /* Sets the filesystem user ID via the setfsuid32 syscall, falling back
+     * to the older setfsuid syscall when the kernel reports ENOSYS.
+     * The fallback is only attempted for IDs that fit in 16 bits; larger
+     * values fail with -1 and LSS_ERRNO set to EINVAL.
+     */
+    LSS_INLINE int LSS_NAME(setfsuid32)(uid_t uid) {
+      int rc;
+      if ((rc = LSS_NAME(_setfsuid32)(uid)) < 0 &&
+          LSS_ERRNO == ENOSYS) {
+        if ((unsigned int)uid & ~0xFFFFu) {
+          /* Must be -1/errno: a positive EINVAL return value cannot be told
+           * apart from a successful result.
+           */
+          LSS_ERRNO = EINVAL;
+          rc = -1;
+        } else {
+          rc = LSS_NAME(setfsuid)(uid);
+        }
+      }
+      return rc;
+    }
+
+    /* Sets the real, effective and saved group IDs via the setresgid32
+     * syscall, falling back to the older setresgid syscall when the kernel
+     * reports ENOSYS. The fallback is only attempted when all three IDs fit
+     * in 16 bits; otherwise the call fails with -1 and LSS_ERRNO == EINVAL.
+     */
+    LSS_INLINE int LSS_NAME(setresgid32)(gid_t rgid, gid_t egid, gid_t sgid) {
+      int rc;
+      if ((rc = LSS_NAME(_setresgid32)(rgid, egid, sgid)) < 0 &&
+          LSS_ERRNO == ENOSYS) {
+        if ((unsigned int)rgid & ~0xFFFFu ||
+            (unsigned int)egid & ~0xFFFFu ||
+            (unsigned int)sgid & ~0xFFFFu) {
+          /* Must be -1/errno: callers test for "< 0", so a positive EINVAL
+           * return value would be mistaken for success.
+           */
+          LSS_ERRNO = EINVAL;
+          rc = -1;
+        } else {
+          rc = LSS_NAME(setresgid)(rgid, egid, sgid);
+        }
+      }
+      return rc;
+    }
+
+    /* Sets the real, effective and saved user IDs via the setresuid32
+     * syscall, falling back to the older setresuid syscall when the kernel
+     * reports ENOSYS. The fallback is only attempted when all three IDs fit
+     * in 16 bits; otherwise the call fails with -1 and LSS_ERRNO == EINVAL.
+     */
+    LSS_INLINE int LSS_NAME(setresuid32)(uid_t ruid, uid_t euid, uid_t suid) {
+      int rc;
+      if ((rc = LSS_NAME(_setresuid32)(ruid, euid, suid)) < 0 &&
+          LSS_ERRNO == ENOSYS) {
+        if ((unsigned int)ruid & ~0xFFFFu ||
+            (unsigned int)euid & ~0xFFFFu ||
+            (unsigned int)suid & ~0xFFFFu) {
+          /* Must be -1/errno: callers test for "< 0", so a positive EINVAL
+           * return value would be mistaken for success.
+           */
+          LSS_ERRNO = EINVAL;
+          rc = -1;
+        } else {
+          rc = LSS_NAME(setresuid)(ruid, euid, suid);
+        }
+      }
+      return rc;
+    }
+  #endif
+  /* Clears every bit of the signal set. Always returns 0. */
+  LSS_INLINE int LSS_NAME(sigemptyset)(struct kernel_sigset_t *set) {
+    const size_t mask_bytes = sizeof(set->sig);
+    memset(&set->sig, 0, mask_bytes);
+    return 0;
+  }
+
+  /* Sets every bit of the signal set. Always returns 0. */
+  LSS_INLINE int LSS_NAME(sigfillset)(struct kernel_sigset_t *set) {
+    const size_t mask_bytes = sizeof(set->sig);
+    memset(&set->sig, -1, mask_bytes);
+    return 0;
+  }
+
+  /* Adds signal number signum (1-based) to the set.
+   * Returns 0 on success; for a signal number outside the set's range it
+   * returns -1 and sets LSS_ERRNO to EINVAL.
+   */
+  LSS_INLINE int LSS_NAME(sigaddset)(struct kernel_sigset_t *set,
+                                     int signum) {
+    const size_t word_bits = 8*sizeof(set->sig[0]);
+    if (signum < 1 || signum > (int)(8*sizeof(set->sig))) {
+      LSS_ERRNO = EINVAL;
+      return -1;
+    }
+    /* Signal N lives at bit (N-1) of the mask, counted across words. */
+    set->sig[(signum - 1)/word_bits] |= 1UL << ((signum - 1) % word_bits);
+    return 0;
+  }
+
+  /* Removes signal number signum (1-based) from the set.
+   * Returns 0 on success; for a signal number outside the set's range it
+   * returns -1 and sets LSS_ERRNO to EINVAL.
+   */
+  LSS_INLINE int LSS_NAME(sigdelset)(struct kernel_sigset_t *set,
+                                        int signum) {
+    const size_t word_bits = 8*sizeof(set->sig[0]);
+    if (signum < 1 || signum > (int)(8*sizeof(set->sig))) {
+      LSS_ERRNO = EINVAL;
+      return -1;
+    }
+    /* Signal N lives at bit (N-1) of the mask, counted across words. */
+    set->sig[(signum - 1)/word_bits] &= ~(1UL << ((signum - 1) % word_bits));
+    return 0;
+  }
+
+  /* Tests whether signal number signum (1-based) is in the set.
+   * Returns 1 if it is, 0 if it is not; for a signal number outside the
+   * set's range it returns -1 and sets LSS_ERRNO to EINVAL.
+   */
+  LSS_INLINE int LSS_NAME(sigismember)(struct kernel_sigset_t *set,
+                                          int signum) {
+    const size_t word_bits = 8*sizeof(set->sig[0]);
+    if (signum < 1 || signum > (int)(8*sizeof(set->sig))) {
+      LSS_ERRNO = EINVAL;
+      return -1;
+    }
+    return (set->sig[(signum - 1)/word_bits] &
+            (1UL << ((signum - 1) % word_bits))) != 0;
+  }
+  #if defined(__i386__) || defined(__ARM_ARCH_3__) ||                         \
+     (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || defined(__PPC__)
+    #define __NR__sigaction   __NR_sigaction /* The legacy (non-rt) signal
+     * syscalls and socketcall are declared with a leading underscore; the
+     * sigaction/sigpending/sigprocmask/sigsuspend wrappers defined after
+     * this list call them only when the rt_ variant fails with ENOSYS.
+     */
+    #define __NR__sigpending  __NR_sigpending
+    #define __NR__sigprocmask __NR_sigprocmask
+    #define __NR__sigsuspend  __NR_sigsuspend
+    #define __NR__socketcall  __NR_socketcall
+    LSS_INLINE _syscall2(int, fstat64,             int, f,
+                         struct kernel_stat64 *, b)
+    LSS_INLINE _syscall5(int, _llseek,     uint, fd, ulong, hi, ulong, lo,
+                         loff_t *, res, uint, wh)
+    LSS_INLINE _syscall1(void*, mmap,              void*, a)
+    LSS_INLINE _syscall6(void*, mmap2,             void*, s,
+                         size_t,                   l, int,               p,
+                         int,                      f, int,               d,
+                         __off64_t,                o)
+    LSS_INLINE _syscall3(int,   _sigaction,        int,   s,
+                         const struct kernel_old_sigaction*,  a,
+                         struct kernel_old_sigaction*,        o)
+    LSS_INLINE _syscall1(int,   _sigpending, unsigned long*, s)
+    LSS_INLINE _syscall3(int,   _sigprocmask,      int,   h,
+                         const unsigned long*,     s,
+                         unsigned long*,           o)
+    #ifdef __PPC__
+    LSS_INLINE _syscall1(int, _sigsuspend,         unsigned long, s)
+    #else
+    LSS_INLINE _syscall3(int, _sigsuspend,         const void*, a,
+                         int,                      b,
+                         unsigned long,            s)
+    #endif
+    LSS_INLINE _syscall2(int, stat64,              const char *, p,
+                         struct kernel_stat64 *, b)
+
+    LSS_INLINE int LSS_NAME(sigaction)(int signum,
+                                       const struct kernel_sigaction *act,
+                                       struct kernel_sigaction *oldact) { /*
+       * Tries rt_sigaction first (on a local copy of *act, which on i386
+       * gets a restorer added). If that fails with ENOSYS, the saved errno
+       * is put back and the call is retried through the legacy _sigaction
+       * syscall, converting to and from struct kernel_old_sigaction.
+       * Returns the result of whichever syscall ran last.
+       */
+      int old_errno = LSS_ERRNO;
+      int rc;
+      struct kernel_sigaction a;
+      if (act != NULL) {
+        a             = *act;
+        #ifdef __i386__
+        /* On i386, the kernel requires us to always set our own
+         * SA_RESTORER when using realtime signals. Otherwise, it does not
+         * know how to return from a signal handler. This function must have
+         * a "magic" signature that the "gdb" (and maybe the kernel?) can
+         * recognize.
+         * Apparently, a SA_RESTORER is implicitly set by the kernel, when
+         * using non-realtime signals.
+         *
+         * TODO: Test whether ARM needs a restorer
+         */
+        if (!(a.sa_flags & SA_RESTORER)) {
+          a.sa_flags   |= SA_RESTORER;
+          a.sa_restorer = (a.sa_flags & SA_SIGINFO)
+                          ? LSS_NAME(restore_rt)() : LSS_NAME(restore)();
+        }
+        #endif
+      }
+      rc = LSS_NAME(rt_sigaction)(signum, act ? &a : act, oldact,
+                                  (KERNEL_NSIG+7)/8);
+      if (rc < 0 && LSS_ERRNO == ENOSYS) {
+        struct kernel_old_sigaction oa, ooa, *ptr_a = &oa, *ptr_oa = &ooa;
+        if (!act) {
+          ptr_a            = NULL;
+        } else { /* NOTE(review): the legacy request is built from *act,
+          * not from the local copy 'a', so a restorer added above is not
+          * carried into this path -- confirm that this is intended.
+          */
+          oa.sa_handler_   = act->sa_handler_;
+          memcpy(&oa.sa_mask, &act->sa_mask, sizeof(oa.sa_mask));
+          #ifndef __mips__
+          oa.sa_restorer   = act->sa_restorer;
+          #endif
+          oa.sa_flags      = act->sa_flags;
+        }
+        if (!oldact) {
+          ptr_oa           = NULL;
+        }
+        LSS_ERRNO = old_errno;
+        rc = LSS_NAME(_sigaction)(signum, ptr_a, ptr_oa);
+        if (rc == 0 && oldact) { /* translate the old-style result back */
+          if (act) {
+            memcpy(oldact, act, sizeof(*act));
+          } else {
+            memset(oldact, 0, sizeof(*oldact));
+          }
+          oldact->sa_handler_    = ptr_oa->sa_handler_;
+          oldact->sa_flags       = ptr_oa->sa_flags;
+          memcpy(&oldact->sa_mask, &ptr_oa->sa_mask, sizeof(ptr_oa->sa_mask));
+          #ifndef __mips__
+          oldact->sa_restorer    = ptr_oa->sa_restorer;
+          #endif
+        }
+      }
+      return rc;
+    }
+
+    /* Retrieves the pending signals into *set. rt_sigpending is tried
+     * first; only if it fails with ENOSYS is the previous errno restored
+     * and the legacy _sigpending syscall used, which fills in the first
+     * word of the (pre-cleared) set.
+     */
+    LSS_INLINE int LSS_NAME(sigpending)(struct kernel_sigset_t *set) {
+      const int saved_errno = LSS_ERRNO;
+      const int rt_result =
+          LSS_NAME(rt_sigpending)(set, (KERNEL_NSIG+7)/8);
+      if (rt_result >= 0 || LSS_ERRNO != ENOSYS) {
+        return rt_result;
+      }
+      LSS_ERRNO = saved_errno;
+      LSS_NAME(sigemptyset)(set);
+      return LSS_NAME(_sigpending)(&set->sig[0]);
+    }
+
+    /* Changes and/or reads the signal mask. rt_sigprocmask is tried first;
+     * only if it fails with ENOSYS is the previous errno restored and the
+     * legacy _sigprocmask syscall used, which works on the first word of
+     * each set (oldset, if given, is cleared beforehand).
+     */
+    LSS_INLINE int LSS_NAME(sigprocmask)(int how,
+                                         const struct kernel_sigset_t *set,
+                                         struct kernel_sigset_t *oldset) {
+      const int saved_errno = LSS_ERRNO;
+      const int rt_result =
+          LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8);
+      if (rt_result >= 0 || LSS_ERRNO != ENOSYS) {
+        return rt_result;
+      }
+      LSS_ERRNO = saved_errno;
+      if (oldset) {
+        LSS_NAME(sigemptyset)(oldset);
+      }
+      return LSS_NAME(_sigprocmask)(how,
+                                    set ? &set->sig[0] : NULL,
+                                    oldset ? &oldset->sig[0] : NULL);
+    }
+
+    LSS_INLINE int LSS_NAME(sigsuspend)(const struct kernel_sigset_t *set) { /*
+       * Tries rt_sigsuspend first; if it fails with ENOSYS, restores the
+       * saved errno and retries through the legacy _sigsuspend syscall with
+       * the first word of the set. That syscall is declared with one
+       * argument on PPC and three elsewhere, hence the #ifndef below.
+       */
+      int olderrno = LSS_ERRNO;
+      int rc = LSS_NAME(rt_sigsuspend)(set, (KERNEL_NSIG+7)/8);
+      if (rc < 0 && LSS_ERRNO == ENOSYS) {
+        LSS_ERRNO = olderrno;
+        rc = LSS_NAME(_sigsuspend)(
+        #ifndef __PPC__
+                                   set, 0,
+        #endif
+                                   set->sig[0]);
+      }
+      return rc;
+    }
+  #endif
+  #if defined(__PPC__)
+    #undef LSS_SC_LOADARGS_0 /* LSS_SC_LOADARGS_N copies socketcall
+     * arguments 1..N into __sc_4, __sc_5, ... __sc_8 (in that order); each
+     * macro reuses the one for N-1 and adds a single assignment. The
+     * __sc_* variables are the register variables declared in LSS_SC_BODY.
+     */
+    #define LSS_SC_LOADARGS_0(dummy...)
+    #undef LSS_SC_LOADARGS_1
+    #define LSS_SC_LOADARGS_1(arg1)                                           \
+        __sc_4  = (unsigned long) (arg1)
+    #undef LSS_SC_LOADARGS_2
+    #define LSS_SC_LOADARGS_2(arg1, arg2)                                     \
+        LSS_SC_LOADARGS_1(arg1);                                              \
+        __sc_5  = (unsigned long) (arg2)
+    #undef LSS_SC_LOADARGS_3
+    #define LSS_SC_LOADARGS_3(arg1, arg2, arg3)                               \
+        LSS_SC_LOADARGS_2(arg1, arg2);                                        \
+        __sc_6  = (unsigned long) (arg3)
+    #undef LSS_SC_LOADARGS_4
+    #define LSS_SC_LOADARGS_4(arg1, arg2, arg3, arg4)                         \
+        LSS_SC_LOADARGS_3(arg1, arg2, arg3);                                  \
+        __sc_7  = (unsigned long) (arg4)
+    #undef LSS_SC_LOADARGS_5
+    #define LSS_SC_LOADARGS_5(arg1, arg2, arg3, arg4, arg5)                   \
+        LSS_SC_LOADARGS_4(arg1, arg2, arg3, arg4);                            \
+        __sc_8  = (unsigned long) (arg5)
+    #undef LSS_SC_BODY /* Function body for a PPC socketcall wrapper:
+     * r0 holds __NR_socketcall and r3 the sub-call number 'opt'. The asm
+     * opens a 48-byte stack frame, stores r4..r8 (the arguments loaded by
+     * LSS_SC_LOADARGS_<nr>) at offsets 20..36 of it, points r4 at that
+     * array and issues 'sc'. r3 and the CR contents are then handed to
+     * LSS_RETURN as result and error indicator.
+     * NOTE(review): no instruction restoring r1 after the 'stwu' is visible
+     * in this template -- confirm how the frame is released.
+     */
+    #define LSS_SC_BODY(nr, type, opt, args...)                               \
+        long __sc_ret, __sc_err;                                              \
+        {                                                                     \
+          register unsigned long __sc_0 __asm__ ("r0") = __NR_socketcall;     \
+          register unsigned long __sc_3 __asm__ ("r3") = opt;                 \
+          register unsigned long __sc_4 __asm__ ("r4");                       \
+          register unsigned long __sc_5 __asm__ ("r5");                       \
+          register unsigned long __sc_6 __asm__ ("r6");                       \
+          register unsigned long __sc_7 __asm__ ("r7");                       \
+          register unsigned long __sc_8 __asm__ ("r8");                       \
+          LSS_SC_LOADARGS_##nr(args);                                         \
+          __asm__ __volatile__                                                \
+              ("stwu 1, -48(1)\n\t"                                           \
+               "stw 4, 20(1)\n\t"                                             \
+               "stw 5, 24(1)\n\t"                                             \
+               "stw 6, 28(1)\n\t"                                             \
+               "stw 7, 32(1)\n\t"                                             \
+               "stw 8, 36(1)\n\t"                                             \
+               "addi 4, 1, 20\n\t"                                            \
+               "sc\n\t"                                                       \
+               "mfcr %0"                                                      \
+                 : "=&r" (__sc_0),                                            \
+                   "=&r" (__sc_3), "=&r" (__sc_4),                            \
+                   "=&r" (__sc_5), "=&r" (__sc_6),                            \
+                   "=&r" (__sc_7), "=&r" (__sc_8)                             \
+                 : LSS_ASMINPUT_##nr                                          \
+                 : "cr0", "ctr", "memory");                                   \
+          __sc_ret = __sc_3;                                                  \
+          __sc_err = __sc_0;                                                  \
+        }                                                                     \
+        LSS_RETURN(type, __sc_ret, __sc_err)
+
+    LSS_INLINE ssize_t LSS_NAME(recvmsg)(int s,struct kernel_msghdr *msg,
+                                         int flags){
+      LSS_SC_BODY(3, ssize_t, 17, s, msg, flags);
+    }
+
+    LSS_INLINE ssize_t LSS_NAME(sendmsg)(int s,
+                                         const struct kernel_msghdr *msg,
+                                         int flags) {
+      LSS_SC_BODY(3, ssize_t, 16, s, msg, flags);
+    }
+
+    // TODO(csilvers): why is this ifdef'ed out?
+#if 0
+    LSS_INLINE ssize_t LSS_NAME(sendto)(int s, const void *buf, size_t len,
+                                        int flags,
+                                        const struct kernel_sockaddr *to,
+                                        unsigned int tolen) {
+      LSS_BODY(6, ssize_t, 11, s, buf, len, flags, to, tolen);
+    }
+#endif
+
+    LSS_INLINE int LSS_NAME(shutdown)(int s, int how) {
+      LSS_SC_BODY(2, int, 13, s, how);
+    }
+
+    LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) {
+      LSS_SC_BODY(3, int, 1, domain, type, protocol);
+    }
+
+    LSS_INLINE int LSS_NAME(socketpair)(int d, int type, int protocol,
+                                        int sv[2]) {
+      LSS_SC_BODY(4, int, 8, d, type, protocol, sv);
+    }
+  #endif
+  #if defined(__i386__) || defined(__ARM_ARCH_3__) ||                         \
+      (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32)
+    #define __NR__socketcall  __NR_socketcall
+    LSS_INLINE _syscall2(int,      _socketcall,    int,   c,
+                         va_list,                  a)
+
+    LSS_INLINE int LSS_NAME(socketcall)(int op, ...) {
+      int rc;
+      va_list ap;
+      va_start(ap, op);
+      rc = LSS_NAME(_socketcall)(op, ap);
+      va_end(ap);
+      return rc;
+    }
+
+    LSS_INLINE ssize_t LSS_NAME(recvmsg)(int s,struct kernel_msghdr *msg,
+                                         int flags){
+      return (ssize_t)LSS_NAME(socketcall)(17, s, msg, flags);
+    }
+
+    LSS_INLINE ssize_t LSS_NAME(sendmsg)(int s,
+                                         const struct kernel_msghdr *msg,
+                                         int flags) {
+      return (ssize_t)LSS_NAME(socketcall)(16, s, msg, flags);
+    }
+
+    LSS_INLINE ssize_t LSS_NAME(sendto)(int s, const void *buf, size_t len,
+                                        int flags,
+                                        const struct kernel_sockaddr *to,
+                                        unsigned int tolen) {
+      return (ssize_t)LSS_NAME(socketcall)(11, s, buf, len, flags, to, tolen);
+    }
+
+    LSS_INLINE int LSS_NAME(shutdown)(int s, int how) {
+      return LSS_NAME(socketcall)(13, s, how);
+    }
+
+    LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) {
+      return LSS_NAME(socketcall)(1, domain, type, protocol);
+    }
+
+    LSS_INLINE int LSS_NAME(socketpair)(int d, int type, int protocol,
+                                        int sv[2]) {
+      return LSS_NAME(socketcall)(8, d, type, protocol, sv);
+    }
+  #endif
+  #if defined(__i386__) || defined(__PPC__)
+    LSS_INLINE _syscall4(int,   fstatat64,        int,   d,
+                         const char *,      p,
+                         struct kernel_stat64 *,   b,    int,   f)
+  #endif
+  #if defined(__i386__) || defined(__PPC__) ||                                \
+     (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32)
+    LSS_INLINE _syscall3(pid_t, waitpid,          pid_t, p,
+                         int*,              s,    int,   o)
+  #endif
+  #if defined(__mips__)
+    /* sys_pipe() on MIPS has non-standard calling conventions, as it returns
+     * both file handles through CPU registers.
+     */
+    LSS_INLINE int LSS_NAME(pipe)(int *p) {
+      register unsigned long __v0 __asm__("$2") = __NR_pipe;
+      register unsigned long __v1 __asm__("$3");
+      register unsigned long __r7 __asm__("$7");
+      __asm__ __volatile__ ("syscall\n"
+                            : "=&r"(__v0), "=&r"(__v1), "+r" (__r7)
+                            : "0"(__v0)
+                            : "$8", "$9", "$10", "$11", "$12",
+                              "$13", "$14", "$15", "$24", "memory");
+      if (__r7) {
+        LSS_ERRNO = __v0;
+        return -1;
+      } else {
+        p[0] = __v0;
+        p[1] = __v1;
+        return 0;
+      }
+    }
+  #else
+    LSS_INLINE _syscall1(int,     pipe,           int *, p)
+  #endif
+  /* TODO(csilvers): see if ppc can/should support this as well              */
+  #if defined(__i386__) || defined(__ARM_ARCH_3__) ||                         \
+     (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64)
+    #define __NR__statfs64  __NR_statfs64
+    #define __NR__fstatfs64 __NR_fstatfs64
+    LSS_INLINE _syscall3(int, _statfs64,     const char*, p,
+                         size_t, s,struct kernel_statfs64*, b)
+    LSS_INLINE _syscall3(int, _fstatfs64,          int,   f,
+                         size_t, s,struct kernel_statfs64*, b)
+    LSS_INLINE int LSS_NAME(statfs64)(const char *p,
+                                     struct kernel_statfs64 *b) {
+      return LSS_NAME(_statfs64)(p, sizeof(*b), b);
+    }
+    LSS_INLINE int LSS_NAME(fstatfs64)(int f,struct kernel_statfs64 *b) {
+      return LSS_NAME(_fstatfs64)(f, sizeof(*b), b);
+    }
+  #endif
+
+  LSS_INLINE int LSS_NAME(execv)(const char *path, const char *const argv[]) {
+    extern char **environ;
+    return LSS_NAME(execve)(path, argv, (const char *const *)environ);
+  }
+
+  LSS_INLINE pid_t LSS_NAME(gettid)() {
+    /* Prefer the id from the gettid syscall wrapper; when that reports -1,
+     * hand back the process id instead.
+     */
+    const pid_t thread_id = LSS_NAME(_gettid)();
+    return thread_id == -1 ? LSS_NAME(getpid)() : thread_id;
+  }
+
+  LSS_INLINE void *LSS_NAME(mremap)(void *old_address, size_t old_size,
+                                    size_t new_size, int flags, ...) {
+    /* The first variadic argument is always fetched and forwarded to the
+     * raw syscall as the new address, whatever value flags has. */
+    void *target;
+    va_list extra;
+    va_start(extra, flags);
+    target = va_arg(extra, void *);
+    va_end(extra);
+    return LSS_NAME(_mremap)(old_address, old_size, new_size, flags, target);
+  }
+
+  LSS_INLINE int LSS_NAME(ptrace_detach)(pid_t pid) {
+    /* PTRACE_DETACH can sometimes forget to wake up the tracee and it
+     * then sends job control signals to the real parent, rather than to
+     * the tracer. We reduce the risk of this happening by starting a
+     * whole new time slice, and then quickly sending a SIGCONT signal
+     * right after detaching from the tracee.
+     *
+     * We use tkill to ensure that we only issue a wakeup for the thread being
+     * detached.  Large multi threaded apps can take a long time in the kernel
+     * processing SIGCONT.
+     */
+    int rc, err;
+    LSS_NAME(sched_yield)();
+    rc = LSS_NAME(ptrace)(PTRACE_DETACH, pid, (void *)0, (void *)0);
+    err = LSS_ERRNO;
+    /* Old systems don't have tkill. Fall back to kill() only when tkill
+     * itself failed with ENOSYS; otherwise errno may hold a stale value. */
+    if (LSS_NAME(tkill)(pid, SIGCONT) < 0 && LSS_ERRNO == ENOSYS)
+      LSS_NAME(kill)(pid, SIGCONT);
+    LSS_ERRNO = err;
+    return rc;
+  }
+
+  LSS_INLINE int LSS_NAME(raise)(int sig) {
+    return LSS_NAME(kill)(LSS_NAME(getpid)(), sig);
+  }
+
+  LSS_INLINE int LSS_NAME(setpgrp)() {
+    return LSS_NAME(setpgid)(0, 0);
+  }
+
+  LSS_INLINE int LSS_NAME(sysconf)(int name) {
+    extern int __getpagesize(void);
+    if (name == _SC_OPEN_MAX) {
+      /* Current RLIMIT_NOFILE limit, or 8192 when getrlimit() fails. */
+      struct kernel_rlimit rlim;
+      if (LSS_NAME(getrlimit)(RLIMIT_NOFILE, &rlim) < 0)
+        return 8192;
+      return rlim.rlim_cur;
+    }
+    if (name == _SC_PAGESIZE)
+      return __getpagesize();
+    /* Every other name is unsupported. */
+    LSS_ERRNO = ENOSYS;
+    return -1;
+  }
+  #if defined(__x86_64__) ||                                                  \
+     (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI64)
+    LSS_INLINE _syscall4(ssize_t, pread64,        int,         f,
+                         void *,         b, size_t,   c,
+                         loff_t,         o)
+    LSS_INLINE _syscall4(ssize_t, pwrite64,       int,         f,
+                         const void *,   b, size_t,   c,
+                         loff_t,         o)
+    LSS_INLINE _syscall3(int,     readahead,      int,         f,
+                         loff_t,         o, unsigned, c)
+  #else
+    #define __NR__pread64   __NR_pread64
+    #define __NR__pwrite64  __NR_pwrite64
+    #define __NR__readahead __NR_readahead
+    LSS_INLINE _syscall5(ssize_t, _pread64,        int,         f,
+                         void *,         b, size_t, c, unsigned, o1,
+                         unsigned, o2)
+    LSS_INLINE _syscall5(ssize_t, _pwrite64,       int,         f,
+                         const void *,   b, size_t, c, unsigned, o1,
+                         long, o2)
+    LSS_INLINE _syscall4(int, _readahead,          int,         f,
+                         unsigned,       o1, unsigned, o2, size_t, c);
+    /* We force 64bit-wide parameters onto the stack, then access each
+     * 32-bit component individually. This guarantees that we build the
+     * correct parameters independent of the native byte-order of the
+     * underlying architecture.
+     */
+    LSS_INLINE ssize_t LSS_NAME(pread64)(int fd, void *buf, size_t count,
+                                         loff_t off) {
+      union { loff_t off; unsigned arg[2]; } o = { off };
+      return LSS_NAME(_pread64)(fd, buf, count, o.arg[0], o.arg[1]);
+    }
+    LSS_INLINE ssize_t LSS_NAME(pwrite64)(int fd, const void *buf,
+                                          size_t count, loff_t off) {
+      union { loff_t off; unsigned arg[2]; } o = { off };
+      return LSS_NAME(_pwrite64)(fd, buf, count, o.arg[0], o.arg[1]);
+    }
+    LSS_INLINE int LSS_NAME(readahead)(int fd, loff_t off, int len) {
+      union { loff_t off; unsigned arg[2]; } o = { off };
+      return LSS_NAME(_readahead)(fd, o.arg[0], o.arg[1], len);
+    }
+  #endif
+#endif
+
+#if defined(__cplusplus) && !defined(SYS_CPLUSPLUS)
+}
+#endif
+
+#endif
+#endif
diff --git a/sandbox/linux/seccomp/madvise.cc b/sandbox/linux/seccomp/madvise.cc
new file mode 100644
index 0000000..70c594f
--- /dev/null
+++ b/sandbox/linux/seccomp/madvise.cc
@@ -0,0 +1,81 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "debug.h"
+#include "sandbox_impl.h"
+
+namespace playground {
+
+long Sandbox::sandbox_madvise(void* start, size_t length, int advice) {
+  long long started;
+  Debug::syscall(&started, __NR_madvise, "Executing handler");
+  // Wire format: syscall number, cookie(), then the MAdvise arguments.
+  struct {
+    int       sysnum;
+    long long cookie;
+    MAdvise   madvise_req;
+  } __attribute__((packed)) msg;
+  msg.madvise_req.advice = advice;
+  msg.madvise_req.len    = length;
+  msg.madvise_req.start  = start;
+  msg.cookie             = cookie();
+  msg.sysnum             = __NR_madvise;
+  SysCalls sys;  // Request goes out on processFdPub(), reply on threadFdPub().
+  long rv;
+  const bool sent = write(sys, processFdPub(), &msg, sizeof(msg)) ==
+                    sizeof(msg);
+  if (!sent || read(sys, threadFdPub(), &rv, sizeof(rv)) != sizeof(rv)) {
+    die("Failed to forward madvise() request [sandbox]");
+  }
+  Debug::elapsed(started, __NR_madvise);
+  return rv;
+}
+
+bool Sandbox::process_madvise(int parentMapsFd, int sandboxFd, int threadFdPub,
+                              int threadFd, SecureMem::Args* mem) {
+  // Fetch the MAdvise arguments that were forwarded on sandboxFd.
+  SysCalls sys;
+  MAdvise req;
+  if (read(sys, sandboxFd, &req, sizeof(req)) != sizeof(req)) {
+    die("Failed to read parameters for madvise() [process]");
+  }
+
+  // These hints merely affect overall performance, so they are always
+  // passed through without looking at the address range.
+  const bool harmless = req.advice == MADV_NORMAL ||
+                        req.advice == MADV_RANDOM ||
+                        req.advice == MADV_SEQUENTIAL ||
+                        req.advice == MADV_WILLNEED;
+  if (!harmless) {
+    // All other flags to madvise() are potentially dangerous. Do not allow
+    // them on memory ranges that were part of the original mappings.
+    void *stop = reinterpret_cast<void *>((char *)req.start + req.len);
+    ProtectedMap::const_iterator iter =
+        protectedMap_.lower_bound((void *)req.start);
+    if (iter != protectedMap_.begin()) {
+      // lower_bound() skips a region that begins before req.start, so step
+      // back one entry to include it in the overlap test.
+      --iter;
+    }
+    while (iter != protectedMap_.end() && iter->first < stop) {
+      char *region = reinterpret_cast<char *>(iter->first);
+      if (req.start < reinterpret_cast<void *>(region + iter->second) &&
+          stop > iter->first) {
+        int rc = -EINVAL;
+        SecureMem::abandonSystemCall(threadFd, rc);
+        return false;
+      }
+      ++iter;
+    }
+  }
+
+  // Changing attributes on memory regions that were newly mapped inside of
+  // the sandbox is OK.
+  SecureMem::sendSystemCall(threadFdPub, false, -1, mem, __NR_madvise,
+                            req.start, req.len,
+                            req.advice);
+  return true;
+}
+
+} // namespace
diff --git a/sandbox/linux/seccomp/maps.cc b/sandbox/linux/seccomp/maps.cc
new file mode 100644
index 0000000..8ae218d
--- /dev/null
+++ b/sandbox/linux/seccomp/maps.cc
@@ -0,0 +1,267 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/unistd.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "library.h"
+#include "maps.h"
+#include "sandbox_impl.h"
+
+namespace playground {
+
+Maps::Maps(int proc_self_maps) :  // Fills libs_ from the given maps descriptor.
+    proc_self_maps_(proc_self_maps),
+    begin_iter_(this, true, false),
+    end_iter_(this, false, true),
+    vsyscall_(0) {
+  Sandbox::SysCalls sys;
+  if (proc_self_maps_ >= 0 &&
+      !sys.lseek(proc_self_maps_, 0, SEEK_SET)) {  // Rewind; 0 means success.
+    char buf[256] = { 0 };  // Last byte is never read into, so it stays NUL.
+    int len = 0, rc = 1;
+    bool long_line = false;
+    do {
+      if (rc > 0) {  // Stop reading once read() has reported EOF or an error.
+        rc = Sandbox::read(sys, proc_self_maps_, buf + len,
+                           sizeof(buf) - len - 1);
+        if (rc > 0) {
+          len += rc;
+        }
+      }
+      char *ptr = buf;
+      if (!long_line) {
+        long_line = true;
+        unsigned long start = strtoul(ptr, &ptr, 16);  // "<start>-<stop>"
+        unsigned long stop = strtoul(ptr + 1, &ptr, 16);
+        while (*ptr == ' ' || *ptr == '\t') ++ptr;
+        char *perm_ptr = ptr;
+        while (*ptr && *ptr != ' ' && *ptr != '\t') ++ptr;
+        string perm(perm_ptr, ptr - perm_ptr);  // Permission field.
+        unsigned long offset = strtoul(ptr, &ptr, 16);  // File offset field.
+        while (*ptr == ' ' || *ptr == '\t') ++ptr;
+        char *id_ptr = ptr;
+        while (*ptr && *ptr != ' ' && *ptr != '\t') ++ptr;
+        while (*ptr == ' ' || *ptr == '\t') ++ptr;
+        while (*ptr && *ptr != ' ' && *ptr != '\t') ++ptr;
+        string id(id_ptr, ptr - id_ptr);  // The next two fields, verbatim.
+        while (*ptr == ' ' || *ptr == '\t') ++ptr;
+        char *library_ptr = ptr;
+        while (*ptr && *ptr != ' ' && *ptr != '\t' && *ptr != '\n') ++ptr;
+        string library(library_ptr, ptr - library_ptr);  // Name field.
+        bool isVDSO = false;
+        if (library == "[vdso]") {
+          // /proc/self/maps has a misleading file offset in the [vdso] entry.
+          // Override it with a sane value.
+          offset = 0;
+          isVDSO = true;
+        } else if (library == "[vsyscall]") {
+          vsyscall_ = reinterpret_cast<char *>(start);
+        } else if (library.empty() || library[0] == '[') {
+          goto skip_entry;  // Unnamed and other "[...]" entries are ignored.
+        }
+        int prot = 0;
+        if (perm.find('r') != string::npos) {
+          prot |= PROT_READ;
+        }
+        if (perm.find('w') != string::npos) {
+          prot |= PROT_WRITE;
+        }
+        if (perm.find('x') != string::npos) {
+          prot |= PROT_EXEC;
+        }
+        if ((prot & (PROT_EXEC | PROT_READ)) == 0) {
+          goto skip_entry;  // Neither readable nor executable.
+        }
+        Library* lib = &libs_[id + ' ' + library];  // Created on first use.
+        lib->setLibraryInfo(this);
+        lib->addMemoryRange(reinterpret_cast<void *>(start),
+                            reinterpret_cast<void *>(stop),
+                            Elf_Addr(offset),
+                            prot, isVDSO);
+      }
+   skip_entry:
+      for (;;) {
+        if (!*ptr || *ptr++ == '\n') {  // End of buffered data or end of line.
+          long_line = false;
+          memmove(buf, ptr, len - (ptr - buf));  // Drop the consumed bytes.
+          memset(buf + len - (ptr - buf), 0, ptr - buf);  // Re-zero the tail.
+          len -= (ptr - buf);
+          break;
+        }
+      }
+    } while (len || long_line);
+  }
+}
+
+Maps::Iterator::Iterator(Maps* maps, bool at_beginning, bool at_end)
+    : maps_(maps),
+      at_beginning_(at_beginning),
+      at_end_(at_end) {
+}
+
+Maps::LibraryMap::iterator& Maps::Iterator::getIterator() const {
+  // The begin/end markers recompute iter_ on every call; others reuse it.
+  if (at_beginning_ || at_end_) {
+    LibraryMap& libs = maps_->libs_;
+    iter_ = at_beginning_ ? libs.begin() : libs.end();
+  }
+  return iter_;
+}
+
+Maps::Iterator Maps::Iterator::begin() {
+  return maps_->begin_iter_;
+}
+
+Maps::Iterator Maps::Iterator::end() {
+  return maps_->end_iter_;
+}
+
+Maps::Iterator& Maps::Iterator::operator++() {
+  ++getIterator();  // Advance the underlying LibraryMap iterator.
+  at_beginning_ = false;  // getIterator() must stop resetting to begin().
+  return *this;
+}
+
+Maps::Iterator Maps::Iterator::operator++(int) {
+  const Iterator previous(*this);  // Post-increment returns the old position.
+  operator++();
+  return previous;
+}
+
+Library* Maps::Iterator::operator*() const {
+  return &getIterator()->second;  // Address of the mapped Library.
+}
+
+bool Maps::Iterator::operator==(const Maps::Iterator& iter) const {
+  return getIterator() == iter.getIterator();  // Compare resolved positions.
+}
+
+bool Maps::Iterator::operator!=(const Maps::Iterator& iter) const {
+  return !(*this == iter);  // Defined as the negation of operator==.
+}
+
+Maps::string Maps::Iterator::name() const {
+  return getIterator()->first;
+}
+
+// Test whether a line ends with "[stack]"; used for identifying the
+// stack entry of /proc/self/maps. Only the bytes in [buf, end) are examined.
+static bool isStackLine(char* buf, char* end) {
+  char* ptr = buf;
+  for ( ; ptr < end && *ptr != '\n'; ++ptr)  // Bounds check before the read.
+    ;
+  if (ptr < end && ptr - buf >= 7) {  // Found '\n' after at least 7 chars.
+    return (memcmp(ptr - 7, "[stack]", 7) == 0);
+  }
+  return false;
+}
+
+char* Maps::allocNearAddr(char* addr_target, size_t size, int prot) const {
+  // Maps |size| bytes within 1.5GB of |addr_target|, so that relative 32bit
+  // jumps from the target can reach the block. Returns NULL on failure.
+  const unsigned long kMaxDistance = 1536 << 20;
+  // In most of the code below, we just care about the numeric value of
+  // the address.
+  const long addr = reinterpret_cast<long>(addr_target);
+  size = (size + 4095) & ~4095;  // Round up to a multiple of 4096.
+  Sandbox::SysCalls sys;
+  if (sys.lseek(proc_self_maps_, 0, SEEK_SET)) {
+    return NULL;  // Could not rewind the maps descriptor.
+  }
+
+  // Iterate through lines of /proc/self/maps to consider each mapped
+  // region one at a time, looking for a gap between regions to allocate.
+  char buf[256] = { 0 };
+  int len = 0, rc = 1;
+  bool long_line = false;
+  unsigned long gap_start = 0x10000;  // No gap below 0x10000 is considered.
+  void* new_addr;
+  do {
+    if (rc > 0) {
+      do {
+        rc = Sandbox::read(sys, proc_self_maps_, buf + len,
+                           sizeof(buf) - len - 1);
+        if (rc > 0) {
+          len += rc;
+        }
+      } while (rc > 0 && len < (int)sizeof(buf) - 1);  // Fill buf if possible.
+    }
+    char *ptr = buf;
+    if (!long_line) {
+      long_line = true;
+      // Maps lines have the form "<start address>-<end address> ... <name>".
+      unsigned long gap_end = strtoul(ptr, &ptr, 16);
+      unsigned long map_end = strtoul(ptr + 1, &ptr, 16);
+
+      // gap_start to gap_end now covers the region of empty space before
+      // the current line.  Now we try to see if there's a place within the
+      // gap we can use.
+
+      if (gap_end - gap_start >= size) {
+        // Is the gap before our target address?
+        if (addr - static_cast<long>(gap_end) >= 0) {
+          if (addr - (gap_end - size) < kMaxDistance) {
+            unsigned long position;
+            if (isStackLine(ptr, buf + len)) {
+              // If we're adjacent to the stack, try to stay away from
+              // the GROWS_DOWN region.  Pick the farthest away region that
+              // is still within the gap.
+
+              if (static_cast<unsigned long>(addr) < kMaxDistance ||  // Underflow protection.
+                  static_cast<unsigned long>(addr) - kMaxDistance < gap_start) {
+                position = gap_start;
+              } else {
+                position = (addr - kMaxDistance) & ~4095;
+                if (position < gap_start) {
+                  position = gap_start;
+                }
+              }
+            } else {
+              // Otherwise, take the end of the region.
+              position = gap_end - size;
+            }
+            new_addr = reinterpret_cast<char *>(sys.MMAP
+                           (reinterpret_cast<void *>(position), size, prot,
+                            MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0));
+            if (new_addr != MAP_FAILED) {
+              goto done;
+            }
+          }
+        } else if (gap_start + size - addr < kMaxDistance) {
+          // Gap is after the address.  Above checks that we can wrap around
+          // through 0 to a space we'd use.
+          new_addr = reinterpret_cast<char *>(sys.MMAP
+                         (reinterpret_cast<void *>(gap_start), size, prot,
+                          MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1 ,0));
+          if (new_addr != MAP_FAILED) {
+            goto done;
+          }
+        }
+      }
+      gap_start = map_end;  // The next gap begins where this mapping ends.
+    }
+    for (;;) {
+      if (!*ptr || *ptr++ == '\n') {  // End of buffered data or end of line.
+        long_line = false;
+        memmove(buf, ptr, len - (ptr - buf));  // Drop the consumed bytes.
+        memset(buf + len - (ptr - buf), 0, ptr - buf);  // Re-zero the tail.
+        len -= (ptr - buf);
+        break;
+      }
+    }
+  } while (len || long_line);
+  new_addr = NULL;  // Ran out of maps entries without a successful mmap.
+done:
+  return reinterpret_cast<char*>(new_addr);
+}
+
+} // namespace
diff --git a/sandbox/linux/seccomp/maps.h b/sandbox/linux/seccomp/maps.h
new file mode 100644
index 0000000..fbcc7672
--- /dev/null
+++ b/sandbox/linux/seccomp/maps.h
@@ -0,0 +1,94 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef MAPS_H__
+#define MAPS_H__
+
+#include <elf.h>
+#include <functional>
+#include <map>
+#include <string>
+
+#include "allocator.h"
+
+#if defined(__x86_64__)
+typedef Elf64_Addr Elf_Addr;
+#elif defined(__i386__)
+typedef Elf32_Addr Elf_Addr;
+#else
+#error Undefined target platform
+#endif
+
+namespace playground {
+
+class Library;
+class Maps {  // The process' memory map, grouped into Library objects.
+  friend class Library;
+ public:
+  typedef std::basic_string<char, std::char_traits<char>,
+                            SystemAllocator<char> > string;
+
+  Maps(int proc_self_maps);  // Parses the maps file open on this descriptor.
+  ~Maps() { }
+
+ protected:
+  // A map with all the libraries currently loaded into the application.
+  // The key is a unique combination of device number, inode number, and
+  // file name. It should be treated as opaque.
+  typedef std::map<string, Library, std::less<string>,
+                   SystemAllocator<std::pair<const string,
+                                             Library> > > LibraryMap;
+  friend class Iterator;
+  class Iterator {
+    friend class Maps;
+
+   protected:
+    explicit Iterator(Maps* maps);
+    Iterator(Maps* maps, bool at_beginning, bool at_end);
+    Maps::LibraryMap::iterator& getIterator() const;
+
+   public:
+    Iterator begin();
+    Iterator end();
+    Iterator& operator++();
+    Iterator operator++(int i);
+    Library* operator*() const;  // Points at the Library of this entry.
+    bool operator==(const Iterator& iter) const;
+    bool operator!=(const Iterator& iter) const;
+    string name() const;  // The LibraryMap key of this entry.
+
+   protected:
+    mutable LibraryMap::iterator iter_;  // Written by const getIterator().
+    Maps *maps_;
+    bool at_beginning_;  // If set, getIterator() yields libs_.begin().
+    bool at_end_;  // If set (and not at_beginning_), yields libs_.end().
+  };
+
+ public:
+  typedef class Iterator const_iterator;
+
+  const_iterator begin() {
+    return begin_iter_;
+  }
+
+  const_iterator end() {
+    return end_iter_;
+  }
+
+  char* allocNearAddr(char *addr, size_t size, int prot) const;
+
+  char* vsyscall() const { return vsyscall_; }
+
+ protected:
+  const int      proc_self_maps_;  // Descriptor given to the constructor.
+  const Iterator begin_iter_;
+  const Iterator end_iter_;
+
+  LibraryMap     libs_;  // Filled in by the constructor.
+  char*          vsyscall_;  // Start of "[vsyscall]", or 0 if none was seen.
+};
+
+} // namespace
+
+#endif // MAPS_H__
diff --git a/sandbox/linux/seccomp/mmap.cc b/sandbox/linux/seccomp/mmap.cc
new file mode 100644
index 0000000..700da91
--- /dev/null
+++ b/sandbox/linux/seccomp/mmap.cc
@@ -0,0 +1,75 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "debug.h"
+#include "sandbox_impl.h"
+
+namespace playground {
+
+void* Sandbox::sandbox_mmap(void *start, size_t length, int prot, int flags,
+                          int fd, off_t offset) {
+  long long started;
+  Debug::syscall(&started, __NR_mmap, "Executing handler");
+  // Wire format: syscall number, cookie(), then the MMap arguments.
+  struct {
+    int       sysnum;
+    long long cookie;
+    MMap      mmap_req;
+  } __attribute__((packed)) msg;
+  msg.mmap_req.offset = offset;
+  msg.mmap_req.fd     = fd;
+  msg.mmap_req.flags  = flags;
+  msg.mmap_req.prot   = prot;
+  msg.mmap_req.length = length;
+  msg.mmap_req.start  = start;
+  msg.cookie          = cookie();
+  msg.sysnum          = __NR_MMAP;
+  SysCalls sys;  // Request goes out on processFdPub(), reply on threadFdPub().
+  void* rv;
+  const bool sent = write(sys, processFdPub(), &msg, sizeof(msg)) ==
+                    sizeof(msg);
+  if (!sent || read(sys, threadFdPub(), &rv, sizeof(rv)) != sizeof(rv)) {
+    die("Failed to forward mmap() request [sandbox]");
+  }
+  Debug::elapsed(started, __NR_mmap);
+  return rv;
+}
+
+bool Sandbox::process_mmap(int parentMapsFd, int sandboxFd, int threadFdPub,
+                           int threadFd, SecureMem::Args* mem) {
+  // Fetch the MMap arguments that were forwarded on sandboxFd.
+  MMap req;
+  SysCalls sys;
+  if (read(sys, sandboxFd, &req, sizeof(req)) != sizeof(req)) {
+    die("Failed to read parameters for mmap() [process]");
+  }
+
+  if (req.flags & MAP_FIXED) {
+    // Cannot map a memory area that was part of the original memory mappings.
+    void *stop = reinterpret_cast<void *>((char *)req.start + req.length);
+    ProtectedMap::const_iterator iter =
+        protectedMap_.lower_bound((void *)req.start);
+    if (iter != protectedMap_.begin()) {
+      --iter;  // Also test the region that begins just before req.start.
+    }
+    while (iter != protectedMap_.end() && iter->first < stop) {
+      char *region = reinterpret_cast<char *>(iter->first);
+      if (req.start < reinterpret_cast<void *>(region + iter->second) &&
+          stop > iter->first) {
+        int rc = -EINVAL;
+        SecureMem::abandonSystemCall(threadFd, rc);
+        return false;
+      }
+      ++iter;
+    }
+  }
+
+  // All other mmap() requests are OK
+  SecureMem::sendSystemCall(threadFdPub, false, -1, mem, __NR_MMAP,
+                            req.start, req.length, req.prot,
+                            req.flags, req.fd, req.offset);
+  return true;
+}
+
+} // namespace
diff --git a/sandbox/linux/seccomp/mprotect.cc b/sandbox/linux/seccomp/mprotect.cc
new file mode 100644
index 0000000..548199d
--- /dev/null
+++ b/sandbox/linux/seccomp/mprotect.cc
@@ -0,0 +1,73 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "debug.h"
+#include "sandbox_impl.h"
+
+namespace playground {
+
+long Sandbox::sandbox_mprotect(const void *addr, size_t len, int prot) {
+  long long started;
+  Debug::syscall(&started, __NR_mprotect, "Executing handler");
+  // Wire format: syscall number, cookie(), then the MProtect arguments.
+  struct {
+    int       sysnum;
+    long long cookie;
+    MProtect  mprotect_req;
+  } __attribute__((packed)) msg;
+  msg.mprotect_req.prot = prot;
+  msg.mprotect_req.len  = len;
+  msg.mprotect_req.addr = addr;
+  msg.cookie            = cookie();
+  msg.sysnum            = __NR_mprotect;
+  SysCalls sys;  // Request goes out on processFdPub(), reply on threadFdPub().
+  long rv;
+  const bool sent = write(sys, processFdPub(), &msg, sizeof(msg)) ==
+                    sizeof(msg);
+  if (!sent || read(sys, threadFdPub(), &rv, sizeof(rv)) != sizeof(rv)) {
+    die("Failed to forward mprotect() request [sandbox]");
+  }
+  Debug::elapsed(started, __NR_mprotect);
+  return rv;
+}
+
+bool Sandbox::process_mprotect(int parentMapsFd, int sandboxFd,
+                               int threadFdPub, int threadFd,
+                               SecureMem::Args* mem) {
+  // Fetch the MProtect arguments that were forwarded on sandboxFd.
+  MProtect req;
+  SysCalls sys;
+  if (read(sys, sandboxFd, &req, sizeof(req)) != sizeof(req)) {
+    die("Failed to read parameters for mprotect() [process]");
+  }
+
+  // Permissions must never change on a memory region that was part of the
+  // original memory mappings, so reject any overlap with protectedMap_.
+  void *stop = reinterpret_cast<void *>(
+      (char *)req.addr + req.len);
+  ProtectedMap::const_iterator iter =
+      protectedMap_.lower_bound((void *)req.addr);
+  if (iter != protectedMap_.begin()) {
+    --iter;  // Also test the region that begins just before req.addr.
+  }
+  while (iter != protectedMap_.end() && iter->first < stop) {
+    char *region = reinterpret_cast<char *>(iter->first);
+    if (req.addr < reinterpret_cast<void *>(region + iter->second) &&
+        stop > iter->first) {
+      int rc = -EINVAL;
+      SecureMem::abandonSystemCall(threadFd, rc);
+      return false;
+    }
+    ++iter;
+  }
+
+  // Changing permissions on memory regions that were newly mapped inside of
+  // the sandbox is OK.
+  SecureMem::sendSystemCall(threadFdPub, false, -1, mem, __NR_mprotect,
+                            req.addr, req.len,
+                            req.prot);
+  return true;
+}
+
+} // namespace
diff --git a/sandbox/linux/seccomp/munmap.cc b/sandbox/linux/seccomp/munmap.cc
new file mode 100644
index 0000000..dde7c7a
--- /dev/null
+++ b/sandbox/linux/seccomp/munmap.cc
@@ -0,0 +1,70 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "debug.h"
+#include "sandbox_impl.h"
+
+namespace playground {
+
+long Sandbox::sandbox_munmap(void* start, size_t length) {
+  long long started;
+  Debug::syscall(&started, __NR_munmap, "Executing handler");
+  struct {  // packed message: syscall number, cookie, MUnmap arguments
+    int       sysnum;
+    long long cookie;
+    MUnmap    munmap_req;
+  } __attribute__((packed)) msg;
+  msg.sysnum            = __NR_munmap;
+  msg.cookie            = cookie();
+  msg.munmap_req.start  = start;
+  msg.munmap_req.length = length;
+
+  // Forward the message on processFdPub(), then read the result back.
+  long result;
+  SysCalls sys;
+  if (write(sys, processFdPub(), &msg, sizeof(msg)) != sizeof(msg) ||
+      read(sys, threadFdPub(), &result, sizeof(result)) != sizeof(result)) {
+    die("Failed to forward munmap() request [sandbox]");
+  }
+  Debug::elapsed(started, __NR_munmap);
+  return result;
+}
+
+bool Sandbox::process_munmap(int parentMapsFd, int sandboxFd, int threadFdPub,
+                             int threadFd, SecureMem::Args* mem) {
+  // Fetch the MUnmap parameters from sandboxFd; a short read ends in die().
+  SysCalls sys;
+  MUnmap munmap_req;
+  if (sizeof(munmap_req) !=
+      read(sys, sandboxFd, &munmap_req, sizeof(munmap_req))) {
+    die("Failed to read parameters for munmap() [process]");
+  }
+
+  // Reject the call with -EINVAL if [start, start + length) intersects any
+  // region recorded in protectedMap_, i.e. the original memory mappings.
+  char *req_begin = reinterpret_cast<char *>(munmap_req.start);
+  void *req_end = reinterpret_cast<void *>(req_begin + munmap_req.length);
+  ProtectedMap::const_iterator region =
+      protectedMap_.lower_bound(munmap_req.start);
+  if (region != protectedMap_.begin()) {
+    --region;  // the entry just below start may still reach into the range
+  }
+  while (region != protectedMap_.end() && region->first < req_end) {
+    char *region_begin = reinterpret_cast<char *>(region->first);
+    if (req_end > region->first &&
+        req_begin < region_begin + region->second) {
+      SecureMem::abandonSystemCall(threadFd, -EINVAL);
+      return false;
+    }
+    ++region;
+  }
+
+  // No protected region is affected (the memory was newly mapped inside of
+  // the sandbox), so hand the request on via sendSystemCall().
+  SecureMem::sendSystemCall(threadFdPub, false, -1, mem, __NR_munmap,
+                            munmap_req.start, munmap_req.length);
+  return true;
+}
+
+} // namespace
diff --git a/sandbox/linux/seccomp/mutex.h b/sandbox/linux/seccomp/mutex.h
new file mode 100644
index 0000000..d7e1c5d
--- /dev/null
+++ b/sandbox/linux/seccomp/mutex.h
@@ -0,0 +1,153 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef MUTEX_H__
+#define MUTEX_H__
+
+#include "sandbox_impl.h"
+
+namespace playground {
+
+class Mutex {  // static helpers implementing a futex-based lock on an int
+ public:
+  typedef int mutex_t;  // bit 31 = locked flag, lower bits = number of waiters
+
+  enum { kInitValue = 0 };
+
+  static void initMutex(mutex_t* mutex) {
+    // Mutex is unlocked, and nobody is waiting for it
+    *mutex = kInitValue;
+  }
+
+  static void unlockMutex(mutex_t* mutex) {
+    char status;  // set by "setz": non-zero iff the add left *mutex at zero
+    #if defined(__x86_64__) || defined(__i386__)
+    asm volatile(
+        "lock; addl %2, %0\n"  // adding 0x80000000 flips bit 31 of *mutex
+        "setz %1"
+        : "=m"(*mutex), "=qm"(status)
+        : "ir"(0x80000000), "m"(*mutex));
+    #else
+    #error Unsupported target platform
+    #endif
+    if (status) {
+      // Mutex is zero now. No other waiters. So, we can return.
+      return;
+    }
+    // We unlocked the mutex, but still need to wake up other waiters.
+    Sandbox::SysCalls sys;
+    sys.futex(mutex, FUTEX_WAKE, 1, NULL);  // wake count of 1
+  }
+
+  static bool lockMutex(mutex_t* mutex, int timeout = 0) {
+    bool rc        = true;  // turns false if the futex wait times out
+    // Increment mutex to add ourselves to the list of waiters
+    #if defined(__x86_64__) || defined(__i386__)
+    asm volatile(
+        "lock; incl %0\n"
+        : "=m"(*mutex)
+        : "m"(*mutex));
+    #else
+    #error Unsupported target platform
+    #endif
+    for (;;) {
+      // Atomically check whether the mutex is available and if so, acquire it
+      char status;
+      #if defined(__x86_64__) || defined(__i386__)
+      asm volatile(
+          "lock; btsl %3, %1\n"  // set bit 31; carry flag = its old value
+          "setc %0"
+          : "=q"(status), "=m"(*mutex)
+          : "m"(*mutex), "ir"(31));
+      #else
+      #error Unsupported target platform
+      #endif
+      if (!status) {  // bit 31 was clear before, so we have just acquired it
+     done:
+        // Remove ourselves from the list of waiters; also reached on timeout
+        #if defined(__x86_64__) || defined(__i386__)
+        asm volatile(
+            "lock; decl %0\n"
+            : "=m"(*mutex)
+            : "m"(*mutex));
+        #else
+        #error Unsupported target platform
+        #endif
+        return rc;
+      }
+      int value    = *mutex;
+      if (value >= 0) {  // non-negative means bit 31 is clear
+        // Mutex has just become available, no need to call kernel
+        continue;
+      }
+      Sandbox::SysCalls sys;
+      Sandbox::SysCalls::kernel_timespec tm;
+      if (timeout) {
+        tm.tv_sec  = timeout / 1000;  // timeout is given in milliseconds
+        tm.tv_nsec = (timeout % 1000) * 1000 * 1000;
+      } else {
+        tm.tv_sec  = 0;  // NOTE(review): a zero timespec is passed, not NULL;
+        tm.tv_nsec = 0;  //   confirm this is the intended meaning of 0
+      }
+      if (NOINTR_SYS(sys.futex(mutex, FUTEX_WAIT, value, &tm)) &&
+          sys.my_errno == ETIMEDOUT) {
+        rc         = false;  // give up; "done" still drops our waiter count
+        goto done;
+      }
+    }
+  }
+
+  static bool waitForUnlock(mutex_t* mutex, int timeout = 0) {
+    bool rc        = true;  // turns false if the futex wait times out
+    // Increment mutex to add ourselves to the list of waiters
+    #if defined(__x86_64__) || defined(__i386__)
+    asm volatile(
+        "lock; incl %0\n"
+        : "=m"(*mutex)
+        : "m"(*mutex));
+    #else
+    #error Unsupported target platform
+    #endif
+    Sandbox::SysCalls sys;
+    for (;;) {
+      mutex_t value = *mutex;
+      if (value >= 0) {  // non-negative means bit 31 (locked) is clear
+     done:
+        // Mutex was not locked (or we timed out). Remove ourselves from list
+        // of waiters, notify any other waiters (if any), and return.
+        #if defined(__x86_64__) || defined(__i386__)
+        asm volatile(
+            "lock; decl %0\n"
+            : "=m"(*mutex)
+            : "m"(*mutex));
+        #else
+        #error Unsupported target platform
+        #endif
+        NOINTR_SYS(sys.futex(mutex, FUTEX_WAKE, 1, 0));
+        return rc;
+      }
+
+      // Wait for mutex to become unlocked
+      Sandbox::SysCalls::kernel_timespec tm;
+      if (timeout) {
+        tm.tv_sec   = timeout / 1000;  // timeout is given in milliseconds
+        tm.tv_nsec  = (timeout % 1000) * 1000 * 1000;
+      } else {
+        tm.tv_sec   = 0;  // NOTE(review): a zero timespec is passed, not NULL;
+        tm.tv_nsec  = 0;  //   confirm this is the intended meaning of 0
+      }
+
+      if (NOINTR_SYS(sys.futex(mutex, FUTEX_WAIT, value, &tm)) &&
+          sys.my_errno == ETIMEDOUT) {
+        rc          = false;
+        goto done;
+      }
+    }
+  }
+
+};
+
+} // namespace
+
+#endif // MUTEX_H__
diff --git a/sandbox/linux/seccomp/open.cc b/sandbox/linux/seccomp/open.cc
new file mode 100644
index 0000000..8a9093c
--- /dev/null
+++ b/sandbox/linux/seccomp/open.cc
@@ -0,0 +1,99 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "debug.h"
+#include "sandbox_impl.h"
+
+namespace playground {
+
+long Sandbox::sandbox_open(const char *pathname, int flags, mode_t mode) {
+  long long started;
+  Debug::syscall(&started, __NR_open, "Executing handler");
+  struct Request {  // packed header; the path follows it without a NUL
+    int       sysnum;
+    long long cookie;
+    Open      open_req;
+    char      pathname[0];
+  } __attribute__((packed));
+  const size_t path_len = strlen(pathname);
+  char buffer[sizeof(struct Request) + path_len];
+  struct Request *msg = reinterpret_cast<struct Request*>(buffer);
+  msg->sysnum               = __NR_open;
+  msg->cookie               = cookie();
+  msg->open_req.path_length = path_len;
+  msg->open_req.flags       = flags;
+  msg->open_req.mode        = mode;
+  memcpy(msg->pathname, pathname, path_len);
+
+  long result;
+  SysCalls sys;
+  if (write(sys, processFdPub(), msg, sizeof(buffer)) != (int)sizeof(buffer) ||
+      read(sys, threadFdPub(), &result, sizeof(result)) != sizeof(result)) {
+    die("Failed to forward open() request [sandbox]");
+  }
+  Debug::elapsed(started, __NR_open);
+  return result;
+}
+
+bool Sandbox::process_open(int parentMapsFd, int sandboxFd, int threadFdPub,
+                           int threadFd, SecureMem::Args* mem) {
+  // Read the fixed-size Open header; path_length bytes of path name follow it.
+  SysCalls sys;
+  Open open_req;
+  if (read(sys, sandboxFd, &open_req, sizeof(open_req)) != sizeof(open_req)) {
+ read_parm_failed:  // shared exit for every failed read of request data
+    die("Failed to read parameters for open() [process]");
+  }
+  int   rc                  = -ENAMETOOLONG;
+  if (open_req.path_length >= sizeof(mem->pathname)) {  // no room for path+NUL
+    char buf[32];  // scratch buffer: the oversized path is read and discarded
+    while (open_req.path_length > 0) {
+      size_t len            = open_req.path_length > sizeof(buf) ?
+                              sizeof(buf) : open_req.path_length;
+      ssize_t i             = read(sys, sandboxFd, buf, len);
+      if (i <= 0) {
+        goto read_parm_failed;
+      }
+      open_req.path_length -= i;
+    }
+    if (write(sys, threadFd, &rc, sizeof(rc)) != sizeof(rc)) {
+      die("Failed to return data from open() [process]");
+    }
+    return false;
+  }
+
+  if ((open_req.flags & O_ACCMODE) != O_RDONLY ||
+      !g_policy.allow_file_namespace) {
+    // After locking the mutex, we can no longer abandon the system call. So,
+    // perform checks before clobbering the securely shared memory.
+    char tmp[open_req.path_length];  // raw path bytes, not NUL-terminated
+    if (read(sys, sandboxFd, tmp, sizeof(tmp)) != (ssize_t)sizeof(tmp)) {
+      goto read_parm_failed;
+    }
+    Debug::message(("Denying access to \"" +
+                    std::string(tmp, open_req.path_length) + "\"").c_str());
+    SecureMem::abandonSystemCall(threadFd, -EACCES);
+    return false;
+  }
+
+  SecureMem::lockSystemCall(parentMapsFd, mem);
+  if (read(sys, sandboxFd, mem->pathname, open_req.path_length) !=
+      (ssize_t)open_req.path_length) {
+    goto read_parm_failed;
+  }
+  mem->pathname[open_req.path_length] = '\000';
+
+  // TODO(markus): Implement sandboxing policy. For now, we allow read
+  // access to everything. That's probably not correct.
+  Debug::message(("Allowing access to \"" + std::string(mem->pathname) +
+                  "\"").c_str());
+
+  // Tell trusted thread to open the file; the path is addressed via mem->self.
+  SecureMem::sendSystemCall(threadFdPub, true, parentMapsFd, mem, __NR_open,
+                            mem->pathname - (char*)mem + (char*)mem->self,
+                            open_req.flags, open_req.mode);
+  return true;
+}
+
+} // namespace
diff --git a/sandbox/linux/seccomp/sandbox.cc b/sandbox/linux/seccomp/sandbox.cc
new file mode 100644
index 0000000..0b09457
--- /dev/null
+++ b/sandbox/linux/seccomp/sandbox.cc
@@ -0,0 +1,838 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "library.h"
+#include "sandbox_impl.h"
+#include "syscall_table.h"
+
+namespace playground {
+
+// Definitions of the Sandbox:: globals (only the first two are initialized)
+int                                 Sandbox::proc_self_maps_ = -1;
+enum Sandbox::SandboxStatus         Sandbox::status_ = STATUS_UNKNOWN;
+int                                 Sandbox::pid_;
+int                                 Sandbox::processFdPub_;
+int                                 Sandbox::cloneFdPub_;
+Sandbox::SysCalls::kernel_sigaction Sandbox::sa_segv_;  // prior SIGSEGV action
+Sandbox::ProtectedMap               Sandbox::protectedMap_;
+std::vector<SecureMem::Args*>       Sandbox::secureMemPool_;
+
+bool Sandbox::sendFd(int transport, int fd0, int fd1, const void* buf,
+                     size_t len) {
+  int fds[2], nfds = 0;  // descriptors to pass; a negative fd is left out
+  if (fd0 >= 0) { fds[nfds++] = fd0; }
+  if (fd1 >= 0) { fds[nfds++] = fd1; }
+  if (nfds == 0) { return false; }
+  const bool has_payload = buf && len > 0;
+  // Data: a zero int (getFd() treats non-zero as an errno), then the payload.
+  int dummy = 0;
+  struct SysCalls::kernel_iovec iov[2] = { { 0 } };
+  iov[0].iov_base = &dummy;
+  iov[0].iov_len  = sizeof(dummy);
+  if (has_payload) {
+    iov[1].iov_base = const_cast<void *>(buf);
+    iov[1].iov_len  = len;
+  }
+  char cmsg_buf[CMSG_SPACE(nfds*sizeof(int))];  // one SCM_RIGHTS message
+  memset(cmsg_buf, 0, sizeof(cmsg_buf));
+  struct SysCalls::kernel_msghdr msg = { 0 };
+  msg.msg_iov        = iov;
+  msg.msg_iovlen     = has_payload ? 2 : 1;
+  msg.msg_control    = cmsg_buf;
+  msg.msg_controllen = CMSG_LEN(nfds*sizeof(int));
+  struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+  cmsg->cmsg_level = SOL_SOCKET;
+  cmsg->cmsg_type  = SCM_RIGHTS;
+  cmsg->cmsg_len   = CMSG_LEN(nfds*sizeof(int));
+  memcpy(CMSG_DATA(cmsg), fds, nfds*sizeof(int));
+  SysCalls sys;
+  return NOINTR_SYS(sys.sendmsg(transport, &msg, 0)) ==
+      (ssize_t)(sizeof(dummy) + (has_payload ? len : 0));
+}
+
+bool Sandbox::getFd(int transport, int* fd0, int* fd1, void* buf, size_t*len) {
+  int count                            = 0;  // descriptors requested
+  int *err                             = NULL;  // first non-NULL of fd0/fd1
+  if (fd0) {
+    count++;
+    err                                = fd0;
+    *fd0                               = -1;
+  }
+  if (fd1) {
+    if (!count++) {
+      err                              = fd1;
+    }
+    *fd1                               = -1;
+  }
+  if (!count) {  // the caller passed neither fd0 nor fd1
+    return false;
+  }
+  char cmsg_buf[CMSG_SPACE(count*sizeof(int))];
+  memset(cmsg_buf, 0, sizeof(cmsg_buf));
+  struct SysCalls::kernel_iovec iov[2] = { { 0 } };
+  struct SysCalls::kernel_msghdr msg   = { 0 };
+  iov[0].iov_base                      = err;  // leading int lands in *err
+  iov[0].iov_len                       = sizeof(int);
+  if (buf && len && *len > 0) {  // before the call, *len bounds the payload
+    iov[1].iov_base                    = buf;
+    iov[1].iov_len                     = *len;
+  }
+  msg.msg_iov                          = iov;
+  msg.msg_iovlen                       = (buf && len && *len > 0) ? 2 : 1;
+  msg.msg_control                      = cmsg_buf;
+  msg.msg_controllen                   = CMSG_LEN(count*sizeof(int));
+  SysCalls sys;
+  ssize_t bytes = NOINTR_SYS(sys.recvmsg(transport, &msg, 0));
+  if (len) {  // from here on, *len is the number of payload bytes received
+    *len                               = bytes > (int)sizeof(int) ?
+                                           bytes - sizeof(int) : 0;
+  }
+  if (bytes != (ssize_t)(sizeof(int) + ((buf && len && *len > 0) ? *len : 0))){
+    *err                               = bytes >= 0 ? 0 : -EBADF;
+    return false;
+  }
+  if (*err) {
+    // "err" is the first four bytes of the payload. If these are non-zero,
+    // the sender on the other side of the socketpair sent us an errno value.
+    // We don't expect to get any file handles in this case.
+    return false;
+  }
+  struct cmsghdr *cmsg               = CMSG_FIRSTHDR(&msg);
+  if ((msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) ||
+      !cmsg                                    ||
+      cmsg->cmsg_level != SOL_SOCKET           ||
+      cmsg->cmsg_type  != SCM_RIGHTS           ||
+      cmsg->cmsg_len   != CMSG_LEN(count*sizeof(int))) {
+    *err                             = -EBADF;  // not exactly "count" fds
+    return false;
+  }
+  if (fd1) { *fd1 = ((int *)CMSG_DATA(cmsg))[--count]; }  // last entry
+  if (fd0) { *fd0 = ((int *)CMSG_DATA(cmsg))[--count]; }  // the one before it
+  return true;
+}
+
+void Sandbox::setupSignalHandlers() {
+  // waitpid() only works if SIGCHLD has its default disposition (SIG_DFL).
+  SysCalls sys;
+  struct SysCalls::kernel_sigaction action;
+  memset(&action, 0, sizeof(action));
+  action.sa_handler_ = SIG_DFL;
+  sys.sigaction(SIGCHLD, &action, NULL);
+
+  // Install segv() for SIGSEGV. It deals with RDTSC instructions, system
+  // calls that have been rewritten to use INT0, sigprocmask() emulation,
+  // the creation of threads, and user-provided SEGV handlers.
+  action.sa_sigaction_ = segv();
+  action.sa_flags      = SA_SIGINFO | SA_NODEFER;
+  sys.sigaction(SIGSEGV, &action, &sa_segv_);
+
+  // Make sure that neither SIGSEGV nor SIGCHLD is blocked.
+  SysCalls::kernel_sigset_t unblock;
+  memset(&unblock, 0x00, sizeof(unblock));
+  unblock.sig[0] = (1 << (SIGSEGV - 1)) | (1 << (SIGCHLD - 1));
+  sys.sigprocmask(SIG_UNBLOCK, &unblock, 0);
+}
+
+void (*Sandbox::segv())(int signo, SysCalls::siginfo *context, void *unused) {
+  void (*fnc)(int signo, SysCalls::siginfo *context, void *unused);
+  asm volatile(
+      "call 999f\n"
+#if defined(__x86_64__)
+      // Inspect instruction at the point where the segmentation fault
+      // happened. If it is RDTSC, forward the request to the trusted
+      // thread.
+      "mov  $-3, %%r14\n"          // request for RDTSC
+      "mov  0xB0(%%rsp), %%r15\n"  // %rip at time of segmentation fault
+      "cmpw $0x310F, (%%r15)\n"    // RDTSC
+      "jz   0f\n"
+      "cmpw $0x010F, (%%r15)\n"    // RDTSCP
+      "jnz  8f\n"
+      "cmpb $0xF9, 2(%%r15)\n"
+      "jnz  8f\n"
+      "mov  $-4, %%r14\n"          // request for RDTSCP
+    "0:"
+#ifndef NDEBUG
+      "lea  100f(%%rip), %%rdi\n"
+      "call playground$debugMessage\n"
+#endif
+      "sub  $4, %%rsp\n"
+      "push %%r14\n"
+      "mov  %%gs:16, %%edi\n"      // fd  = threadFdPub
+      "mov  %%rsp, %%rsi\n"        // buf = %rsp
+      "mov  $4, %%edx\n"           // len = sizeof(int)
+    "1:mov  $1, %%eax\n"           // NR_write
+      "syscall\n"
+      "cmp  %%rax, %%rdx\n"
+      "jz   5f\n"
+      "cmp  $-4, %%eax\n"          // EINTR
+      "jz   1b\n"
+    "2:add  $12, %%rsp\n"
+      "movq $0, 0x98(%%rsp)\n"     // %rax at time of segmentation fault
+      "movq $0, 0x90(%%rsp)\n"     // %rdx at time of segmentation fault
+      "cmpw $0x310F, (%%r15)\n"    // RDTSC
+      "jz   3f\n"
+      "movq $0, 0xA0(%%rsp)\n"     // %rcx at time of segmentation fault
+    "3:addq $2, 0xB0(%%rsp)\n"     // %rip at time of segmentation fault
+      "cmpw $0x010F, (%%r15)\n"    // RDTSC
+      "jnz  4f\n"
+      "addq $1, 0xB0(%%rsp)\n"     // %rip at time of segmentation fault
+    "4:ret\n"
+    "5:mov  $12, %%edx\n"          // len = 3*sizeof(int)
+    "6:mov  $0, %%eax\n"           // NR_read
+      "syscall\n"
+      "cmp  $-4, %%eax\n"          // EINTR
+      "jz   6b\n"
+      "cmp  %%rax, %%rdx\n"
+      "jnz  2b\n"
+      "mov  0(%%rsp), %%eax\n"
+      "mov  4(%%rsp), %%edx\n"
+      "mov  8(%%rsp), %%ecx\n"
+      "add  $12, %%rsp\n"
+      "mov  %%rdx, 0x90(%%rsp)\n"  // %rdx at time of segmentation fault
+      "cmpw $0x310F, (%%r15)\n"    // RDTSC
+      "jz   7f\n"
+      "mov  %%rcx, 0xA0(%%rsp)\n"  // %rcx at time of segmentation fault
+    "7:mov  %%rax, 0x98(%%rsp)\n"  // %rax at time of segmentation fault
+      "jmp  3b\n"
+
+      // If the instruction is INT 0, then this was probably the result
+      // of playground::Library being unable to find a way to safely
+      // rewrite the system call instruction. Retrieve the CPU register
+      // at the time of the segmentation fault and invoke syscallWrapper().
+    "8:cmpw $0x00CD, (%%r15)\n"    // INT $0x0
+      "jnz  16f\n"
+#ifndef NDEBUG
+      "lea  200f(%%rip), %%rdi\n"
+      "call playground$debugMessage\n"
+#endif
+      "mov  0x98(%%rsp), %%rax\n"  // %rax at time of segmentation fault
+      "mov  0x70(%%rsp), %%rdi\n"  // %rdi at time of segmentation fault
+      "mov  0x78(%%rsp), %%rsi\n"  // %rsi at time of segmentation fault
+      "mov  0x90(%%rsp), %%rdx\n"  // %rdx at time of segmentation fault
+      "mov  0x40(%%rsp), %%r10\n"  // %r10 at time of segmentation fault
+      "mov  0x30(%%rsp), %%r8\n"   // %r8  at time of segmentation fault
+      "mov  0x38(%%rsp), %%r9\n"   // %r9  at time of segmentation fault
+
+      // Handle rt_sigprocmask()
+      "cmp  $14, %%rax\n"          // NR_rt_sigprocmask
+      "jnz  12f\n"
+      "mov  $-22, %%rax\n"         // -EINVAL
+      "cmp  $8, %%r10\n"           // %r10 = sigsetsize (8 bytes = 64 signals)
+      "jl   7b\n"
+      "mov  0x130(%%rsp), %%r10\n" // signal mask at time of segmentation fault
+      "test %%rsi, %%rsi\n"        // only set mask, if set is non-NULL
+      "jz   11f\n"
+      "mov  0(%%rsi), %%rsi\n"
+      "cmp  $0, %%rdi\n"           // %rdi = how (SIG_BLOCK)
+      "jnz  9f\n"
+      "or   %%rsi, 0x130(%%rsp)\n" // signal mask at time of segmentation fault
+      "jmp  11f\n"
+    "9:cmp  $1, %%rdi\n"           // %rdi = how (SIG_UNBLOCK)
+      "jnz  10f\n"
+      "xor  $-1, %%rsi\n"
+      "and  %%rsi, 0x130(%%rsp)\n" // signal mask at time of segmentation fault
+      "jmp  11f\n"
+   "10:cmp  $2, %%rdi\n"           // %rdi = how (SIG_SETMASK)
+      "jnz  7b\n"
+      "mov  %%rsi, 0x130(%%rsp)\n" // signal mask at time of segmentation fault
+   "11:xor  %%rax, %%rax\n"
+      "test %%rdx, %%rdx\n"        // only return old mask, if set is non-NULL
+      "jz   7b\n"
+      "mov  %%r10, 0(%%rdx)\n"     // old_set
+      "jmp  7b\n"
+
+      // Handle rt_sigreturn()
+   "12:cmp  $15, %%rax\n"          // NR_rt_sigreturn
+      "jnz  14f\n"
+      "mov  0xA8(%%rsp), %%rsp\n"  // %rsp at time of segmentation fault
+   "13:syscall\n"                  // rt_sigreturn() is unrestricted
+      "mov  $66, %%edi\n"          // rt_sigreturn() should never return
+      "mov  $231, %%eax\n"         // NR_exit_group
+      "jmp  13b\n"
+
+      // Copy signal frame onto new stack. See clone.cc for details
+   "14:cmp  $56+0xF000, %%rax\n"   // NR_clone + 0xF000
+      "jnz  15f\n"
+      "lea  8(%%rsp), %%rax\n"     // retain stack frame upon returning
+      "mov  %%rax, 0xA8(%%rsp)\n"  // %rsp at time of segmentation fault
+      "jmp  7b\n"
+
+      // Forward system call to syscallWrapper()
+   "15:lea  7b(%%rip), %%rcx\n"
+      "push %%rcx\n"
+      "push 0xB8(%%rsp)\n"         // %rip at time of segmentation fault
+      "lea  playground$syscallWrapper(%%rip), %%rcx\n"
+      "jmp  *%%rcx\n"
+
+      // In order to implement SA_NODEFER, we have to keep track of recursive
+      // calls to SIGSEGV handlers. This means we have to increment a counter
+      // before calling the user's signal handler, and decrement it on
+      // leaving the user's signal handler.
+      // Some signal handlers look at the return address of the signal
+      // stack, and more importantly "gdb" uses the call to rt_sigreturn()
+      // as a magic signature when doing stacktraces. So, we have to use
+      // a little more unusual code to regain control after the user's
+      // signal handler is done. We adjust the return address to point to
+      // non-executable memory. And when we trigger another SEGV we pop the
+      // extraneous signal frame and then call rt_sigreturn().
+      // N.B. We currently do not correctly adjust the SEGV counter, if the
+      // user's signal handler exits in way other than by returning (e.g. by
+      // directly calling rt_sigreturn(), or by calling siglongjmp()).
+   "16:lea  22f(%%rip), %%r14\n"
+      "cmp  %%r14, %%r15\n"
+      "jnz  17f\n"                 // check if returning from user's handler
+      "decl %%gs:0x105C-0xE0\n"    // decrement SEGV recursion counter
+      "mov  0xA8(%%rsp), %%rsp\n"  // %rsp at time of segmentation fault
+      "mov  $0xF, %%eax\n"         // NR_rt_sigreturn
+      "syscall\n"
+
+      // This was a genuine segmentation fault. Check Sandbox::sa_segv_ for
+      // what we are supposed to do.
+   "17:mov  playground$sa_segv@GOTPCREL(%%rip), %%rax\n"
+      "cmp  $0, 0(%%rax)\n"        // SIG_DFL
+      "jz   18f\n"
+      "cmp  $1, 0(%%rax)\n"        // SIG_IGN
+      "jnz  19f\n"                 // can't really ignore synchronous signals
+
+      // Trigger the kernel's default signal disposition. The only way we can
+      // do this from seccomp mode is by blocking the signal and retriggering
+      // it.
+   "18:orb  $4, 0x131(%%rsp)\n"    // signal mask at time of segmentation fault
+      "ret\n"
+
+      // Check sa_flags:
+      //  - We can ignore SA_NOCLDSTOP, SA_NOCLDWAIT, and SA_RESTART as they
+      //    do not have any effect for SIGSEGV.
+      //  - On x86-64, we can also ignore SA_SIGINFO, as the calling
+      //    conventions for sa_handler() are a subset of the conventions for
+      //    sa_sigaction().
+      //  - We have to always register our signal handler with SA_NODEFER so
+      //    that the user's signal handler can make system calls which might
+      //    require additional help from our SEGV handler.
+      //  - If the user's signal handler wasn't supposed to be SA_NODEFER, then
+      //    we emulate this behavior by keeping track of a recursion counter.
+      //
+      // TODO(markus): If/when we add support for sigaltstack(), we have to
+      // handle SA_ONSTACK.
+   "19:cmpl $0, %%gs:0x105C-0xE0\n"// check if we failed inside of SEGV handler
+      "jnz  18b\n"                 // if so, then terminate program
+      "mov  0(%%rax), %%rbx\n"     // sa_segv_.sa_sigaction
+      "mov  8(%%rax), %%rcx\n"     // sa_segv_.sa_flags
+      "btl  $31, %%ecx\n"          // SA_RESETHAND
+      "jnc  20f\n"
+      "movq $0, 0(%%rax)\n"        // set handler to SIG_DFL
+   "20:btl  $30, %%ecx\n"          // SA_NODEFER
+      "jc   21f\n"
+      "mov  %%r14, 0(%%rsp)\n"     // trigger a SEGV on return, so that we can
+      "incl %%gs:0x105C-0xE0\n"    //   clean up state; incr. recursion counter
+   "21:jmp  *%%rbx\n"              // call user's signal handler
+
+
+      // Non-executable version of the restorer function. We use this to
+      // trigger a SEGV upon returning from the user's signal handler, giving
+      // us an ability to clean up prior to returning from the SEGV handler.
+      ".pushsection .data\n"       // move code into non-executable section
+   "22:mov  $0xF, %%rax\n"         // gdb looks for this signature when doing
+      "syscall\n"                  //   backtraces
+      ".popsection\n"
+#elif defined(__i386__)
+      // Inspect instruction at the point where the segmentation fault
+      // happened. If it is RDTSC, forward the request to the trusted
+      // thread.
+      "mov  $-3, %%ebx\n"          // request for RDTSC
+      "mov  0xDC(%%esp), %%ebp\n"  // %eip at time of segmentation fault
+      "cmpw $0x310F, (%%ebp)\n"    // RDTSC
+      "jz   0f\n"
+      "cmpw $0x010F, (%%ebp)\n"    // RDTSCP
+      "jnz  9f\n"
+      "cmpb $0xF9, 2(%%ebp)\n"
+      "jnz  9f\n"
+      "mov  $-4, %%ebx\n"          // request for RDTSCP
+    "0:"
+#ifndef NDEBUG
+      "lea  100f, %%eax\n"
+      "push %%eax\n"
+      "call playground$debugMessage\n"
+      "sub  $4, %%esp\n"
+#else
+      "sub  $8, %%esp\n"           // allocate buffer for receiving timestamp
+#endif
+      "push %%ebx\n"
+      "mov  %%fs:16, %%ebx\n"      // fd  = threadFdPub
+      "mov  %%esp, %%ecx\n"        // buf = %esp
+      "mov  $4, %%edx\n"           // len = sizeof(int)
+    "1:mov  %%edx, %%eax\n"        // NR_write
+      "int  $0x80\n"
+      "cmp  %%eax, %%edx\n"
+      "jz   7f\n"
+      "cmp  $-4, %%eax\n"          // EINTR
+      "jz   1b\n"
+    "2:add  $12, %%esp\n"          // remove temporary buffer from stack
+      "xor  %%eax, %%eax\n"
+      "movl $0, 0xC8(%%esp)\n"     // %edx at time of segmentation fault
+      "cmpw $0x310F, (%%ebp)\n"    // RDTSC
+      "jz   3f\n"
+      "movl $0, 0xCC(%%esp)\n"     // %ecx at time of segmentation fault
+    "3:mov  %%eax, 0xD0(%%esp)\n"  // %eax at time of segmentation fault
+    "4:mov  0xDC(%%esp), %%ebp\n"  // %eip at time of segmentation fault
+      "addl $2, 0xDC(%%esp)\n"     // %eip at time of segmentation fault
+      "cmpw $0x010F, (%%ebp)\n"    // RDTSCP
+      "jnz  5f\n"
+      "addl $1, 0xDC(%%esp)\n"     // %eip at time of segmentation fault
+    "5:sub  $0x1C8, %%esp\n"       // a legacy signal stack is much larger
+      "mov  0x1CC(%%esp), %%eax\n" // push signal number
+      "push %%eax\n"
+      "lea  0x270(%%esp), %%esi\n" // copy siginfo register values
+      "lea  0x4(%%esp), %%edi\n"   //     into new location
+      "mov  $22, %%ecx\n"
+      "cld\n"
+      "rep movsl\n"
+      "mov  0x2C8(%%esp), %%ebx\n" // copy first half of signal mask
+      "mov  %%ebx, 0x54(%%esp)\n"
+      "lea  6f, %%esi\n"           // copy "magic" restorer function
+      "push %%esi\n"               // push restorer function
+      "lea  0x2D4(%%esp), %%edi\n" // patch up retcode magic numbers
+      "movb $2, %%cl\n"
+      "rep movsl\n"
+      "ret\n"                      // return to restorer function
+
+      // The restorer function is sometimes used by gdb as a magic marker to
+      // recognize signal stack frames. Don't change any of the next three
+      // instructions.
+    "6:pop  %%eax\n"               // remove dummy argument (signo)
+      "mov  $119, %%eax\n"         // NR_sigreturn
+      "int  $0x80\n"
+    "7:mov  $12, %%edx\n"          // len = 3*sizeof(int)
+    "8:mov  $3, %%eax\n"           // NR_read
+      "int  $0x80\n"
+      "cmp  $-4, %%eax\n"          // EINTR
+      "jz   8b\n"
+      "cmp  %%eax, %%edx\n"
+      "jnz  2b\n"
+      "pop  %%eax\n"
+      "pop  %%edx\n"
+      "pop  %%ecx\n"
+      "mov  %%edx, 0xC8(%%esp)\n"  // %edx at time of segmentation fault
+      "cmpw $0x310F, (%%ebp)\n"    // RDTSC
+      "jz   3b\n"
+      "mov  %%ecx, 0xCC(%%esp)\n"  // %ecx at time of segmentation fault
+      "jmp  3b\n"
+
+      // If the instruction is INT 0, then this was probably the result
+      // of playground::Library being unable to find a way to safely
+      // rewrite the system call instruction. Retrieve the CPU register
+      // at the time of the segmentation fault and invoke syscallWrapper().
+    "9:cmpw $0x00CD, (%%ebp)\n"    // INT $0x0
+      "jnz  20f\n"
+#ifndef NDEBUG
+      "lea  200f, %%eax\n"
+      "push %%eax\n"
+      "call playground$debugMessage\n"
+      "add  $0x4, %%esp\n"
+#endif
+      "mov  0xD0(%%esp), %%eax\n"  // %eax at time of segmentation fault
+      "mov  0xC4(%%esp), %%ebx\n"  // %ebx at time of segmentation fault
+      "mov  0xCC(%%esp), %%ecx\n"  // %ecx at time of segmentation fault
+      "mov  0xC8(%%esp), %%edx\n"  // %edx at time of segmentation fault
+      "mov  0xB8(%%esp), %%esi\n"  // %esi at time of segmentation fault
+      "mov  0xB4(%%esp), %%edi\n"  // %edi at time of segmentation fault
+      "mov  0xB2(%%esp), %%ebp\n"  // %ebp at time of segmentation fault
+
+      // Handle sigprocmask() and rt_sigprocmask()
+      "cmp  $175, %%eax\n"         // NR_rt_sigprocmask
+      "jnz  10f\n"
+      "mov  $-22, %%eax\n"         // -EINVAL
+      "cmp  $8, %%esi\n"           // %esi = sigsetsize (8 bytes = 64 signals)
+      "jl   3b\n"
+      "jmp  11f\n"
+   "10:cmp  $126, %%eax\n"         // NR_sigprocmask
+      "jnz  15f\n"
+      "mov  $-22, %%eax\n"
+   "11:mov  0xFC(%%esp), %%edi\n"  // signal mask at time of segmentation fault
+      "mov  0x100(%%esp), %%ebp\n"
+      "test %%ecx, %%ecx\n"        // only set mask, if set is non-NULL
+      "jz   14f\n"
+      "mov  0(%%ecx), %%esi\n"
+      "mov  4(%%ecx), %%ecx\n"
+      "cmp  $0, %%ebx\n"           // %ebx = how (SIG_BLOCK)
+      "jnz  12f\n"
+      "or   %%esi, 0xFC(%%esp)\n"  // signal mask at time of segmentation fault
+      "or   %%ecx, 0x100(%%esp)\n"
+      "jmp  14f\n"
+   "12:cmp  $1, %%ebx\n"           // %ebx = how (SIG_UNBLOCK)
+      "jnz  13f\n"
+      "xor  $-1, %%esi\n"
+      "xor  $-1, %%ecx\n"
+      "and  %%esi, 0xFC(%%esp)\n"  // signal mask at time of segmentation fault
+      "and  %%ecx, 0x100(%%esp)\n"
+      "jmp  14f\n"
+   "13:cmp  $2, %%ebx\n"           // %ebx = how (SIG_SETMASK)
+      "jnz  3b\n"
+      "mov  %%esi, 0xFC(%%esp)\n"  // signal mask at time of segmentation fault
+      "mov  %%ecx, 0x100(%%esp)\n"
+   "14:xor  %%eax, %%eax\n"
+      "test %%edx, %%edx\n"        // only return old mask, if set is non-NULL
+      "jz   3b\n"
+      "mov  %%edi, 0(%%edx)\n"     // old_set
+      "mov  %%ebp, 4(%%edx)\n"
+      "jmp  3b\n"
+
+      // Handle sigreturn() and rt_sigreturn()
+      // See syscall.cc for a discussion on how we can emulate rt_sigreturn()
+      // by calling sigreturn() with a suitably adjusted stack.
+   "15:cmp  $119, %%eax\n"         // NR_sigreturn
+      "jnz  17f\n"
+      "mov  0xC0(%%esp), %%esp\n"  // %esp at time of segmentation fault
+   "16:int  $0x80\n"               // sigreturn() is unrestricted
+   "17:cmp  $173, %%eax\n"         // NR_rt_sigreturn
+      "jnz  18f\n"
+      "mov  0xC0(%%esp), %%esp\n"  // %esp at time of segmentation fault
+      "sub  $4, %%esp\n"           // add fake return address
+      "jmp  4b\n"
+
+      // Copy signal frame onto new stack. In the process, we have to convert
+      // it from an RT signal frame to a legacy signal frame.
+      // See clone.cc for details
+   "18:cmp  $120+0xF000, %%eax\n"  // NR_clone + 0xF000
+      "jnz  19f\n"
+      "lea  -0x1C8(%%esp), %%eax\n"// retain stack frame upon returning
+      "mov  %%eax, 0xC0(%%esp)\n"  // %esp at time of segmentation fault
+      "jmp  3b\n"
+
+      // Forward system call to syscallWrapper()
+   "19:call playground$syscallWrapper\n"
+      "jmp  3b\n"
+
+      // In order to implement SA_NODEFER, we have to keep track of recursive
+      // calls to SIGSEGV handlers. This means we have to increment a counter
+      // before calling the user's signal handler, and decrement it on
+      // leaving the user's signal handler.
+      // Some signal handlers look at the return address of the signal
+      // stack, and more importantly "gdb" uses the call to {,rt_}sigreturn()
+      // as a magic signature when doing stacktraces. So, we have to use
+      // a little more unusual code to regain control after the user's
+      // signal handler is done. We adjust the return address to point to
+      // non-executable memory. And when we trigger another SEGV we pop the
+      // extraneous signal frame and then call sigreturn().
+      // N.B. We currently do not correctly adjust the SEGV counter, if the
+      // user's signal handler exits in way other than by returning (e.g. by
+      // directly calling {,rt_}sigreturn(), or by calling siglongjmp()).
+   "20:lea  30f, %%edi\n"          // rt-style restorer function
+      "lea  31f, %%esi\n"          // legacy restorer function
+      "cmp  %%ebp, %%edi\n"        // check if returning from user's handler
+      "jnz  21f\n"
+      "decl %%fs:0x1040-0x58\n"    // decrement SEGV recursion counter
+      "mov  0xC0(%%esp), %%esp\n"  // %esp at time of segmentation fault
+      "jmp  29f\n"
+   "21:cmp  %%ebp, %%esi\n"        // check if returning from user's handler
+      "jnz  22f\n"
+      "decl %%fs:0x1040-0x58\n"    // decrement SEGV recursion counter
+      "mov  0xC0(%%esp), %%esp\n"  // %esp at time of segmentation fault
+      "jmp  6b\n"
+
+      // This was a genuine segmentation fault. Check Sandbox::sa_segv_ for
+      // what we are supposed to do.
+   "22:lea  playground$sa_segv, %%eax\n"
+      "cmp  $0, 0(%%eax)\n"        // SIG_DFL
+      "jz   23f\n"
+      "cmp  $1, 0(%%eax)\n"        // SIG_IGN
+      "jnz  24f\n"                 // can't really ignore synchronous signals
+
+      // Trigger the kernel's default signal disposition. The only way we can
+      // do this from seccomp mode is by blocking the signal and retriggering
+      // it.
+   "23:orb  $4, 0xFD(%%esp)\n"     // signal mask at time of segmentation fault
+      "jmp  5b\n"
+
+      // Check sa_flags:
+      //  - We can ignore SA_NOCLDSTOP, SA_NOCLDWAIT, and SA_RESTART as they
+      //    do not have any effect for SIGSEGV.
+      //  - We have to always register our signal handler with SA_NODEFER so
+      //    that the user's signal handler can make system calls which might
+      //    require additional help from our SEGV handler.
+      //  - If the user's signal handler wasn't supposed to be SA_NODEFER, then
+      //    we emulate this behavior by keeping track of a recursion counter.
+      //
+      // TODO(markus): If/when we add support for sigaltstack(), we have to
+      // handle SA_ONSTACK.
+   "24:cmpl $0, %%fs:0x1040-0x58\n"// check if we failed inside of SEGV handler
+      "jnz  23b\n"                 // if so, then terminate program
+      "mov  0(%%eax), %%ebx\n"     // sa_segv_.sa_sigaction
+      "mov  4(%%eax), %%ecx\n"     // sa_segv_.sa_flags
+      "btl  $31, %%ecx\n"          // SA_RESETHAND
+      "jnc  25f\n"
+      "movl $0, 0(%%eax)\n"        // set handler to SIG_DFL
+   "25:btl  $30, %%ecx\n"          // SA_NODEFER
+      "jc   28f\n"
+      "btl  $2, %%ecx\n"           // SA_SIGINFO
+      "jnc  26f\n"
+      "mov  %%edi, 0(%%esp)\n"     // trigger a SEGV on return
+      "incl %%fs:0x1040-0x58\n"    // increment recursion counter
+      "jmp  *%%ebx\n"              // call user's signal handler
+   "26:mov  %%esi, 0(%%esp)\n"
+      "incl %%fs:0x1040-0x58\n"    // increment recursion counter
+
+      // We always register the signal handler to give us rt-style signal
+      // frames. But if the user asked for legacy signal frames, we must
+      // convert the signal frame prior to calling the user's signal handler.
+   "27:sub  $0x1C8, %%esp\n"       // a legacy signal stack is much larger
+      "mov  0x1CC(%%esp), %%eax\n" // push signal number
+      "push %%eax\n"
+      "mov  0x1CC(%%esp), %%eax\n" // push restorer function
+      "push %%eax\n"
+      "lea  0x274(%%esp), %%esi\n" // copy siginfo register values
+      "lea  0x8(%%esp), %%edi\n"   //     into new location
+      "mov  $22, %%ecx\n"
+      "cld\n"
+      "rep movsl\n"
+      "mov  0x2CC(%%esp), %%eax\n" // copy first half of signal mask
+      "mov  %%eax, 0x58(%%esp)\n"
+      "lea  31f, %%esi\n"
+      "lea  0x2D4(%%esp), %%edi\n" // patch up retcode magic numbers
+      "movb $2, %%cl\n"
+      "rep movsl\n"
+      "jmp  *%%ebx\n"              // call user's signal handler
+   "28:lea  6b, %%eax\n"           // set appropriate restorer function
+      "mov  %%eax, 0(%%esp)\n"
+      "btl  $2, %%ecx\n"           // SA_SIGINFO
+      "jnc  27b\n"
+      "lea  29f, %%eax\n"
+      "mov  %%eax, 0(%%esp)\n"     // set appropriate restorer function
+      "jmp  *%%ebx\n"              // call user's signal handler
+   "29:pushl $30f\n"               // emulate rt_sigreturn()
+      "jmp  5b\n"
+
+      // Non-executable versions of the restorer function. We use these to
+      // trigger a SEGV upon returning from the user's signal handler, giving
+      // us an ability to clean up prior to returning from the SEGV handler.
+      ".pushsection .data\n"       // move code into non-executable section
+   "30:mov  $173, %%eax\n"         // NR_rt_sigreturn
+      "int  $0x80\n"               // gdb looks for this signature when doing
+      ".byte 0\n"                  //   backtraces
+   "31:pop  %%eax\n"
+      "mov  $119, %%eax\n"         // NR_sigreturn
+      "int  $0x80\n"
+      ".popsection\n"
+#else
+#error Unsupported target platform
+#endif
+      ".pushsection \".rodata\"\n"
+#ifndef NDEBUG
+  "100:.asciz \"RDTSC(P): Executing handler\\n\"\n"
+  "200:.asciz \"INT $0x0: Executing handler\\n\"\n"
+#endif
+      ".popsection\n"
+  "999:pop  %0\n"
+      : "=g"(fnc)
+      :
+      : "memory"
+#if defined(__x86_64__)
+        , "rsp"
+#elif defined(__i386__)
+        , "esp"
+#endif
+  );
+  return fnc;
+}
+
+SecureMem::Args* Sandbox::getSecureMem() {
+  // Loads the pointer that is stored at a fixed negative offset from the
+  // thread's segment base (%gs on x86-64, %fs on i386). The offsets are magic
+  // numbers; see trusted_thread.cc for how they get us from the TLS to the
+  // beginning of the secure memory area.
+  SecureMem::Args* secure_mem_args;
+#if defined(__x86_64__)
+  asm volatile("movq %%gs:-0xE0, %0\n" : "=q"(secure_mem_args));
+#elif defined(__i386__)
+  asm volatile("movl %%fs:-0x58, %0\n" : "=r"(secure_mem_args));
+#else
+#error Unsupported target platform
+#endif
+  return secure_mem_args;
+}
+
+void Sandbox::snapshotMemoryMappings(int processFd, int proc_self_maps) {
+  // Rewinds "proc_self_maps", passes that descriptor over "processFd", and
+  // then waits until one int has been read back from "processFd". Any failure
+  // along the way is fatal.
+  static const char kFailure[] = "Cannot access /proc/self/maps";
+  SysCalls sys;
+  if (sys.lseek(proc_self_maps, 0, SEEK_SET) != 0) {
+    die(kFailure);
+  }
+  if (!sendFd(processFd, proc_self_maps, -1, NULL, 0)) {
+    die(kFailure);
+  }
+  int ack;
+  if (read(sys, processFd, &ack, sizeof(ack)) != sizeof(ack)) {
+    die(kFailure);
+  }
+}
+
+int Sandbox::supportsSeccompSandbox(int proc_fd) {
+  // Probes for sandbox support by starting the sandbox in a forked child and
+  // checking whether the child gets far enough to report back over a pipe.
+  // The outcome is cached in "status_", so the probe runs at most once.
+  // "proc_fd" is a file descriptor for "/proc", or -1 if the caller has none.
+  // Returns non-zero if the sandbox is usable, and zero otherwise.
+  if (status_ != STATUS_UNKNOWN) {
+    return status_ != STATUS_UNSUPPORTED;
+  }
+  int fds[2];
+  SysCalls sys;
+  if (sys.pipe(fds)) {
+    status_ = STATUS_UNSUPPORTED;
+    return 0;
+  }
+  pid_t pid;
+  switch ((pid = sys.fork())) {
+    case -1:
+      // fork() failed. Release both ends of the pipe before giving up; they
+      // would otherwise leak in the calling process.
+      NOINTR_SYS(sys.close(fds[0]));
+      NOINTR_SYS(sys.close(fds[1]));
+      status_ = STATUS_UNSUPPORTED;
+      return 0;
+    case 0: {
+      // Child: redirect stdin/stdout/stderr to /dev/null (best effort).
+      int devnull = sys.open("/dev/null", O_RDWR, 0);
+      if (devnull >= 0) {
+        sys.dup2(devnull, 0);
+        sys.dup2(devnull, 1);
+        sys.dup2(devnull, 2);
+        sys.close(devnull);
+      }
+      if (proc_fd >= 0) {
+        setProcSelfMaps(sys.openat(proc_fd, "self/maps", O_RDONLY, 0));
+      }
+      startSandbox();
+
+      // Getting here means the sandbox came up; report success to the parent
+      // by sending a single byte.
+      write(sys, fds[1], "", 1);
+
+      // Try to tell the trusted thread to shut down the entire process in an
+      // orderly fashion
+      defaultSystemCallHandler(__NR_exit_group, 0, 0, 0, 0, 0, 0);
+
+      // If that did not work (e.g. because the kernel does not know about the
+      // exit_group() system call), make a direct _exit() system call instead.
+      // This system call is unrestricted in seccomp mode, so it will always
+      // succeed. Normally, we don't like it, because unlike exit_group() it
+      // does not terminate any other thread. But since we know that
+      // exit_group() exists in all kernels which support kernel-level threads,
+      // this is OK, as we only get here for old kernels where _exit() is OK.
+      sys._exit(0);
+    }
+    default:
+      // Parent: close our copy of the write end so that the read below sees
+      // EOF if the child exits without reporting success.
+      NOINTR_SYS(sys.close(fds[1]));
+      char ch;
+      if (read(sys, fds[0], &ch, 1) != 1) {
+        status_ = STATUS_UNSUPPORTED;
+      } else {
+        status_ = STATUS_AVAILABLE;
+      }
+      int rc;
+      NOINTR_SYS(sys.waitpid(pid, &rc, 0));
+      NOINTR_SYS(sys.close(fds[0]));
+      return status_ != STATUS_UNSUPPORTED;
+  }
+}
+
+void Sandbox::setProcSelfMaps(int proc_self_maps) {
+  // Remember the caller-provided descriptor. startSandbox() only opens
+  // "/proc/self/maps" itself if "proc_self_maps_" is still negative.
+  Sandbox::proc_self_maps_ = proc_self_maps;
+}
+
+void Sandbox::startSandbox() {
+  // Main entry point. Dies if the sandbox is known to be unsupported, and is
+  // a no-op if it has already been enabled. Otherwise: install the signal
+  // handlers, create the trusted process, patch system calls in the VDSO and
+  // in a set of well-known libraries, snapshot the memory mappings, and
+  // finally create the trusted thread, which is what turns sandboxing on.
+  if (status_ == STATUS_UNSUPPORTED) {
+    die("The seccomp sandbox is not supported on this computer");
+  } else if (status_ == STATUS_ENABLED) {
+    return;
+  }
+
+  SysCalls sys;
+  if (proc_self_maps_ < 0) {
+    proc_self_maps_        = sys.open("/proc/self/maps", O_RDONLY, 0);
+    if (proc_self_maps_ < 0) {
+      die("Cannot access \"/proc/self/maps\"");
+    }
+  }
+
+  // The pid is unchanged for the entire program, so we can retrieve it once
+  // and store it in a global variable.
+  pid_                     = sys.getpid();
+
+  // Block all signals, except for the RDTSC handler
+  setupSignalHandlers();
+
+  // Get socketpairs for talking to the trusted process
+  int pair[4];
+  if (sys.socketpair(AF_UNIX, SOCK_STREAM, 0, pair) ||
+      sys.socketpair(AF_UNIX, SOCK_STREAM, 0, pair+2)) {
+    die("Failed to create trusted thread");
+  }
+  processFdPub_            = pair[0];
+  cloneFdPub_              = pair[2];
+  SecureMemArgs* secureMem = createTrustedProcess(pair[0], pair[1],
+                                                  pair[2], pair[3]);
+
+  // We find all libraries that have system calls and redirect the system
+  // calls to the sandbox. If we miss any system calls, the application will be
+  // terminated by the kernel's seccomp code. So, from a security point of
+  // view, if this code fails to identify system calls, we are still behaving
+  // correctly.
+  {
+    Maps maps(proc_self_maps_);
+    const char *libs[]     = { "ld", "libc", "librt", "libpthread", NULL };
+
+    // Intercept system calls in the VDSO segment (if any). This has to happen
+    // before intercepting system calls in any of the other libraries, as
+    // the main kernel entry point might be inside of the VDSO and we need to
+    // determine its address before we can compare it to jumps from inside
+    // other libraries.
+    for (Maps::const_iterator iter = maps.begin(); iter != maps.end(); ++iter){
+      Library* library = *iter;
+      if (library->isVDSO() && library->parseElf()) {
+        library->makeWritable(true);
+        library->patchSystemCalls();
+        library->makeWritable(false);
+        break;
+      }
+    }
+
+    // Intercept system calls in libraries that are known to have them.
+    for (Maps::const_iterator iter = maps.begin(); iter != maps.end(); ++iter){
+      Library* library = *iter;
+
+      // Keep the name alive through a named reference. If name() returns its
+      // string by value, calling c_str() directly on that temporary would
+      // leave "mapping" dangling as soon as the statement ends. Binding to a
+      // const reference is correct for both by-value and by-reference returns.
+      const std::string& mapping_name = iter.name();
+      const char* mapping = mapping_name.c_str();
+
+      // Find the actual base name of the mapped library by skipping past any
+      // SPC and forward-slashes. We don't want to accidentally find matches,
+      // because the directory name included part of our well-known lib names.
+      //
+      // Typically, prior to pruning, entries would look something like this:
+      // 08:01 2289011 /lib/libc-2.7.so
+      for (const char *delim = " /"; *delim; ++delim) {
+        const char* skip = strrchr(mapping, *delim);
+        if (skip) {
+          mapping = skip + 1;
+        }
+      }
+
+      for (const char **ptr = libs; *ptr; ptr++) {
+        const char *name = strstr(mapping, *ptr);
+        if (name == mapping) {
+          // Only accept the match if the well-known name is not immediately
+          // followed by another letter (e.g. "libc-2.7.so" matches "libc",
+          // but a longer library name that merely starts with it does not).
+          char ch = name[strlen(*ptr)];
+          if (ch < 'A' || (ch > 'Z' && ch < 'a') || ch > 'z') {
+            if (library->parseElf()) {
+              library->makeWritable(true);
+              library->patchSystemCalls();
+              library->makeWritable(false);
+              break;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  // Take a snapshot of the current memory mappings. These mappings will be
+  // off-limits to all future mmap(), munmap(), mremap(), and mprotect() calls.
+  snapshotMemoryMappings(processFdPub_, proc_self_maps_);
+  NOINTR_SYS(sys.close(proc_self_maps_));
+  proc_self_maps_ = -1;
+
+  // Creating the trusted thread enables sandboxing
+  createTrustedThread(processFdPub_, cloneFdPub_, secureMem);
+
+  // We can no longer check for sandboxing support at this point, but we also
+  // know for a fact that it is available (as we just turned it on). So update
+  // the status to reflect this information.
+  status_ = STATUS_ENABLED;
+}
+
+} // namespace
diff --git a/sandbox/linux/seccomp/sandbox.h b/sandbox/linux/seccomp/sandbox.h
new file mode 100644
index 0000000..8f49575
--- /dev/null
+++ b/sandbox/linux/seccomp/sandbox.h
@@ -0,0 +1,12 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_H__
+#define SANDBOX_H__
+
+extern "C" int  SupportsSeccompSandbox(int proc_fd);  // proc_fd: fd for "/proc", or -1 if not provided
+extern "C" void SeccompSandboxSetProcSelfMaps(int proc_self_maps);  // the sandbox takes ownership of the fd
+extern "C" void StartSeccompSandbox();  // main entry point; enters Seccomp mode
+
+#endif // SANDBOX_H__
diff --git a/sandbox/linux/seccomp/sandbox_impl.h b/sandbox/linux/seccomp/sandbox_impl.h
new file mode 100644
index 0000000..3e99a5510
--- /dev/null
+++ b/sandbox/linux/seccomp/sandbox_impl.h
@@ -0,0 +1,715 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_IMPL_H__
+#define SANDBOX_IMPL_H__
+
+#include <asm/ldt.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/prctl.h>
+#include <linux/unistd.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/ptrace.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+// Evaluates the system call expression "x", repeating it for as long as it
+// returns a negative value with EINTR, and yields the result of the final
+// attempt. Relies on a variable named "sys" that has a "my_errno" member
+// being in scope at the point of use.
+#define NOINTR_SYS(x)                                                         \
+  ({                                                                          \
+    typeof(x) rc__;                                                           \
+    do {                                                                      \
+      rc__ = (x);                                                             \
+    } while (rc__ < 0 && sys.my_errno == EINTR);                              \
+    rc__;                                                                     \
+  })
+
+#ifdef __cplusplus
+#include <map>
+#include <vector>
+#include "sandbox.h"
+#include "securemem.h"
+#include "tls.h"
+
+namespace playground {
+
+class Sandbox {
+  // TODO(markus): restrict access to our private file handles
+ public:
+  enum { kMaxThreads = 100 };
+
+
+  // There are a lot of reasons why the Seccomp sandbox might not be available.
+  // This could be because the kernel does not support Seccomp mode, or it
+  // could be because we fail to successfully rewrite all system call entry
+  // points.
+  // "proc_fd" should be a file descriptor for "/proc", or -1 if not provided
+  // by the caller.
+  static int supportsSeccompSandbox(int proc_fd)
+                                         asm("SupportsSeccompSandbox");
+
+  // The sandbox needs to be able to access "/proc/self/maps". If this file
+  // is not accessible when "startSandbox()" gets called, the caller can
+  // provide an already opened file descriptor by calling "setProcSelfMaps()".
+  // The sandbox becomes the new owner of this file descriptor and will
+  // eventually close it when "startSandbox()" executes.
+  static void setProcSelfMaps(int proc_self_maps)
+                                         asm("SeccompSandboxSetProcSelfMaps");
+
+  // This is the main public entry point. It finds all system calls that
+  // need rewriting, sets up the resources needed by the sandbox, and
+  // enters Seccomp mode.
+  static void startSandbox()             asm("StartSeccompSandbox");
+
+ private:
+// syscall_table.c has to be implemented in C, as C++ does not support
+// designated initializers for arrays. The only other alternative would be
+// to have a source code generator for this table.
+//
+// We would still like the C source file to include our header file. This
+// requires some define statements to transform C++ specific constructs to
+// something that is palatable to a C compiler.
+#define STATIC static
+#define SecureMemArgs SecureMem::Args
+  // Clone() is special as it has a wrapper in syscall_table.c. The wrapper
+  // adds one extra argument (the pointer to the saved registers) and then
+  // calls playground$sandbox__clone().
+  static long sandbox_clone(int flags, char* stack, int* pid, int* ctid,
+                            void* tls, void* wrapper_sp)
+    asm("playground$sandbox__clone")
+  #if defined(__x86_64__)
+    __attribute__((visibility("internal")))
+#endif
+    ;
+#else
+#define STATIC
+#define bool int
+#define SecureMemArgs void
+  // This is the wrapper entry point that is found in the syscall_table.
+  long sandbox_clone(int flags, char* stack, int* pid, int* ctid, void* tls)
+                                         asm("playground$sandbox_clone");
+#endif
+
+  // Entry points for sandboxed code that is attempting to make system calls
+  STATIC long sandbox_access(const char*, int)
+                                         asm("playground$sandbox_access");
+  STATIC long sandbox_exit(int status)    asm("playground$sandbox_exit");
+  STATIC long sandbox_getpid()            asm("playground$sandbox_getpid");
+  #if defined(__NR_getsockopt)
+  STATIC long sandbox_getsockopt(int, int, int, void*, socklen_t*)
+                                         asm("playground$sandbox_getsockopt");
+  #endif
+  STATIC long sandbox_gettid()            asm("playground$sandbox_gettid");
+  STATIC long sandbox_ioctl(int d, int req, void* arg)
+                                         asm("playground$sandbox_ioctl");
+  #if defined(__NR_ipc)
+  STATIC long sandbox_ipc(unsigned, int, int, int, void*, long)
+                                         asm("playground$sandbox_ipc");
+  #endif
+  STATIC long sandbox_lstat(const char* path, void* buf)
+                                         asm("playground$sandbox_lstat");
+  #if defined(__NR_lstat64)
+  STATIC long sandbox_lstat64(const char *path, void* b)
+                                         asm("playground$sandbox_lstat64");
+  #endif
+  STATIC long sandbox_madvise(void*, size_t, int)
+                                         asm("playground$sandbox_madvise");
+  STATIC void *sandbox_mmap(void* start, size_t length, int prot, int flags,
+                            int fd, off_t offset)
+                                         asm("playground$sandbox_mmap");
+  STATIC long sandbox_mprotect(const void*, size_t, int)
+                                         asm("playground$sandbox_mprotect");
+  STATIC long sandbox_munmap(void* start, size_t length)
+                                         asm("playground$sandbox_munmap");
+  STATIC long sandbox_open(const char*, int, mode_t)
+                                         asm("playground$sandbox_open");
+  #if defined(__NR_recvfrom)
+  STATIC ssize_t sandbox_recvfrom(int, void*, size_t, int, void*, socklen_t*)
+                                         asm("playground$sandbox_recvfrom");
+  STATIC ssize_t sandbox_recvmsg(int, struct msghdr*, int)
+                                         asm("playground$sandbox_recvmsg");
+  #endif
+  #if defined(__NR_rt_sigaction)
+  STATIC long sandbox_rt_sigaction(int, const void*, void*, size_t)
+                                        asm("playground$sandbox_rt_sigaction");
+  #endif
+  #if defined(__NR_rt_sigprocmask)
+  STATIC long sandbox_rt_sigprocmask(int how, const void*, void*, size_t)
+                                      asm("playground$sandbox_rt_sigprocmask");
+  #endif
+  #if defined(__NR_sendmsg)
+  STATIC size_t sandbox_sendmsg(int, const struct msghdr*, int)
+                                         asm("playground$sandbox_sendmsg");
+  STATIC ssize_t sandbox_sendto(int, const void*, size_t, int, const void*,
+                                socklen_t)asm("playground$sandbox_sendto");
+  #endif
+  #if defined(__NR_shmat)
+  STATIC void* sandbox_shmat(int, const void*, int)
+                                         asm("playground$sandbox_shmat");
+  STATIC long sandbox_shmctl(int, int, void*)
+                                         asm("playground$sandbox_shmctl");
+  STATIC long sandbox_shmdt(const void*)  asm("playground$sandbox_shmdt");
+  STATIC long sandbox_shmget(int, size_t, int)
+                                         asm("playground$sandbox_shmget");
+  #endif
+  #if defined(__NR_setsockopt)
+  STATIC long sandbox_setsockopt(int, int, int, const void*, socklen_t)
+                                         asm("playground$sandbox_setsockopt");
+  #endif
+  #if defined(__NR_sigaction)
+  STATIC long sandbox_sigaction(int, const void*, void*)
+                                         asm("playground$sandbox_sigaction");
+  #endif
+  #if defined(__NR_signal)
+  STATIC void* sandbox_signal(int, const void*)
+                                         asm("playground$sandbox_signal");
+  #endif
+  #if defined(__NR_sigprocmask)
+  STATIC long sandbox_sigprocmask(int how, const void*, void*)
+                                         asm("playground$sandbox_sigprocmask");
+  #endif
+  #if defined(__NR_socketcall)
+  STATIC long sandbox_socketcall(int call, void* args)
+                                         asm("playground$sandbox_socketcall");
+  #endif
+  STATIC long sandbox_stat(const char* path, void* buf)
+                                         asm("playground$sandbox_stat");
+  #if defined(__NR_stat64)
+  STATIC long sandbox_stat64(const char *path, void* b)
+                                         asm("playground$sandbox_stat64");
+  #endif
+
+  // Functions for system calls that need to be handled in the trusted process
+  STATIC bool process_access(int, int, int, int, SecureMemArgs*)
+                                         asm("playground$process_access");
+  STATIC bool process_clone(int, int, int, int, SecureMemArgs*)
+                                         asm("playground$process_clone");
+  STATIC bool process_exit(int, int, int, int, SecureMemArgs*)
+                                         asm("playground$process_exit");
+  #if defined(__NR_getsockopt)
+  STATIC bool process_getsockopt(int, int, int, int, SecureMemArgs*)
+                                         asm("playground$process_getsockopt");
+  #endif
+  STATIC bool process_ioctl(int, int, int, int, SecureMemArgs*)
+                                         asm("playground$process_ioctl");
+  #if defined(__NR_ipc)
+  STATIC bool process_ipc(int, int, int, int, SecureMemArgs*)
+                                         asm("playground$process_ipc");
+  #endif
+  STATIC bool process_madvise(int, int, int, int, SecureMemArgs*)
+                                         asm("playground$process_madvise");
+  STATIC bool process_mmap(int, int, int, int, SecureMemArgs*)
+                                         asm("playground$process_mmap");
+  STATIC bool process_mprotect(int, int, int, int, SecureMemArgs*)
+                                         asm("playground$process_mprotect");
+  STATIC bool process_munmap(int, int, int, int, SecureMemArgs*)
+                                         asm("playground$process_munmap");
+  STATIC bool process_open(int, int, int, int, SecureMemArgs*)
+                                         asm("playground$process_open");
+  #if defined(__NR_recvfrom)
+  STATIC bool process_recvfrom(int, int, int, int, SecureMemArgs*)
+                                         asm("playground$process_recvfrom");
+  STATIC bool process_recvmsg(int, int, int, int, SecureMemArgs*)
+                                         asm("playground$process_recvmsg");
+  STATIC bool process_sendmsg(int, int, int, int, SecureMemArgs*)
+                                         asm("playground$process_sendmsg");
+  STATIC bool process_sendto(int, int, int, int, SecureMemArgs*)
+                                         asm("playground$process_sendto");
+  STATIC bool process_setsockopt(int, int, int, int, SecureMemArgs*)
+                                         asm("playground$process_setsockopt");
+  #endif
+  #if defined(__NR_shmat)
+  STATIC bool process_shmat(int, int, int, int, SecureMemArgs*)
+                                         asm("playground$process_shmat");
+  STATIC bool process_shmctl(int, int, int, int, SecureMemArgs*)
+                                         asm("playground$process_shmctl");
+  STATIC bool process_shmdt(int, int, int, int, SecureMemArgs*)
+                                         asm("playground$process_shmdt");
+  STATIC bool process_shmget(int, int, int, int, SecureMemArgs*)
+                                         asm("playground$process_shmget");
+  #endif
+  STATIC bool process_sigaction(int, int, int, int, SecureMemArgs*)
+                                         asm("playground$process_sigaction");
+  #if defined(__NR_socketcall)
+  STATIC bool process_socketcall(int, int, int, int, SecureMemArgs*)
+                                         asm("playground$process_socketcall");
+  #endif
+  STATIC bool process_stat(int, int, int, int, SecureMemArgs*)
+                                         asm("playground$process_stat");
+
+#ifdef __cplusplus
+  friend class Debug;
+  friend class Library;
+  friend class Maps;
+  friend class Mutex;
+  friend class SecureMem;
+  friend class TLS;
+
+  // Define our own inline system calls. These calls will not be rewritten
+  // to point to the sandboxed wrapper functions. They thus allow us to
+  // make actual system calls (e.g. in the sandbox initialization code, and
+  // in the trusted process)
+  class SysCalls {  // system call wrappers; the support header is included inside the class body
+   public:
+    #define SYS_CPLUSPLUS
+    #define SYS_ERRNO     my_errno  // NOTE(review): presumably makes the header report errors via my_errno — confirm
+    #define SYS_INLINE    inline
+    #define SYS_PREFIX    -1
+    #undef  SYS_LINUX_SYSCALL_SUPPORT_H  // NOTE(review): presumably the header's include guard — confirm
+    #include "linux_syscall_support.h"
+    SysCalls() : my_errno(0) { }
+    int my_errno;  // starts out as 0; NOINTR_SYS() compares it against EINTR
+  };
+  #ifdef __NR_mmap2
+    #define      MMAP      mmap2
+    #define __NR_MMAP __NR_mmap2
+  #else
+    #define      MMAP      mmap
+    #define __NR_MMAP __NR_mmap
+  #endif
+
+  // Print an error message and terminate the program. Used for fatal errors.
+  static void die(const char *msg = 0) __attribute__((noreturn)) {
+    // Writes the optional message followed by a newline to file descriptor 2,
+    // then keeps requesting termination with exit status 1 in a loop, so
+    // that this function never returns.
+    SysCalls sys;
+    if (msg != 0) {
+      sys.write(2, msg, strlen(msg));
+      sys.write(2, "\n", 1);
+    }
+    while (true) {
+      sys.exit_group(1);
+      sys._exit(1);
+    }
+  }
+
+  // Wrapper around "read()" that can deal with partial and interrupted reads
+  // and that does not modify the global errno variable.
+  static ssize_t read(SysCalls& sys, int fd, void* buf, size_t len) {
+    // Keeps reading until "len" bytes have arrived, end-of-file is reached,
+    // or an error other than EINTR occurs. Returns the number of bytes read,
+    // or the negative result of the failing call. Errors are reported in
+    // "sys.my_errno".
+
+    // A length that does not fit into ssize_t could not be returned.
+    if (static_cast<ssize_t>(len) < 0) {
+      sys.my_errno = EINVAL;
+      return -1;
+    }
+    char* const dst = reinterpret_cast<char*>(buf);
+    size_t done = 0;
+    while (done < len) {
+      const ssize_t rc = NOINTR_SYS(sys.read(fd, dst + done, len - done));
+      if (rc < 0) {
+        return rc;
+      }
+      if (rc == 0) {
+        break;  // end of file
+      }
+      done += rc;
+    }
+    return done;
+  }
+
+  // Wrapper around "write()" that can deal with interrupted writes and that
+  // does not modify the global errno variable.
+  static ssize_t write(SysCalls& sys, int fd, const void* buf, size_t len){
+    // Issues the write, restarting it whenever it is interrupted (EINTR).
+    // The result of the last attempt is returned as-is; a short write is not
+    // retried.
+    const ssize_t rc = NOINTR_SYS(sys.write(fd, buf, len));
+    return rc;
+  }
+
+  // Sends a file handle to another process.
+  // N.B. trusted_thread.cc has an assembly version of this function that
+  //      is safe to use without a call stack. If the wire-format is changed,
+  //      make sure to update the assembly code.
+  static bool sendFd(int transport, int fd0, int fd1, const void* buf,
+                     size_t len);
+
+  // If getFd() fails, it will set the first valid fd slot (e.g. fd0) to
+  // -errno.
+  static bool getFd(int transport, int* fd0, int* fd1, void* buf,
+                    size_t* len);
+
+  // Data structures used to forward system calls to the trusted process.
+  struct Accept {  // NOTE(review): presumably the arguments of accept() — confirm
+    int        sockfd;
+    void*      addr;
+    socklen_t* addrlen;
+  } __attribute__((packed));
+
+  struct Accept4 {  // same fields as Accept, plus "flags"
+    int        sockfd;
+    void*      addr;
+    socklen_t* addrlen;
+    int        flags;
+  } __attribute__((packed));
+
+  struct Access {  // carries the length of the path, not a pointer to it
+    size_t path_length;
+    int    mode;
+  } __attribute__((packed));
+
+  struct Bind {  // NOTE(review): presumably the arguments of bind() — confirm
+    int       sockfd;
+    void*     addr;
+    socklen_t addrlen;
+  } __attribute__((packed));
+
+  struct Clone {  // the first five fields match sandbox_clone()'s flags/stack/pid/ctid/tls parameters
+    int       flags;
+    char*     stack;
+    int*      pid;
+    int*      ctid;
+    void*     tls;
+    #if defined(__x86_64__)
+      struct {  // NOTE(review): presumably the saved registers that sandbox_clone()'s wrapper_sp refers to — confirm
+        void* r15;
+        void* r14;
+        void* r13;
+        void* r12;
+        void* r11;
+        void* r10;
+        void* r9;
+        void* r8;
+        void* rdi;
+        void* rsi;
+        void* rdx;
+        void* rcx;
+        void* rbx;
+        void* rbp;
+        void* fake_ret;
+      } regs64 __attribute__((packed));
+    #elif defined(__i386__)
+      struct {  // i386 counterpart of regs64
+        void* ebp;
+        void* edi;
+        void* esi;
+        void* edx;
+        void* ecx;
+        void* ebx;
+      } regs32 __attribute__((packed));
+    #else
+    #error Unsupported target platform
+    #endif
+    void*     ret;  // NOTE(review): presumably the return address that follows the saved registers — confirm
+  } __attribute__((packed));
+
+  struct Connect {  // NOTE(review): presumably the arguments of connect() — confirm
+    int       sockfd;
+    void*     addr;
+    socklen_t addrlen;
+  } __attribute__((packed));
+
+  struct GetSockName {  // NOTE(review): presumably the arguments of getsockname() — confirm
+    int        sockfd;
+    void*      name;
+    socklen_t* namelen;
+  } __attribute__((packed));
+
+  struct GetPeerName {  // same field layout as GetSockName
+    int        sockfd;
+    void*      name;
+    socklen_t* namelen;
+  } __attribute__((packed));
+
+  struct GetSockOpt {  // field types match the parameter list of sandbox_getsockopt()
+    int        sockfd;
+    int        level;
+    int        optname;
+    void*      optval;
+    socklen_t* optlen;
+  } __attribute__((packed));
+
+  struct IOCtl {  // same names and order as the parameters of sandbox_ioctl()
+    int  d;
+    int  req;
+    void *arg;
+  } __attribute__((packed));
+
+  #if defined(__NR_ipc)
+  struct IPC {  // field types match the parameter list of sandbox_ipc()
+    unsigned call;
+    int      first;
+    int      second;
+    int      third;
+    void*    ptr;
+    long     fifth;
+  } __attribute__((packed));
+  #endif
+
+  struct Listen {  // NOTE(review): presumably the arguments of listen() — confirm
+    int sockfd;
+    int backlog;
+  } __attribute__((packed));
+
+  struct MAdvise {  // same order as sandbox_madvise()'s parameters; "start" is const here
+    const void*  start;
+    size_t       len;
+    int          advice;
+  } __attribute__((packed));
+
+  struct MMap {  // same names and order as the parameters of sandbox_mmap()
+    void*  start;
+    size_t length;
+    int    prot;
+    int    flags;
+    int    fd;
+    off_t  offset;
+  } __attribute__((packed));
+
+  struct MProtect {
+    const void*  addr;
+    size_t       len;
+    int          prot;
+  };
+
+  struct MUnmap {
+    void*  start;
+    size_t length;
+  } __attribute__((packed));
+
+  struct Open {
+    size_t path_length;
+    int    flags;
+    mode_t mode;
+  } __attribute__((packed));
+
+  struct Recv {
+    int    sockfd;
+    void*  buf;
+    size_t len;
+    int    flags;
+  } __attribute__((packed));
+
+  struct RecvFrom {
+    int       sockfd;
+    void*     buf;
+    size_t    len;
+    int       flags;
+    void*     from;
+    socklen_t *fromlen;
+  } __attribute__((packed));
+
+  struct RecvMsg {
+    int                  sockfd;
+    struct msghdr*       msg;
+    int                  flags;
+  } __attribute__((packed));
+
+  struct Send {
+    int         sockfd;
+    const void* buf;
+    size_t      len;
+    int         flags;
+  } __attribute__((packed));
+
+  struct SendMsg {
+    int                  sockfd;
+    const struct msghdr* msg;
+    int                  flags;
+  } __attribute__((packed));
+
+  struct SendTo {
+    int         sockfd;
+    const void* buf;
+    size_t      len;
+    int         flags;
+    const void* to;
+    socklen_t   tolen;
+  } __attribute__((packed));
+
+  struct SetSockOpt {
+    int         sockfd;
+    int         level;
+    int         optname;
+    const void* optval;
+    socklen_t   optlen;
+  } __attribute__((packed));
+
+  #if defined(__NR_shmat)
+  struct ShmAt {
+    int         shmid;
+    const void* shmaddr;
+    int         shmflg;
+ } __attribute__((packed));
+
+  struct ShmCtl {
+    int  shmid;
+    int  cmd;
+    void *buf;
+  } __attribute__((packed));
+
+  struct ShmDt {
+    const void *shmaddr;
+  } __attribute__((packed));
+
+  struct ShmGet {
+    int    key;
+    size_t size;
+    int    shmflg;
+  } __attribute__((packed));
+  #endif
+
+  struct ShutDown {
+    int sockfd;
+    int how;
+  } __attribute__((packed));
+
+  struct SigAction {
+    int                               sysnum;
+    int                               signum;
+    const SysCalls::kernel_sigaction* action;
+    const SysCalls::kernel_sigaction* old_action;
+    size_t                            sigsetsize;
+  } __attribute__((packed));
+
+  struct Socket {
+    int domain;
+    int type;
+    int protocol;
+  } __attribute__((packed));
+
+  struct SocketPair {
+    int  domain;
+    int  type;
+    int  protocol;
+    int* pair;
+  } __attribute__((packed));
+
+  #if defined(__NR_socketcall)
+  struct SocketCall {
+    int    call;
+    void*  arg_ptr;
+    union {
+      Socket      socket;
+      Bind        bind;
+      Connect     connect;
+      Listen      listen;
+      Accept      accept;
+      GetSockName getsockname;
+      GetPeerName getpeername;
+      SocketPair  socketpair;
+      Send        send;
+      Recv        recv;
+      SendTo      sendto;
+      RecvFrom    recvfrom;
+      ShutDown    shutdown;
+      SetSockOpt  setsockopt;
+      GetSockOpt  getsockopt;
+      SendMsg     sendmsg;
+      RecvMsg     recvmsg;
+      Accept4     accept4;
+    } args;
+  } __attribute__((packed));
+  #endif
+
+  struct Stat {
+    int    sysnum;
+    size_t path_length;
+    void*  buf;
+  } __attribute__((packed));
+
+  // Thread local data available from each sandboxed thread.
+  enum { TLS_COOKIE, TLS_TID, TLS_THREAD_FD };
+  static long long cookie() { return TLS::getTLSValue<long long>(TLS_COOKIE); }
+  static int tid()          { return TLS::getTLSValue<int>(TLS_TID); }
+  static int threadFdPub()  { return TLS::getTLSValue<int>(TLS_THREAD_FD); }
+  static int processFdPub() { return processFdPub_; }
+  static kernel_sigset_t* signalMask() { return &getSecureMem()->signalMask; }
+
+  // The SEGV handler knows how to handle RDTSC instructions
+  static void setupSignalHandlers();
+  static void (*segv())(int signo, SysCalls::siginfo *context, void *unused);
+
+  // If no specific handler has been registered for a system call, call this
+  // function which asks the trusted thread to perform the call. This is used
+  // for system calls that are not restricted.
+  static void* defaultSystemCallHandler(int syscallNum, void* arg0,
+                                        void* arg1, void* arg2, void* arg3,
+                                        void* arg4, void* arg5)
+                                    asm("playground$defaultSystemCallHandler")
+  #if defined(__x86_64__)
+                                    __attribute__((visibility("internal")))
+  #endif
+  ;
+
+  // Return the current secure memory structure for this thread.
+  static SecureMem::Args* getSecureMem();
+
+  // Return a secure memory structure that can be used by a newly created
+  // thread.
+  static SecureMem::Args* getNewSecureMem();
+
+  // This function runs in the trusted process at startup and finds all the
+  // memory mappings that existed when the sandbox was first enabled. Going
+  // forward, all these mappings are off-limits for operations such as
+  // mmap(), munmap(), and mprotect().
+  static int   initializeProtectedMap(int fd);
+
+  // Helper function that allows the trusted process to get access to
+  // "/proc/self/maps" in the sandbox.
+  static void  snapshotMemoryMappings(int processFd, int proc_self_maps);
+
+  // Main loop for the trusted process.
+  static void  trustedProcess(int parentMapsFd, int processFdPub,
+                              int sandboxFd, int cloneFd,
+                              SecureMem::Args* secureArena)
+                                                     __attribute__((noreturn));
+
+  // Fork()s off the trusted process.
+  static SecureMem::Args* createTrustedProcess(int processFdPub, int sandboxFd,
+                                               int cloneFdPub, int cloneFd);
+
+  // Creates the trusted thread for the initial thread, then enables
+  // Seccomp mode.
+  static void  createTrustedThread(int processFdPub, int cloneFdPub,
+                                   SecureMem::Args* secureMem);
+
+  static int   proc_self_maps_;
+  static enum SandboxStatus {
+    STATUS_UNKNOWN, STATUS_UNSUPPORTED, STATUS_AVAILABLE, STATUS_ENABLED
+  }            status_;
+  static int   pid_;
+  static int   processFdPub_;
+  static int   cloneFdPub_;
+
+  #ifdef __i386__
+  struct SocketCallArgInfo;
+  static const struct SocketCallArgInfo socketCallArgInfo[];
+  #endif
+
+  // We always have to intercept SIGSEGV. If the application wants to set its
+  // own SEGV handler, we forward to it whenever necessary.
+  static SysCalls::kernel_sigaction sa_segv_ asm("playground$sa_segv");
+
+  // The syscall_mutex_ can only be directly accessed by the trusted process.
+  // It can be accessed by the trusted thread after fork()ing and calling
+  // mprotect(PROT_READ|PROT_WRITE). The mutex is used for system calls that
+  // require passing additional data, and that require the trusted process to
+  // wait until the trusted thread is done processing (e.g. exit(), clone(),
+  // open(), stat())
+  static int syscall_mutex_ asm("playground$syscall_mutex");
+
+  // Available in trusted process, only
+  typedef std::map<void *, long>       ProtectedMap;
+  static ProtectedMap                  protectedMap_;
+  static std::vector<SecureMem::Args*> secureMemPool_;
+};
+
+// If this struct is extended to contain parameters that are read by
+// the trusted thread, we will have to mprotect() it to be read-only when
+// starting the sandbox.  However, currently it is read only by the
+// trusted process, and the sandboxed process cannot change the values
+// that the fork()'d trusted process sees.
+struct SandboxPolicy {
+  bool allow_file_namespace;  // Allow filename-based system calls.
+};
+
+extern struct SandboxPolicy g_policy;
+
+} // namespace
+
+using playground::Sandbox;
+#endif // __cplusplus
+
+#endif // SANDBOX_IMPL_H__
diff --git a/sandbox/linux/seccomp/seccomp.gyp b/sandbox/linux/seccomp/seccomp.gyp
new file mode 100644
index 0000000..596be21
--- /dev/null
+++ b/sandbox/linux/seccomp/seccomp.gyp
@@ -0,0 +1,93 @@
+# Copyright (c) 2010 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+{
+  'variables': {
+    'chromium_code': 1,
+    'seccomp_intermediate_dir': '<(INTERMEDIATE_DIR)/seccomp-sandbox',
+  },
+  'targets': [
+    {
+      'target_name': 'seccomp_sandbox',
+      'type': 'static_library',
+      'sources': [
+        'access.cc',
+        'allocator.cc',
+        'allocator.h',
+        'clone.cc',
+        'exit.cc',
+        'debug.cc',
+        'getpid.cc',
+        'gettid.cc',
+        'ioctl.cc',
+        'ipc.cc',
+        'library.cc',
+        'library.h',
+        'linux_syscall_support.h',
+        'madvise.cc',
+        'maps.cc',
+        'maps.h',
+        'mmap.cc',
+        'mprotect.cc',
+        'munmap.cc',
+        'mutex.h',
+        'open.cc',
+        'sandbox.cc',
+        'sandbox.h',
+        'sandbox_impl.h',
+        'securemem.cc',
+        'securemem.h',
+        'sigaction.cc',
+        'sigprocmask.cc',
+        'socketcall.cc',
+        'stat.cc',
+        'syscall.cc',
+        'syscall.h',
+        'syscall_table.c',
+        'syscall_table.h',
+        'tls.h',
+        'trusted_process.cc',
+        'trusted_thread.cc',
+        'x86_decode.cc',
+        'x86_decode.h',
+      ],
+    },
+    {
+      'target_name': 'seccomp_tests',
+      'type': 'executable',
+      'sources': [
+        'tests/test_syscalls.cc',
+      ],
+      'include_dirs': [
+         '.',
+         '<(seccomp_intermediate_dir)',
+      ],
+      'dependencies': [
+        'seccomp_sandbox',
+      ],
+      'libraries': [
+        '-lpthread',
+        '-lutil', # For openpty()
+      ],
+      'actions': [
+        {
+          'action_name': 'make_test_list',
+          'inputs': [
+            'tests/list_tests.py',
+            'tests/test_syscalls.cc',
+          ],
+          'outputs': ['<(seccomp_intermediate_dir)/test-list.h'],
+          'action': ['sh', '-c', 'python <(_inputs) > <(_outputs)'],
+        },
+      ],
+    },
+    {
+      'target_name': 'timestats',
+      'type': 'executable',
+      'sources': [
+        'timestats.cc',
+      ],
+    },
+  ],
+}
diff --git a/sandbox/linux/seccomp/securemem.cc b/sandbox/linux/seccomp/securemem.cc
new file mode 100644
index 0000000..5f07bbe
--- /dev/null
+++ b/sandbox/linux/seccomp/securemem.cc
@@ -0,0 +1,105 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "debug.h"
+#include "mutex.h"
+#include "sandbox_impl.h"
+#include "securemem.h"
+
+namespace playground {
+
+void SecureMem::abandonSystemCall(int fd, int err) {
+  // Instead of forwarding the call, reply on "fd" with "err" as the result.
+  if (err != 0)
+    Debug::message("System call failed\n");
+  void* result = reinterpret_cast<void *>(err);
+  Sandbox::SysCalls sys;
+  if (Sandbox::write(sys, fd, &result, sizeof(result)) == sizeof(result))
+    return;
+  Sandbox::die("Failed to send system call");
+}
+
+void SecureMem::dieIfParentDied(int parentMapsFd) {
+  // The syscall_mutex_ should not be contended. If it is, either the load of
+  // system calls is unusually high, or (more likely) the sandboxed process
+  // terminated while the trusted process was waiting for the mutex. Probe
+  // parentMapsFd: a failed seek, EOF, or a read error other than EINTR must
+  // not be mistaken for a live parent, so all of them terminate us.
+  int alive = !lseek(parentMapsFd, 0, SEEK_SET);
+  if (alive) {
+    char buf;
+    do {
+      alive = read(parentMapsFd, &buf, 1);
+    } while (alive < 0 && errno == EINTR);
+  }
+  if (alive <= 0) {
+    Sandbox::die();
+  }
+}
+
+void SecureMem::lockSystemCall(int parentMapsFd, Args* mem) {
+  while (!Mutex::lockMutex(&Sandbox::syscall_mutex_, 500)) {
+    dieIfParentDied(parentMapsFd);  // not locked yet: is the parent gone?
+  }
+  asm volatile(  // atomic increment of mem->sequence
+  #if defined(__x86_64__)
+      "lock; incq (%0)\n"
+  #elif defined(__i386__)
+      "lock; incl (%0)\n"
+  #else
+  #error Unsupported target platform
+  #endif
+      :
+      : "q"(&mem->sequence)
+      : "memory");
+}
+
+void SecureMem::sendSystemCallInternal(int fd, bool locked, int parentMapsFd,
+                                       Args* mem, int syscallNum, void* arg1,
+                                       void* arg2, void* arg3, void* arg4,
+                                       void* arg5, void* arg6) {
+  if (!locked) {  // if locked, lockSystemCall() is expected to have done this
+    asm volatile(
+    #if defined(__x86_64__)
+        "lock; incq (%0)\n"
+    #elif defined(__i386__)
+        "lock; incl (%0)\n"
+    #else
+    #error Unsupported target platform
+    #endif
+        :
+        : "q"(&mem->sequence)
+        : "memory");
+  }
+  mem->callType    = locked ? -2 : -1;
+  mem->syscallNum  = syscallNum;
+  mem->arg1        = arg1;
+  mem->arg2        = arg2;
+  mem->arg3        = arg3;
+  mem->arg4        = arg4;
+  mem->arg5        = arg5;
+  mem->arg6        = arg6;
+  asm volatile(  // second increment, after all fields have been written
+  #if defined(__x86_64__)
+      "lock; incq (%0)\n"
+  #elif defined(__i386__)
+      "lock; incl (%0)\n"
+  #else
+  #error Unsupported target platform
+  #endif
+      :
+      : "q"(&mem->sequence)
+      : "memory");
+  Sandbox::SysCalls sys;
+  if (Sandbox::write(sys, fd, &mem->callType, sizeof(int)) != sizeof(int)) {
+    Sandbox::die("Failed to send system call");
+  }
+  if (parentMapsFd >= 0) {  // a negative fd skips waiting for the unlock
+    while (!Mutex::waitForUnlock(&Sandbox::syscall_mutex_, 500)) {
+      dieIfParentDied(parentMapsFd);
+    }
+  }
+}
+
+} // namespace
diff --git a/sandbox/linux/seccomp/securemem.h b/sandbox/linux/seccomp/securemem.h
new file mode 100644
index 0000000..91283db
--- /dev/null
+++ b/sandbox/linux/seccomp/securemem.h
@@ -0,0 +1,205 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SECURE_MEM_H__
+#define SECURE_MEM_H__
+
+#include <stdlib.h>
+#include "linux_syscall_support.h"
+
+namespace playground {
+
+class SecureMem {
+ public:
+  // Each thread is associated with two memory pages (i.e. 8192 bytes). This
+  // memory is fully accessible by the trusted process, but in the trusted
+  // thread and the sandboxed thread, the first page is only mapped PROT_READ,
+  // and the second one is PROT_READ|PROT_WRITE.
+  //
+  // The first page can be modified by the trusted process and this is the
+  // main mechanism by which it communicates with the trusted thread. After
+  // each update, it updates the "sequence" number. The trusted thread must
+  // check the "sequence" number has the expected value, and only then can
+  // it trust the data in this page.
+  typedef struct Args {
+    union {
+      struct {
+        union {
+          struct {
+            struct Args* self;
+            long         sequence;
+            long         callType;
+            long         syscallNum;
+            void*        arg1;
+            void*        arg2;
+            void*        arg3;
+            void*        arg4;
+            void*        arg5;
+            void*        arg6;
+
+            // Used by clone() to allow return from the syscall wrapper.
+            void*        ret;
+            #if defined(__x86_64__)
+            void*        rbp;
+            void*        rbx;
+            void*        rcx;
+            void*        rdx;
+            void*        rsi;
+            void*        rdi;
+            void*        r8;
+            void*        r9;
+            void*        r10;
+            void*        r11;
+            void*        r12;
+            void*        r13;
+            void*        r14;
+            void*        r15;
+            #elif defined(__i386__)
+            void*        ebp;
+            void*        edi;
+            void*        esi;
+            void*        edx;
+            void*        ecx;
+            void*        ebx;
+            #else
+            #error Unsupported target platform
+            #endif
+
+            // Used by clone() to set up data for the new thread.
+            struct Args* newSecureMem;
+            int          processFdPub;
+            int          cloneFdPub;
+
+            // Set to non-zero, if in debugging mode
+            int          allowAllSystemCalls;
+
+            // The most recent SysV SHM identifier returned by
+            // shmget(IPC_PRIVATE)
+            int          shmId;
+
+            // The following entries make up the sandboxed thread's TLS
+            long long    cookie;
+            long long    threadId;
+            long long    threadFdPub;
+          } __attribute__((packed));
+          char           header[512];
+        };
+        // Used for calls such as open() and stat().
+        char             pathname[4096 - 512];
+      } __attribute__((packed));
+      char               securePage[4096];
+    };
+    union {
+      struct {
+        // This scratch space is used by the trusted thread to read parameters
+        // for unrestricted system calls.
+        int              tmpSyscallNum;
+        void*            tmpArg1;
+        void*            tmpArg2;
+        void*            tmpArg3;
+        void*            tmpArg4;
+        void*            tmpArg5;
+        void*            tmpArg6;
+        void*            tmpReturnValue;
+
+        // Scratch space used to return the result of a rdtsc instruction
+        int              rdtscpEax;
+        int              rdtscpEdx;
+        int              rdtscpEcx;
+
+        // We often have long sequences of calls to gettimeofday(). This is
+        // needlessly expensive. Coalesce them into a single call.
+        int              lastSyscallNum;
+        int              gettimeofdayCounter;
+
+        // For debugging purposes, we want to be able to log messages. This can
+        // result in additional system calls. Make sure that we don't trigger
+        // logging of those recursive calls.
+        int              recursionLevel;
+
+        // Computing the signal mask is expensive. Keep a cached copy.
+        kernel_sigset_t  signalMask;
+
+        // Keep track of whether we are in a SEGV handler
+        int              inSegvHandler;
+      } __attribute__((packed));
+      char               scratchPage[4096];
+    };
+  } __attribute__((packed)) Args;
+
+  // Allows the trusted process to check whether the parent process still
+  // exists. If it doesn't, kill the trusted process.
+  static void dieIfParentDied(int parentProc);
+
+  // The trusted process received a system call that it intends to deny.
+  static void abandonSystemCall(int fd, int err);
+
+  // Acquires the syscall_mutex_ prior to making changes to the parameters in
+  // the secure memory page. Used by calls such as exit(), clone(), open(),
+  // socketcall(), and stat().
+  // After locking the mutex, it is no longer valid to abandon the system
+  // call!
+  static void lockSystemCall(int parentProc, Args* mem);
+
+  // Sends a system call to the trusted thread. If "locked" is true, the
+  // caller must first call lockSystemCall() and must also provide
+  // "parentProc". In locked mode, sendSystemCall() won't return until the
+  // trusted thread has completed processing.
+  // Use sparingly as it serializes the operation of the trusted process.
+  static void sendSystemCall(int fd, bool locked, int parentProc, Args* mem,
+                             int syscallNum) {
+    sendSystemCallInternal(fd, locked, parentProc, mem, syscallNum);
+  }
+  template<class T1> static
+  void sendSystemCall(int fd, bool locked, int parentProc, Args* mem,
+                      int syscallNum, T1 arg1) {
+    sendSystemCallInternal(fd, locked, parentProc, mem, syscallNum,
+                           (void*)arg1);
+  }
+  template<class T1, class T2> static
+  void sendSystemCall(int fd, bool locked, int parentProc, Args* mem,
+                      int syscallNum, T1 arg1, T2 arg2) {
+    sendSystemCallInternal(fd, locked, parentProc, mem, syscallNum,
+                           (void*)arg1, (void*)arg2);
+  }
+  template<class T1, class T2, class T3> static
+  void sendSystemCall(int fd, bool locked, int parentProc, Args* mem,
+                      int syscallNum, T1 arg1, T2 arg2, T3 arg3) {
+    sendSystemCallInternal(fd, locked, parentProc, mem, syscallNum,
+                           (void*)arg1, (void*)arg2, (void*)arg3);
+  }
+  template<class T1, class T2, class T3, class T4> static
+  void sendSystemCall(int fd, bool locked, int parentProc, Args* mem,
+                      int syscallNum, T1 arg1, T2 arg2, T3 arg3, T4 arg4) {
+    sendSystemCallInternal(fd, locked, parentProc, mem, syscallNum,
+                           (void*)arg1, (void*)arg2, (void*)arg3, (void*)arg4);
+  }
+  template<class T1, class T2, class T3, class T4, class T5> static
+  void sendSystemCall(int fd, bool locked, int parentProc, Args* mem,
+                      int syscallNum, T1 arg1, T2 arg2, T3 arg3, T4 arg4,
+                      T5 arg5) {
+    sendSystemCallInternal(fd, locked, parentProc, mem, syscallNum,
+                           (void*)arg1, (void*)arg2, (void*)arg3, (void*)arg4,
+                           (void*)arg5);
+  }
+  template<class T1, class T2, class T3, class T4, class T5, class T6> static
+  void sendSystemCall(int fd, bool locked, int parentProc, Args* mem,
+                      int syscallNum, T1 arg1, T2 arg2, T3 arg3, T4 arg4,
+                      T5 arg5, T6 arg6) {
+    sendSystemCallInternal(fd, locked, parentProc, mem, syscallNum,
+                           (void*)arg1, (void*)arg2, (void*)arg3, (void*)arg4,
+                           (void*)arg5, (void*)arg6);
+  }
+
+ private:
+  static void sendSystemCallInternal(int fd, bool locked, int parentProc,
+                                     Args* mem, int syscallNum, void* arg1 = 0,
+                                     void* arg2 = 0, void* arg3 = 0,
+                                     void* arg4 = 0, void* arg5 = 0,
+                                     void* arg6 = 0);
+};
+
+} // namespace
+
+#endif // SECURE_MEM_H__
diff --git a/sandbox/linux/seccomp/sigaction.cc b/sandbox/linux/seccomp/sigaction.cc
new file mode 100644
index 0000000..162416d
--- /dev/null
+++ b/sandbox/linux/seccomp/sigaction.cc
@@ -0,0 +1,177 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// TODO(markus): We currently instrument the restorer functions with calls to
+//               the syscallWrapper(). This prevents gdb from properly
+//               creating backtraces of code that is running in signal
+//               handlers. We might instead want to always override the
+//               restorer with a function that contains the "magic" signature
+//               but that is not executable. The SEGV handler can detect this
+//               and then invoke the appropriate restorer.
+
+#include "debug.h"
+#include "sandbox_impl.h"
+
+namespace playground {
+
+#if defined(__NR_sigaction)
+long Sandbox::sandbox_sigaction(int signum, const void* a_, void* oa_) {
+  const SysCalls::kernel_old_sigaction* action =
+    reinterpret_cast<const SysCalls::kernel_old_sigaction*>(a_);
+  SysCalls::kernel_old_sigaction* old_action =
+    reinterpret_cast<SysCalls::kernel_old_sigaction*>(oa_);
+  // Handler for the legacy sigaction() call; returns 0 or the forwarded result.
+  long rc = 0;
+  long long tm;
+  Debug::syscall(&tm, __NR_sigaction, "Executing handler");
+  if (signum == SIGSEGV) {  // only read/update the shadow copy in sa_segv_
+    if (old_action) {
+      old_action->sa_handler_ = sa_segv_.sa_handler_;
+      old_action->sa_mask     = sa_segv_.sa_mask.sig[0];
+      old_action->sa_flags    = sa_segv_.sa_flags;
+      old_action->sa_restorer = sa_segv_.sa_restorer;
+    }
+    if (action) {
+      sa_segv_.sa_handler_    = action->sa_handler_;
+      sa_segv_.sa_mask.sig[0] = action->sa_mask;
+      sa_segv_.sa_flags       = action->sa_flags;
+      sa_segv_.sa_restorer    = action->sa_restorer;
+    }
+  } else {  // every other signal is forwarded as a request
+    struct {
+      int       sysnum;
+      long long cookie;
+      SigAction sigaction_req;
+    } __attribute__((packed)) request;
+    request.sysnum                   = __NR_sigaction;
+    request.cookie                   = cookie();
+    request.sigaction_req.sysnum     = __NR_sigaction;
+    request.sigaction_req.signum     = signum;
+    request.sigaction_req.action     =
+      reinterpret_cast<const SysCalls::kernel_sigaction *>(action);
+    request.sigaction_req.old_action =
+      reinterpret_cast<const SysCalls::kernel_sigaction *>(old_action);
+    request.sigaction_req.sigsetsize = 8;
+    // Send the request on processFdPub(), then read the result on threadFdPub().
+    SysCalls sys;
+    if (write(sys, processFdPub(), &request, sizeof(request)) !=
+        sizeof(request) ||
+        read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
+      die("Failed to forward sigaction() request [sandbox]");
+    }
+  }
+  Debug::elapsed(tm, __NR_sigaction);
+  return rc;
+}
+#endif
+
+#if defined(__NR_rt_sigaction)
+#define min(a,b) ({ typeof(a) a_=(a); typeof(b) b_=(b); a_ < b_ ? a_ : b_; })
+#define max(a,b) ({ typeof(a) a_=(a); typeof(b) b_=(b); a_ > b_ ? a_ : b_; })
+
+long Sandbox::sandbox_rt_sigaction(int signum, const void* a_, void* oa_,
+                                   size_t sigsetsize) {
+  const SysCalls::kernel_sigaction* action =
+    reinterpret_cast<const SysCalls::kernel_sigaction*>(a_);
+  SysCalls::kernel_sigaction* old_action =
+    reinterpret_cast<SysCalls::kernel_sigaction*>(oa_);
+  long rc = 0;
+  long long tm;
+  Debug::syscall(&tm, __NR_rt_sigaction, "Executing handler");
+  if (signum == SIGSEGV) {
+    // Emulate against the shadow sa_segv_; sizes are clamped, never wrapped.
+    size_t size = offsetof(SysCalls::kernel_sigaction, sa_mask) + sigsetsize;
+    if (old_action) {
+      memcpy(old_action, &sa_segv_, min(sizeof(sa_segv_), size));
+      memset(old_action + 1, 0, size - min(size, sizeof(sa_segv_)));
+    }
+    if (action) {
+      size_t given = min(sigsetsize, static_cast<size_t>(8));
+      memcpy(&sa_segv_, action, min(sizeof(sa_segv_), size));
+      // Clear the trailing mask bytes that the caller did not supply.
+      memset(reinterpret_cast<char*>(&sa_segv_.sa_mask) + given, 0, 8 - given);
+    }
+  } else {
+    struct {
+      int       sysnum;
+      long long cookie;
+      SigAction sigaction_req;
+    } __attribute__((packed)) request;
+    request.sysnum                   = __NR_rt_sigaction;
+    request.cookie                   = cookie();
+    request.sigaction_req.sysnum     = __NR_rt_sigaction;
+    request.sigaction_req.signum     = signum;
+    request.sigaction_req.action     = action;
+    request.sigaction_req.old_action = old_action;
+    request.sigaction_req.sigsetsize = sigsetsize;
+    SysCalls sys;
+    if (write(sys, processFdPub(), &request, sizeof(request)) !=
+        sizeof(request) ||
+        read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
+      die("Failed to forward rt_sigaction() request [sandbox]");
+    }
+  }
+  Debug::elapsed(tm, __NR_rt_sigaction);
+  return rc;
+}
+#endif
+
+#if defined(__NR_signal)
+void* Sandbox::sandbox_signal(int signum, const void* handler) {
+  struct kernel_old_sigaction sa, osa;
+  sa.sa_handler_ = reinterpret_cast<void (*)(int)>(handler);
+  sa.sa_flags    = SA_NODEFER | SA_RESETHAND | SA_RESTORER;
+  sa.sa_mask     = 0;
+  asm volatile(  // store the address of the stub at label 0 in sa_restorer
+      "lea  0f, %0\n"
+      "jmp  1f\n"  // skip over the stub itself
+    "0:pop  %%eax\n"
+      "mov  $119, %%eax\n" // __NR_sigreturn
+      "int  $0x80\n"
+    "1:\n"
+      : "=r"(sa.sa_restorer));
+  long rc = sandbox_sigaction(signum, &sa, &osa);  // install via sigaction()
+  if (rc < 0) {
+    return (void *)rc;  // a negative result is returned in the pointer value
+  }
+  return reinterpret_cast<void *>(osa.sa_handler_);  // previous handler
+}
+#endif
+
+bool Sandbox::process_sigaction(int parentMapsFd, int sandboxFd,
+                                int threadFdPub, int threadFd,
+                                SecureMem::Args* mem) {
+  // We need to intercept sigaction() in order to properly rewrite calls to
+  // sigaction(SEGV). While there is no security implication if we didn't do
+  // so, it would end up preventing the program from running correctly as
+  // the sandbox's SEGV handler could accidentally get removed. All of this is
+  // done in sandbox_{,rt_}sigaction(). But we still bounce through the
+  // trusted process as that is the only way we can instrument system calls.
+  // This is somewhat needlessly complicated. But as sigaction() is not a
+  // performance critical system call, it is easier to do this way than to
+  // extend the format of the syscall_table so that it could deal with this
+  // special case.
+
+  // Read request
+  SigAction sigaction_req;
+  SysCalls sys;
+  if (read(sys, sandboxFd, &sigaction_req, sizeof(sigaction_req)) !=
+      sizeof(sigaction_req)) {
+    die("Failed to read parameters for sigaction() [process]");
+  }
+  if (sigaction_req.signum == SIGSEGV) {
+    // This should never happen. Something went wrong when intercepting the
+    // system call. This is not a security problem, but it clearly doesn't
+    // make sense to let the system call pass.
+    SecureMem::abandonSystemCall(threadFd, -EINVAL);
+    return false;
+  }
+  SecureMem::sendSystemCall(threadFdPub, false, -1, mem, sigaction_req.sysnum,
+                            sigaction_req.signum, sigaction_req.action,
+                            sigaction_req.old_action,
+                            sigaction_req.sigsetsize);
+  return true;
+}
+
+} // namespace
diff --git a/sandbox/linux/seccomp/sigprocmask.cc b/sandbox/linux/seccomp/sigprocmask.cc
new file mode 100644
index 0000000..9ff2922
--- /dev/null
+++ b/sandbox/linux/seccomp/sigprocmask.cc
@@ -0,0 +1,120 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "debug.h"
+#include "sandbox_impl.h"
+
+namespace playground {
+
+// If the sandboxed process tries to mask SIGSEGV, there is a good chance
+// the process will eventually get terminated. If this is really ever a
+// problem, we can hide the fact that SIGSEGV is unmasked. But I don't think
+// we really need this. Masking of synchronous signals is rarely necessary.
+
+#if defined(__NR_sigprocmask)
+long Sandbox::sandbox_sigprocmask(int how, const void* set, void* old_set) {
+  long long tm;
+  Debug::syscall(&tm, __NR_sigprocmask, "Executing handler");
+
+  // Access the signal mask by triggering a SEGV and modifying the signal state
+  // prior to calling rt_sigreturn().
+  long res = -ENOSYS;
+  #if defined(__x86_64__)
+  #error x86-64 does not support sigprocmask(); use rt_sigprocmask() instead
+  #elif defined(__i386__)
+  asm volatile(
+    "push  %%ebx\n"
+    "movl  %2, %%ebx\n"
+    "int   $0\n"
+    "pop   %%ebx\n"
+    : "=a"(res)
+    : "0"(__NR_sigprocmask), "ri"((long)how),
+      "c"((long)set), "d"((long)old_set)
+    : "esp", "memory");
+  #else
+  #error Unsupported target platform
+  #endif
+
+  // Update our shadow signal mask, so that we can copy it upon creation of
+  // new threads. The legacy call takes a one-word mask, so only the first
+  // word of the shadow mask may be read from "set" or modified.
+  if (res == 0 && set != NULL) {
+    SecureMem::Args* args = getSecureMem();
+    unsigned long* mask = reinterpret_cast<unsigned long*>(&args->signalMask);
+    const unsigned long bits = *reinterpret_cast<const unsigned long*>(set);
+    switch (how) {
+    case SIG_BLOCK:
+      *mask |=  bits;
+      break;
+    case SIG_UNBLOCK:
+      *mask &= ~bits;
+      break;
+    case SIG_SETMASK:
+      *mask  =  bits;
+      break;
+    }
+  }
+
+  Debug::elapsed(tm, __NR_sigprocmask);
+  return res;
+}
+#endif
+
+#if defined(__NR_rt_sigprocmask)
+long Sandbox::sandbox_rt_sigprocmask(int how, const void* set, void* old_set,
+                                     size_t bytes) {
+  long long tm;
+  Debug::syscall(&tm, __NR_rt_sigprocmask, "Executing handler");
+
+  // Access the signal mask by triggering a SEGV and modifying the signal state
+  // prior to calling rt_sigreturn().
+  long res = -ENOSYS;
+  #if defined(__x86_64__)
+  asm volatile(
+    "movq %5, %%r10\n"
+    "int $0\n"
+    : "=a"(res)
+    : "0"(__NR_rt_sigprocmask), "D"((long)how),
+      "S"((long)set), "d"((long)old_set), "r"((long)bytes)
+    : "r10", "r11", "rcx", "memory");
+  #elif defined(__i386__)
+  asm volatile(
+    "push  %%ebx\n"
+    "movl  %2, %%ebx\n"
+    "int   $0\n"
+    "pop   %%ebx\n"
+    : "=a"(res)
+    : "0"(__NR_rt_sigprocmask), "ri"((long)how),
+      "c"((long)set), "d"((long)old_set), "S"((long)bytes)
+    : "esp", "memory");
+  #else
+  #error Unsupported target platform
+  #endif
+  // Update our shadow signal mask, so that we can copy it upon creation of
+  // new threads. The shadow is accessed as one 64-bit word, so masks shorter
+  // than 8 bytes are not mirrored.
+  if (res == 0 && set != NULL && bytes >= 8) {
+    SecureMem::Args* args = getSecureMem();
+    switch (how) {
+    case SIG_BLOCK:
+      *(unsigned long long *)&args->signalMask |=  *(unsigned long long *)set;
+      break;
+    case SIG_UNBLOCK:
+      *(unsigned long long *)&args->signalMask &= ~*(unsigned long long *)set;
+      break;
+    case SIG_SETMASK:
+      *(unsigned long long *)&args->signalMask  =  *(unsigned long long *)set;
+      break;
+    default:
+      break;
+    }
+  }
+
+  Debug::elapsed(tm, __NR_rt_sigprocmask);
+
+  return res;
+}
+#endif
+
+} // namespace
diff --git a/sandbox/linux/seccomp/socketcall.cc b/sandbox/linux/seccomp/socketcall.cc
new file mode 100644
index 0000000..c7b2015
--- /dev/null
+++ b/sandbox/linux/seccomp/socketcall.cc
@@ -0,0 +1,1039 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "debug.h"
+#include "sandbox_impl.h"
+
+namespace playground {
+
+#if defined(__NR_socket)
+
+ssize_t Sandbox::sandbox_recvfrom(int sockfd, void* buf, size_t len, int flags,
+                                  void* from, socklen_t* fromlen) {
+  long long tm;
+  Debug::syscall(&tm, __NR_recvfrom, "Executing handler");
+
+  SysCalls sys;
+  const bool plainRead = !from && !flags;
+  if (plainRead) {
+    // No sender buffer and no flags: this is just a read(), which seccomp
+    // mode permits without involving the trusted process.
+    Debug::message("Replaced recv() with call to read()");
+    const ssize_t nread = sys.read(sockfd, buf, len);
+    Debug::elapsed(tm, __NR_recvfrom);
+    return nread < 0 ? -sys.my_errno : nread;
+  }
+
+  // Serialized request: syscall number, cookie, then the raw arguments.
+  struct {
+    int       sysnum;
+    long long cookie;
+    RecvFrom  recvfrom_req;
+  } __attribute__((packed)) wire;
+  wire.sysnum               = __NR_recvfrom;
+  wire.cookie               = cookie();
+  wire.recvfrom_req.sockfd  = sockfd;
+  wire.recvfrom_req.buf     = buf;
+  wire.recvfrom_req.len     = len;
+  wire.recvfrom_req.flags   = flags;
+  wire.recvfrom_req.from    = from;
+  wire.recvfrom_req.fromlen = fromlen;
+
+  // Write the request to processFdPub(), then read the system call's result
+  // back from threadFdPub(). A short transfer on either side is fatal.
+  long result;
+  const bool sent =
+      write(sys, processFdPub(), &wire, sizeof(wire)) == sizeof(wire);
+  if (!sent ||
+      read(sys, threadFdPub(), &result, sizeof(result)) != sizeof(result)) {
+    die("Failed to forward recvfrom() request [sandbox]");
+  }
+  Debug::elapsed(tm, __NR_recvfrom);
+  return static_cast<ssize_t>(result);
+}
+
+ssize_t Sandbox::sandbox_recvmsg(int sockfd, struct msghdr* msg, int flags) {
+  long long tm;
+  Debug::syscall(&tm, __NR_recvmsg, "Executing handler");
+
+  // recvmsg() is never downgraded to recvfrom(), recv() or read(): the
+  // caller may depend on msg->msg_flags being filled in.
+  struct {
+    int       sysnum;
+    long long cookie;
+    RecvMsg   recvmsg_req;
+  } __attribute__((packed)) wire;
+  wire.sysnum             = __NR_recvmsg;
+  wire.cookie             = cookie();
+  wire.recvmsg_req.sockfd = sockfd;
+  wire.recvmsg_req.msg    = msg;
+  wire.recvmsg_req.flags  = flags;
+
+  // Write the request out, then read the system call's result back.
+  SysCalls sys;
+  long result;
+  if (write(sys, processFdPub(), &wire, sizeof(wire)) != sizeof(wire) ||
+      read(sys, threadFdPub(), &result, sizeof(result)) != sizeof(result)) {
+    die("Failed to forward recvmsg() request [sandbox]");
+  }
+  Debug::elapsed(tm, __NR_recvmsg);
+  return static_cast<ssize_t>(result);
+}
+
+size_t Sandbox::sandbox_sendmsg(int sockfd, const struct msghdr* msg,
+                                int flags) {
+  long long tm;
+  Debug::syscall(&tm, __NR_sendmsg, "Executing handler");
+
+  if (msg->msg_iovlen == 1 && msg->msg_controllen == 0) {
+    // One buffer and no control data: same as sendto() on that buffer. Pass
+    // the buffer the iovec describes, not the iovec itself and its count.
+    return sandbox_sendto(sockfd, msg->msg_iov->iov_base,
+      msg->msg_iov->iov_len, flags, msg->msg_name, msg->msg_namelen);
+  }
+  // Otherwise send: header, msghdr copy, destination name, control data.
+  struct Request {
+    int           sysnum;
+    long long     cookie;
+    SendMsg       sendmsg_req;
+    struct msghdr msg;
+  } __attribute__((packed));
+  char data[sizeof(struct Request) + msg->msg_namelen + msg->msg_controllen];
+  struct Request *request     = reinterpret_cast<struct Request *>(data);
+  request->sysnum             = __NR_sendmsg;
+  request->cookie             = cookie();
+  request->sendmsg_req.sockfd = sockfd;
+  request->sendmsg_req.msg    = msg;
+  request->sendmsg_req.flags  = flags;
+  request->msg                = *msg;
+  char *payload = reinterpret_cast<char *>(request + 1);  // trails the header
+  memcpy(payload, msg->msg_name, msg->msg_namelen);
+  memcpy(payload + msg->msg_namelen, msg->msg_control, msg->msg_controllen);
+
+  long rc;
+  SysCalls sys;
+  if (write(sys, processFdPub(), &data, sizeof(data)) !=
+      (ssize_t)sizeof(data) ||
+      read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
+    die("Failed to forward sendmsg() request [sandbox]");
+  }
+  Debug::elapsed(tm, __NR_sendmsg);
+  return static_cast<ssize_t>(rc);
+}
+
+ssize_t Sandbox::sandbox_sendto(int sockfd, const void* buf, size_t len,
+                                int flags, const void* to, socklen_t tolen) {
+  long long tm;
+  Debug::syscall(&tm, __NR_sendto, "Executing handler");
+
+  SysCalls sys;
+  const bool plainWrite = !to && !flags;
+  if (plainWrite) {
+    // No recipient and no flags: this is just a write(), which seccomp mode
+    // permits without involving the trusted process.
+    Debug::message("Replaced sendto() with call to write()");
+    const ssize_t nwritten = sys.write(sockfd, buf, len);
+    Debug::elapsed(tm, __NR_sendto);
+    return nwritten < 0 ? -sys.my_errno : nwritten;
+  }
+
+  // Serialized request: syscall number, cookie, then the raw arguments.
+  struct {
+    int       sysnum;
+    long long cookie;
+    SendTo    sendto_req;
+  } __attribute__((packed)) wire;
+  wire.sysnum            = __NR_sendto;
+  wire.cookie            = cookie();
+  wire.sendto_req.sockfd = sockfd;
+  wire.sendto_req.buf    = buf;
+  wire.sendto_req.len    = len;
+  wire.sendto_req.flags  = flags;
+  wire.sendto_req.to     = to;
+  wire.sendto_req.tolen  = tolen;
+
+  // Write the request to processFdPub(), then read the system call's result
+  // back from threadFdPub(). A short transfer on either side is fatal.
+  long result;
+  const bool sent =
+      write(sys, processFdPub(), &wire, sizeof(wire)) == sizeof(wire);
+  if (!sent ||
+      read(sys, threadFdPub(), &result, sizeof(result)) != sizeof(result)) {
+    die("Failed to forward sendto() request [sandbox]");
+  }
+  Debug::elapsed(tm, __NR_sendto);
+  return static_cast<ssize_t>(result);
+}
+
+long Sandbox::sandbox_setsockopt(int sockfd, int level, int optname,
+                                 const void* optval, socklen_t optlen) {
+  long long tm;
+  Debug::syscall(&tm, __NR_setsockopt, "Executing handler");
+
+  // Serialized request: syscall number, cookie, then the raw arguments.
+  struct {
+    int        sysnum;
+    long long  cookie;
+    SetSockOpt setsockopt_req;
+  } __attribute__((packed)) wire;
+  wire.sysnum                 = __NR_setsockopt;
+  wire.cookie                 = cookie();
+  wire.setsockopt_req.sockfd  = sockfd;
+  wire.setsockopt_req.level   = level;
+  wire.setsockopt_req.optname = optname;
+  wire.setsockopt_req.optval  = optval;
+  wire.setsockopt_req.optlen  = optlen;
+
+  SysCalls sys;
+  long result;
+  if (write(sys, processFdPub(), &wire, sizeof(wire)) != sizeof(wire) ||
+      read(sys, threadFdPub(), &result, sizeof(result)) != sizeof(result)) {
+    die("Failed to forward setsockopt() request [sandbox]");
+  }
+  Debug::elapsed(tm, __NR_setsockopt);
+  return result;
+}
+
+long Sandbox::sandbox_getsockopt(int sockfd, int level, int optname,
+                                 void* optval, socklen_t* optlen) {
+  long long tm;
+  Debug::syscall(&tm, __NR_getsockopt, "Executing handler");
+
+  // Serialized request: syscall number, cookie, then the raw arguments.
+  struct {
+    int        sysnum;
+    long long  cookie;
+    GetSockOpt getsockopt_req;
+  } __attribute__((packed)) wire;
+  wire.sysnum                 = __NR_getsockopt;
+  wire.cookie                 = cookie();
+  wire.getsockopt_req.sockfd  = sockfd;
+  wire.getsockopt_req.level   = level;
+  wire.getsockopt_req.optname = optname;
+  wire.getsockopt_req.optval  = optval;
+  wire.getsockopt_req.optlen  = optlen;
+
+  SysCalls sys;
+  long result;
+  if (write(sys, processFdPub(), &wire, sizeof(wire)) != sizeof(wire) ||
+      read(sys, threadFdPub(), &result, sizeof(result)) != sizeof(result)) {
+    die("Failed to forward getsockopt() request [sandbox]");
+  }
+  Debug::elapsed(tm, __NR_getsockopt);
+  return result;
+}
+
+bool Sandbox::process_recvfrom(int parentMapsFd, int sandboxFd,
+                               int threadFdPub, int threadFd,
+                               SecureMem::Args* mem) {
+  // Fetch the forwarded recvfrom() arguments from sandboxFd.
+  RecvFrom req;
+  SysCalls sys;
+  if (read(sys, sandboxFd, &req, sizeof(req)) != sizeof(req)) {
+    die("Failed to read parameters for recvfrom() [process]");
+  }
+
+  // Refuse the call if it carries any flag outside of the supported set.
+  const int supportedFlags =
+      MSG_DONTWAIT | MSG_OOB | MSG_PEEK | MSG_TRUNC | MSG_WAITALL;
+  if ((req.flags & ~supportedFlags) != 0) {
+    SecureMem::abandonSystemCall(threadFd, -EINVAL);
+    return false;
+  }
+
+  // Receiving on unconnected sockets is not something we expect to need,
+  // but it carries no particular risk either, so the call goes through.
+  SecureMem::sendSystemCall(threadFdPub, false, -1, mem,
+                            __NR_recvfrom, req.sockfd,
+                            req.buf, req.len,
+                            req.flags, req.from,
+                            req.fromlen);
+  return true;
+}
+
+bool Sandbox::process_recvmsg(int parentMapsFd, int sandboxFd, int threadFdPub,
+                              int threadFd, SecureMem::Args* mem) {
+  // Fetch the forwarded recvmsg() arguments from sandboxFd.
+  RecvMsg  req;
+  SysCalls sys;
+  if (read(sys, sandboxFd, &req, sizeof(req)) != sizeof(req)) {
+    die("Failed to read parameters for recvmsg() [process]");
+  }
+
+  // Refuse the call if it carries any flag outside of the supported set.
+  const int supportedFlags =
+      MSG_DONTWAIT | MSG_OOB | MSG_PEEK | MSG_TRUNC | MSG_WAITALL;
+  if ((req.flags & ~supportedFlags) != 0) {
+    SecureMem::abandonSystemCall(threadFd, -EINVAL);
+    return false;
+  }
+
+  // Receiving messages is generally not security critical.
+  SecureMem::sendSystemCall(threadFdPub, false, -1, mem,
+                            __NR_recvmsg, req.sockfd,
+                            req.msg, req.flags);
+  return true;
+}
+
+bool Sandbox::process_sendmsg(int parentMapsFd, int sandboxFd, int threadFdPub,
+                              int threadFd, SecureMem::Args* mem) {
+  // Trusted side of sendmsg(): read the request, vet it, then run it locked.
+  struct {
+    SendMsg sendmsg_req;
+    struct msghdr   msg;
+  } __attribute__((packed)) data;
+  SysCalls sys;
+  if (read(sys, sandboxFd, &data, sizeof(data)) != sizeof(data)) {
+    die("Failed to read parameters for sendmsg() [process]");
+  }
+
+  if (data.msg.msg_namelen > 4096 || data.msg.msg_controllen > 4096) {
+    die("Unexpected size for socketcall() payload [process]");
+  }
+  char extra[data.msg.msg_namelen + data.msg.msg_controllen];  // name+control
+  if (read(sys, sandboxFd, &extra, sizeof(extra)) != (ssize_t)sizeof(extra)) {
+    die("Failed to read parameters for sendmsg() [process]");
+  }
+  if (sizeof(struct msghdr) + sizeof(extra) > sizeof(mem->pathname)) {
+    goto deny;  // would not fit into mem->pathname, which is filled below
+  }
+
+  if (data.msg.msg_namelen ||  // an explicit destination is always refused
+      (data.sendmsg_req.flags &
+       ~(MSG_CONFIRM|MSG_DONTWAIT|MSG_EOR|MSG_MORE|MSG_NOSIGNAL|MSG_OOB))) {
+ deny:
+    SecureMem::abandonSystemCall(threadFd, -EINVAL);
+    return false;
+  }
+
+  // The trusted process receives file handles when a new untrusted thread
+  // gets created. We have security checks in place that prevent any
+  // critical information from being tampered with during thread creation.
+  // But if we disallowed passing of file handles, this would add an extra
+  // hurdle for an attacker.
+  // Unfortunately, for now, this is not possible as Chrome's
+  // base::SendRecvMsg() needs the ability to pass file handles.
+  if (data.msg.msg_controllen) {
+    data.msg.msg_control = extra + data.msg.msg_namelen;
+    struct cmsghdr *cmsg = CMSG_FIRSTHDR(&data.msg);
+    do {  // cmsg is NULL if msg_controllen cannot hold even one header
+      if (cmsg == NULL || cmsg->cmsg_level != SOL_SOCKET ||
+          cmsg->cmsg_type != SCM_RIGHTS) {
+        goto deny;
+      }
+    } while ((cmsg = CMSG_NXTHDR(&data.msg, cmsg)) != NULL);
+  }
+
+  // This must be a locked system call, because we have to ensure that the
+  // untrusted code does not tamper with the msghdr after we have examined it.
+  SecureMem::lockSystemCall(parentMapsFd, mem);
+  if (sizeof(extra) > 0) {
+    if (data.msg.msg_namelen > 0) {
+      data.msg.msg_name = mem->pathname + sizeof(struct msghdr);
+    }
+    if (data.msg.msg_controllen > 0) {
+      data.msg.msg_control = mem->pathname + sizeof(struct msghdr) +
+                             data.msg.msg_namelen;
+    }
+    memcpy(mem->pathname + sizeof(struct msghdr), extra, sizeof(extra));
+  }
+  memcpy(mem->pathname, &data.msg, sizeof(struct msghdr));
+  SecureMem::sendSystemCall(threadFdPub, true, parentMapsFd, mem,
+                            __NR_sendmsg, data.sendmsg_req.sockfd,
+                            mem->pathname - (char*)mem + (char*)mem->self,
+                            data.sendmsg_req.flags);
+  return true;
+}
+
+bool Sandbox::process_sendto(int parentMapsFd, int sandboxFd, int threadFdPub,
+                             int threadFd, SecureMem::Args* mem) {
+  // Trusted-side handler for sendto(): reads the forwarded arguments from
+  // sandboxFd and decides whether the call may be executed.
+  SendTo   req;
+  SysCalls sys;
+  if (read(sys, sandboxFd, &req, sizeof(req)) != sizeof(req)) {
+    die("Failed to read parameters for sendto() [process]");
+  }
+
+  // Two reasons to refuse the call:
+  //  - an explicit destination, as the sandbox does not allow sending to
+  //    arbitrary addresses;
+  //  - any flag outside of the supported set.
+  const int supportedFlags =
+      MSG_CONFIRM | MSG_DONTWAIT | MSG_EOR | MSG_MORE | MSG_NOSIGNAL | MSG_OOB;
+  const bool hasDestination = req.to != NULL;
+  const bool badFlags       = (req.flags & ~supportedFlags) != 0;
+  if (hasDestination || badFlags) {
+    SecureMem::abandonSystemCall(threadFd, -EINVAL);
+    return false;
+  }
+
+  // What remains is a send on a connected socket, which is comparable to
+  // write() and therefore allowed.
+  SecureMem::sendSystemCall(threadFdPub, false, -1, mem,
+                            __NR_sendto, req.sockfd,
+                            req.buf, req.len,
+                            req.flags, req.to,
+                            req.tolen);
+  return true;
+}
+
+bool Sandbox::process_setsockopt(int parentMapsFd, int sandboxFd,
+                                 int threadFdPub, int threadFd,
+                                 SecureMem::Args* mem) {
+  // Trusted-side handler for setsockopt(). The request is passed on for
+  // execution only if its (level, optname) pair is on the whitelist below;
+  // everything else is abandoned with -EINVAL.
+  SetSockOpt req;
+  SysCalls sys;
+  if (read(sys, sandboxFd, &req, sizeof(req)) != sizeof(req)) {
+    die("Failed to read parameters for setsockopt() [process]");
+  }
+
+  bool permitted = false;
+  if (req.level == SOL_SOCKET) {
+    // Whitelisted socket-level options.
+    switch (req.optname) {
+      case SO_KEEPALIVE:
+      case SO_LINGER:
+      case SO_OOBINLINE:
+      case SO_RCVBUF:
+      case SO_RCVLOWAT:
+      case SO_SNDLOWAT:
+      case SO_RCVTIMEO:
+      case SO_SNDTIMEO:
+      case SO_REUSEADDR:
+      case SO_SNDBUF:
+      case SO_TIMESTAMP:
+        permitted = true;
+        break;
+      default:
+        break;
+    }
+  } else if (req.level == IPPROTO_TCP) {
+    // Whitelisted TCP-level options.
+    switch (req.optname) {
+      case TCP_CORK:
+      case TCP_DEFER_ACCEPT:
+      case TCP_INFO:
+      case TCP_KEEPCNT:
+      case TCP_KEEPIDLE:
+      case TCP_KEEPINTVL:
+      case TCP_LINGER2:
+      case TCP_MAXSEG:
+      case TCP_NODELAY:
+      case TCP_QUICKACK:
+      case TCP_SYNCNT:
+      case TCP_WINDOW_CLAMP:
+        permitted = true;
+        break;
+      default:
+        break;
+    }
+  }
+
+  if (!permitted) {
+    SecureMem::abandonSystemCall(threadFd, -EINVAL);
+    return false;
+  }
+  SecureMem::sendSystemCall(threadFdPub, false, -1, mem,
+                            __NR_setsockopt, req.sockfd, req.level,
+                            req.optname, req.optval, req.optlen);
+  return true;
+}
+
+bool Sandbox::process_getsockopt(int parentMapsFd, int sandboxFd,
+                                 int threadFdPub, int threadFd,
+                                 SecureMem::Args* mem) {
+  // Trusted-side handler for getsockopt(). The request is passed on for
+  // execution only if its (level, optname) pair is on the whitelist below;
+  // everything else is abandoned with -EINVAL.
+  GetSockOpt req;
+  SysCalls sys;
+  if (read(sys, sandboxFd, &req, sizeof(req)) != sizeof(req)) {
+    die("Failed to read parameters for getsockopt() [process]");
+  }
+
+  bool permitted = false;
+  if (req.level == SOL_SOCKET) {
+    // Whitelisted socket-level options.
+    switch (req.optname) {
+      case SO_ACCEPTCONN:
+      case SO_ERROR:
+      case SO_KEEPALIVE:
+      case SO_LINGER:
+      case SO_OOBINLINE:
+      case SO_RCVBUF:
+      case SO_RCVLOWAT:
+      case SO_SNDLOWAT:
+      case SO_RCVTIMEO:
+      case SO_SNDTIMEO:
+      case SO_REUSEADDR:
+      case SO_SNDBUF:
+      case SO_TIMESTAMP:
+      case SO_TYPE:
+        permitted = true;
+        break;
+      default:
+        break;
+    }
+  } else if (req.level == IPPROTO_TCP) {
+    // Whitelisted TCP-level options.
+    switch (req.optname) {
+      case TCP_CORK:
+      case TCP_DEFER_ACCEPT:
+      case TCP_INFO:
+      case TCP_KEEPCNT:
+      case TCP_KEEPIDLE:
+      case TCP_KEEPINTVL:
+      case TCP_LINGER2:
+      case TCP_MAXSEG:
+      case TCP_NODELAY:
+      case TCP_QUICKACK:
+      case TCP_SYNCNT:
+      case TCP_WINDOW_CLAMP:
+        permitted = true;
+        break;
+      default:
+        break;
+    }
+  }
+
+  if (!permitted) {
+    SecureMem::abandonSystemCall(threadFd, -EINVAL);
+    return false;
+  }
+  SecureMem::sendSystemCall(threadFdPub, false, -1, mem,
+                            __NR_getsockopt, req.sockfd, req.level,
+                            req.optname, req.optval, req.optlen);
+  return true;
+}
+
+#endif
+#if defined(__NR_socketcall)
+
+enum {  // socketcall() "call" opcodes; also the index into socketCallArgInfo
+  SYS_SOCKET      =  1,  // lowest opcode accepted by sandbox_socketcall()
+  SYS_BIND        =  2,
+  SYS_CONNECT     =  3,
+  SYS_LISTEN      =  4,
+  SYS_ACCEPT      =  5,
+  SYS_GETSOCKNAME =  6,
+  SYS_GETPEERNAME =  7,
+  SYS_SOCKETPAIR  =  8,
+  SYS_SEND        =  9,
+  SYS_RECV        = 10,
+  SYS_SENDTO      = 11,
+  SYS_RECVFROM    = 12,
+  SYS_SHUTDOWN    = 13,
+  SYS_SETSOCKOPT  = 14,
+  SYS_GETSOCKOPT  = 15,
+  SYS_SENDMSG     = 16,
+  SYS_RECVMSG     = 17,
+  SYS_ACCEPT4     = 18   // highest opcode accepted by sandbox_socketcall()
+};
+
+struct Sandbox::SocketCallArgInfo {  // per-opcode layout of the arguments
+  size_t len;        // size of the opcode's argument struct, in bytes
+  off_t  addrOff;    // offset of the pointer field that has a length field
+  off_t  lengthOff;  // offset of that length field; 0 if there is none
+};
+const struct Sandbox::SocketCallArgInfo Sandbox::socketCallArgInfo[] = {
+  #define STRUCT(s)   reinterpret_cast<SocketCall *>(0)->args.s  // type only
+  #define SIZE(s)     sizeof(STRUCT(s))  // bytes in that argument struct
+  #define OFF(s, f)   offsetof(typeof STRUCT(s), f)  // offset of f within it
+  { 0                                                                  },
+  { SIZE(socket)                                                       },
+  { SIZE(bind),       OFF(bind, addr),         OFF(bind, addrlen)      },
+  { SIZE(connect),    OFF(connect, addr),      OFF(connect, addrlen)   },
+  { SIZE(listen)                                                       },
+  { SIZE(accept)                                                       },
+  { SIZE(getsockname)                                                  },
+  { SIZE(getpeername)                                                  },
+  { SIZE(socketpair)                                                   },
+  { SIZE(send)                                                         },
+  { SIZE(recv)                                                         },
+  { SIZE(sendto),     OFF(sendto, to),         OFF(sendto, tolen)      },
+  { SIZE(recvfrom)                                                     },
+  { SIZE(shutdown)                                                     },
+  { SIZE(setsockopt), OFF(setsockopt, optval), OFF(setsockopt, optlen) },
+  { SIZE(getsockopt)                                                   },
+  { SIZE(sendmsg)                                                      },
+  { SIZE(recvmsg)                                                      },
+  { SIZE(accept4)                                                      }
+  #undef STRUCT  // Rows follow the SYS_* opcode order; row 0 is a filler.
+  #undef SIZE    // Rows listing only a size leave both offsets at zero.
+  #undef OFF     // Offsets are relative to the start of the argument struct.
+};
+
+long Sandbox::sandbox_socketcall(int call, void* args) {  // untrusted side
+  long long tm;
+  Debug::syscall(&tm, __NR_socketcall, "Executing handler", call);
+
+  // When demultiplexing socketcall(), only accept calls that have a valid
+  // "call" opcode.
+  if (call < SYS_SOCKET || call > SYS_ACCEPT4) {
+    Debug::elapsed(tm, __NR_socketcall, call);
+    return -ENOSYS;
+  }
+
+  // Some calls carry a pointer to an address or name that the trusted process
+  // cannot read, as it lives in a separate address space. Append that extra
+  // data to the request, copying it so that a single atomic write() suffices.
+  // NOTE(review): extraDataAddr is the address of the pointer field inside
+  // "args", not the pointer's target -- confirm that this is intended.
+  socklen_t   numExtraData  = 0;
+  const void* extraDataAddr = NULL;
+  if (socketCallArgInfo[call].lengthOff) {  // opcode has a length field
+    memcpy(&numExtraData,
+           reinterpret_cast<char *>(args) + socketCallArgInfo[call].lengthOff,
+           sizeof(socklen_t));
+    extraDataAddr = reinterpret_cast<char *>(args) +
+                    socketCallArgInfo[call].addrOff;
+  }
+
+  // sendmsg() and recvmsg() have more complicated requirements for computing
+  // the amount of extra data that needs to be sent to the trusted process.
+  if (call == SYS_SENDMSG) {
+    SendMsg *sendmsg_args = reinterpret_cast<SendMsg *>(args);
+    if (sendmsg_args->msg->msg_iovlen == 1 &&
+        !sendmsg_args->msg->msg_control) {
+      // Further down in the code, this sendmsg() call will be simplified to
+      // a sendto() call. Make sure we already compute the correct value for
+      // numExtraData, as it is needed when we allocate "data[]" on the stack.
+      numExtraData  = sendmsg_args->msg->msg_namelen;
+      extraDataAddr = sendmsg_args->msg->msg_name;
+    } else {
+      // sendmsg() needs to include some of the extra data so that we can
+      // inspect it in process_socketcall()
+      numExtraData  = sizeof(*sendmsg_args->msg) +
+                      sendmsg_args->msg->msg_namelen +
+                      sendmsg_args->msg->msg_controllen;
+      extraDataAddr = NULL;  // payload is assembled piecewise further down
+    }
+  }
+  if (call == SYS_RECVMSG) {
+    RecvMsg *recvmsg_args = reinterpret_cast<RecvMsg *>(args);
+    numExtraData  = sizeof(*recvmsg_args->msg);
+    extraDataAddr = recvmsg_args->msg;
+  }
+
+  // Set up storage for the request header and copy the data from "args"
+  // into it.
+  struct Request {
+    int        sysnum;
+    long long  cookie;
+    SocketCall socketcall_req;
+  } __attribute__((packed)) *request;
+  char data[sizeof(struct Request) + numExtraData];  // header plus payload
+  request = reinterpret_cast<struct Request *>(data);
+  memcpy(&request->socketcall_req.args, args, socketCallArgInfo[call].len);
+
+  // Simplify send(), sendto() and sendmsg(), if there are simpler equivalent
+  // calls. This allows us to occasionally replace them with calls to write(),
+  // which don't have to be forwarded to the trusted process.
+  SysCalls sys;
+  if (call == SYS_SENDMSG &&
+      request->socketcall_req.args.sendmsg.msg->msg_iovlen == 1 &&
+      !request->socketcall_req.args.sendmsg.msg->msg_control) {
+    // Ordering of these assignments is important, as we are reshuffling
+    // fields inside of a union.
+    call = SYS_SENDTO;
+    request->socketcall_req.args.sendto.flags =
+        request->socketcall_req.args.sendmsg.flags;
+    request->socketcall_req.args.sendto.to    =
+        request->socketcall_req.args.sendmsg.msg->msg_name;
+    request->socketcall_req.args.sendto.tolen =
+        request->socketcall_req.args.sendmsg.msg->msg_namelen;
+    request->socketcall_req.args.sendto.len   =
+        request->socketcall_req.args.sendmsg.msg->msg_iov->iov_len;
+    request->socketcall_req.args.sendto.buf   =
+        request->socketcall_req.args.sendmsg.msg->msg_iov->iov_base;
+  }
+  if (call == SYS_SENDTO && !request->socketcall_req.args.sendto.to) {
+    // sendto() with a NULL address is the same as send()
+    call         = SYS_SEND;
+    numExtraData = 0;  // no address payload is sent for send()
+  }
+  if (call == SYS_SEND && !request->socketcall_req.args.send.flags) {
+    // send() with no flags is the same as write(), which is unrestricted
+    // in seccomp mode.
+    Debug::message("Replaced socketcall() with call to write()");
+    ssize_t rc = sys.write(request->socketcall_req.args.send.sockfd,
+                           request->socketcall_req.args.send.buf,
+                           request->socketcall_req.args.send.len);
+    if (rc < 0) {
+      Debug::elapsed(tm, __NR_socketcall, call);
+      return -sys.my_errno;
+    } else {
+      Debug::elapsed(tm, __NR_socketcall, call);
+      return rc;
+    }
+  }
+
+  // Simplify recv(), and recvfrom(), if there are simpler equivalent calls.
+  // This allows us to occasionally replace them with calls to read(), which
+  // don't have to be forwarded to the trusted process.
+  // We cannot simplify recvmsg() to recvfrom(), recv() or read(), as we do
+  // not know whether the caller needs us to set msg->msg_flags.
+  if (call == SYS_RECVFROM && !request->socketcall_req.args.recvfrom.from) {
+    // recvfrom() with a NULL address buffer is the same as recv()
+    call = SYS_RECV;
+  }
+  if (call == SYS_RECV && !request->socketcall_req.args.recv.flags) {
+    // recv() with no flags is the same as read(), which is unrestricted
+    // in seccomp mode.
+    Debug::message("Replaced socketcall() with call to read()");
+    ssize_t rc = sys.read(request->socketcall_req.args.recv.sockfd,
+                          request->socketcall_req.args.recv.buf,
+                          request->socketcall_req.args.recv.len);
+    if (rc < 0) {
+      Debug::elapsed(tm, __NR_socketcall, call);
+      return -sys.my_errno;
+    } else {
+      Debug::elapsed(tm, __NR_socketcall, call);
+      return rc;
+    }
+  }
+
+  // Fill in the rest of the request header. Note that "call" may have been
+  // rewritten above, while arg_ptr still refers to the caller's "args".
+  request->sysnum                 = __NR_socketcall;
+  request->cookie                 = cookie();
+  request->socketcall_req.call    = call;
+  request->socketcall_req.arg_ptr = args;
+  int padding = sizeof(request->socketcall_req.args) -
+                socketCallArgInfo[call].len;
+  if (padding > 0) {  // zero the tail of the union this opcode does not use
+    memset((char *)(&request->socketcall_req.args + 1) - padding, 0, padding);
+  }
+  if (call == SYS_SENDMSG) {
+    // for sendmsg() we include the (optional) destination address, and the
+    // (optional) control data in the payload.
+    SendMsg *sendmsg_args = reinterpret_cast<SendMsg *>(args);
+    memcpy(reinterpret_cast<char *>(
+      memcpy(reinterpret_cast<char *>(
+        memcpy(request + 1, sendmsg_args->msg, sizeof(*sendmsg_args->msg))) +
+          sizeof(*sendmsg_args->msg),
+          sendmsg_args->msg->msg_name, sendmsg_args->msg->msg_namelen)) +
+            sendmsg_args->msg->msg_namelen,
+            sendmsg_args->msg->msg_control, sendmsg_args->msg->msg_controllen);
+  } else if (extraDataAddr) {
+    memcpy(request + 1, extraDataAddr, numExtraData);
+  }
+
+  // Send request to trusted process and collect response from trusted thread.
+  long rc;
+  ssize_t len                     = sizeof(struct Request) + numExtraData;
+  if (write(sys, processFdPub(), data, len) != len ||
+      read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
+    die("Failed to forward socketcall() request [sandbox]");
+  }
+  Debug::elapsed(tm, __NR_socketcall, call);
+  return rc;
+}
+
+bool Sandbox::process_socketcall(int parentMapsFd, int sandboxFd,
+                                 int threadFdPub, int threadFd,
+                                 SecureMem::Args* mem) {
+  // Read request
+  SocketCall socketcall_req;
+  SysCalls sys;
+  if (read(sys, sandboxFd, &socketcall_req, sizeof(socketcall_req)) !=
+      sizeof(socketcall_req)) {
+    die("Failed to read parameters for socketcall() [process]");
+  }
+
+  // sandbox_socketcall() should never send us an unexpected "call" opcode.
+  // If it did, something went very wrong and we better terminate the process.
+  if (socketcall_req.call < SYS_SOCKET || socketcall_req.call > SYS_ACCEPT4) {
+    die("Unexpected socketcall() [process]");
+  }
+
+  // Check if this particular operation carries an extra payload.
+  socklen_t numExtraData = 0;
+  if (socketCallArgInfo[socketcall_req.call].lengthOff) {
+    memcpy(&numExtraData,
+           reinterpret_cast<char *>(&socketcall_req) +
+           socketCallArgInfo[socketcall_req.call].lengthOff,
+           sizeof(socklen_t));
+  } else if (socketcall_req.call == SYS_SENDMSG) {
+    numExtraData  = sizeof(*socketcall_req.args.sendmsg.msg);
+  } else if (socketcall_req.call == SYS_RECVMSG) {
+    numExtraData  = sizeof(*socketcall_req.args.recvmsg.msg);
+  }
+
+  // Verify that the length for the payload is reasonable. We don't want to
+  // blow up our stack, and excessive (or negative) buffer sizes are almost
+  // certainly a bug.
+  if (numExtraData > 4096) {
+    die("Unexpected size for socketcall() payload [process]");
+  }
+
+  // Read the extra payload, if any.
+  char extra[numExtraData];
+  if (numExtraData) {
+    if (read(sys, sandboxFd, extra, numExtraData) != (ssize_t)numExtraData) {
+      die("Failed to read socketcall() payload [process]");
+    }
+  }
+
+  // sendmsg() has another level of indirection and can carry even more payload
+  ssize_t numSendmsgExtra = 0;
+  if (socketcall_req.call == SYS_SENDMSG) {
+    struct msghdr* msg = reinterpret_cast<struct msghdr*>(extra);
+    if (msg->msg_namelen > 4096 || msg->msg_controllen > 4096) {
+      die("Unexpected size for socketcall() payload [process]");
+    }
+    numSendmsgExtra = msg->msg_namelen + msg->msg_controllen;
+  }
+  char sendmsgExtra[numSendmsgExtra];
+  if (numSendmsgExtra) {
+    if (read(sys, sandboxFd, sendmsgExtra, numSendmsgExtra) !=
+        numSendmsgExtra) {
+      die("Failed to read socketcall() payload [process]");
+    }
+  }
+
+  int rc = -EINVAL;
+  switch (socketcall_req.call) {
+    case SYS_SOCKET:
+      // The sandbox does not allow creation of any new sockets.
+      goto deny;
+    case SYS_BIND:
+      // The sandbox does not allow binding an address to a socket.
+      goto deny;
+    case SYS_CONNECT:
+      // The sandbox does not allow connecting a socket.
+      goto deny;
+    case SYS_LISTEN:
+      // The sandbox does not allow a socket to enter listening state.
+      goto deny;
+    case SYS_ACCEPT4:
+    case SYS_ACCEPT:
+      // If the sandbox obtained a socket that is already in the listening
+      // state (e.g. because somebody sent it a suitable file descriptor), it
+      // is permissible to call accept().
+
+    accept_simple:
+      // None of the parameters need to be checked, so it is OK to refer
+      // to the parameter block created by the untrusted code.
+      SecureMem::sendSystemCall(threadFdPub, false, -1, mem, __NR_socketcall,
+                                socketcall_req.call, socketcall_req.arg_ptr);
+      return true;
+    case SYS_GETSOCKNAME:
+    case SYS_GETPEERNAME:
+      // Querying the local and the remote name is not considered security
+      // sensitive for the purposes of the sandbox.
+      goto accept_simple;
+    case SYS_SOCKETPAIR:
+      // Socket pairs are connected to each other and not considered
+      // security sensitive.
+      goto accept_simple;
+    case SYS_SENDTO:
+      if (socketcall_req.args.sendto.to) {
+        // The sandbox does not allow sending to arbitrary addresses.
+        goto deny;
+      }
+      // Fall through
+    case SYS_SEND:
+      if (socketcall_req.args.send.flags &
+          ~(MSG_CONFIRM|MSG_DONTWAIT|MSG_EOR|MSG_MORE|MSG_NOSIGNAL|MSG_OOB)) {
+        // Unsupported flag encountered. Deny the call.
+        goto deny;
+      }
+      // Sending data on a connected socket is similar to calling write().
+      // Allow it.
+
+    accept_complex:
+      // The parameter block contains potentially security critical information
+      // that should not be tampered with after it has been inspected. Copy it
+      // into the write-protected securely shared memory before telling the
+      // trusted thread to execute the socket call.
+      SecureMem::lockSystemCall(parentMapsFd, mem);
+      memcpy(mem->pathname, &socketcall_req.args, sizeof(socketcall_req.args));
+      SecureMem::sendSystemCall(threadFdPub, true, parentMapsFd, mem,
+                                __NR_socketcall, socketcall_req.call,
+                                mem->pathname - (char*)mem + (char*)mem->self);
+      return true;
+    case SYS_RECVFROM:
+      // While we do not anticipate any particular need to receive data on
+      // unconnected sockets, there is no particular risk in doing so.
+      // Fall through
+    case SYS_RECV:
+      if (socketcall_req.args.recv.flags &
+          ~(MSG_DONTWAIT|MSG_OOB|MSG_PEEK|MSG_TRUNC|MSG_WAITALL)) {
+        // Unsupported flag encountered. Deny the call.
+        goto deny;
+      }
+      // Receiving data on a connected socket is similar to calling read().
+      // Allow it.
+      goto accept_complex;
+    case SYS_SHUTDOWN:
+      // Shutting down a socket is always OK.
+      goto accept_simple;
+    case SYS_SETSOCKOPT:
+      switch (socketcall_req.args.setsockopt.level) {
+        case SOL_SOCKET:
+          switch (socketcall_req.args.setsockopt.optname) {
+            case SO_KEEPALIVE:
+            case SO_LINGER:
+            case SO_OOBINLINE:
+            case SO_RCVBUF:
+            case SO_RCVLOWAT:
+            case SO_SNDLOWAT:
+            case SO_RCVTIMEO:
+            case SO_SNDTIMEO:
+            case SO_REUSEADDR:
+            case SO_SNDBUF:
+            case SO_TIMESTAMP:
+              goto accept_complex;
+            default:
+              break;
+          }
+          break;
+        case IPPROTO_TCP:
+          switch (socketcall_req.args.setsockopt.optname) {
+            case TCP_CORK:
+            case TCP_DEFER_ACCEPT:
+            case TCP_INFO:
+            case TCP_KEEPCNT:
+            case TCP_KEEPIDLE:
+            case TCP_KEEPINTVL:
+            case TCP_LINGER2:
+            case TCP_MAXSEG:
+            case TCP_NODELAY:
+            case TCP_QUICKACK:
+            case TCP_SYNCNT:
+            case TCP_WINDOW_CLAMP:
+              goto accept_complex;
+            default:
+              break;
+          }
+          break;
+        default:
+          break;
+      }
+      goto deny;
+    case SYS_GETSOCKOPT:
+      switch (socketcall_req.args.getsockopt.level) {
+        case SOL_SOCKET:
+          switch (socketcall_req.args.getsockopt.optname) {
+            case SO_ACCEPTCONN:
+            case SO_ERROR:
+            case SO_KEEPALIVE:
+            case SO_LINGER:
+            case SO_OOBINLINE:
+            case SO_RCVBUF:
+            case SO_RCVLOWAT:
+            case SO_SNDLOWAT:
+            case SO_RCVTIMEO:
+            case SO_SNDTIMEO:
+            case SO_REUSEADDR:
+            case SO_SNDBUF:
+            case SO_TIMESTAMP:
+            case SO_TYPE:
+              goto accept_complex;
+            default:
+              break;
+          }
+          break;
+        case IPPROTO_TCP:
+          switch (socketcall_req.args.getsockopt.optname) {
+            case TCP_CORK:
+            case TCP_DEFER_ACCEPT:
+            case TCP_INFO:
+            case TCP_KEEPCNT:
+            case TCP_KEEPIDLE:
+            case TCP_KEEPINTVL:
+            case TCP_LINGER2:
+            case TCP_MAXSEG:
+            case TCP_NODELAY:
+            case TCP_QUICKACK:
+            case TCP_SYNCNT:
+            case TCP_WINDOW_CLAMP:
+              goto accept_complex;
+            default:
+              break;
+          }
+          break;
+        default:
+          break;
+      }
+      goto deny;
+    case SYS_SENDMSG: {
+      struct msghdr* msg = reinterpret_cast<struct msghdr*>(extra);
+
+      if (sizeof(socketcall_req.args) + sizeof(*msg) + numSendmsgExtra >
+          sizeof(mem->pathname)) {
+        goto deny;
+      }
+
+      if (msg->msg_namelen ||
+          (socketcall_req.args.sendmsg.flags &
+           ~(MSG_CONFIRM|MSG_DONTWAIT|MSG_EOR|MSG_MORE|MSG_NOSIGNAL|MSG_OOB))){
+        goto deny;
+      }
+
+      // The trusted process receives file handles when a new untrusted thread
+      // gets created. We have security checks in place that prevent any
+      // critical information from being tampered with during thread creation.
+      // But if we disallowed passing of file handles, this would add an extra
+      // hurdle for an attacker.
+      // Unfortunately, for now, this is not possible as Chrome's
+      // base::SendRecvMsg() needs the ability to pass file handles.
+      if (msg->msg_controllen) {
+        msg->msg_control = sendmsgExtra + msg->msg_namelen;
+        struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg);
+        do {
+          if (cmsg->cmsg_level != SOL_SOCKET ||
+              cmsg->cmsg_type != SCM_RIGHTS) {
+            goto deny;
+          }
+        } while ((cmsg = CMSG_NXTHDR(msg, cmsg)) != NULL);
+      }
+
+      // This must be a locked system call, because we have to ensure that
+      // the untrusted code does not tamper with the msghdr after we have
+      // examined it.
+      SecureMem::lockSystemCall(parentMapsFd, mem);
+      socketcall_req.args.sendmsg.msg =
+          reinterpret_cast<struct msghdr*>(mem->pathname +
+                                           sizeof(socketcall_req.args) -
+                                           (char*)mem + (char*)mem->self);
+      memcpy(mem->pathname, &socketcall_req.args, sizeof(socketcall_req.args));
+      if (numSendmsgExtra) {
+        if (msg->msg_namelen > 0) {
+          msg->msg_name = const_cast<struct msghdr*>(
+              socketcall_req.args.sendmsg.msg) + 1;
+        }
+        if (msg->msg_controllen > 0) {
+          msg->msg_control = (char *)(
+              socketcall_req.args.sendmsg.msg + 1) + msg->msg_namelen;
+        }
+        memcpy(mem->pathname + sizeof(socketcall_req.args) + sizeof(*msg),
+               sendmsgExtra, numSendmsgExtra);
+      }
+      memcpy(mem->pathname + sizeof(socketcall_req.args), msg, sizeof(*msg));
+      SecureMem::sendSystemCall(threadFdPub, true, parentMapsFd, mem,
+                                __NR_socketcall, socketcall_req.call,
+                                mem->pathname - (char*)mem + (char*)mem->self);
+      return true;
+    }
+    case SYS_RECVMSG:
+      // Receiving messages is generally not security critical.
+      if (socketcall_req.args.recvmsg.flags &
+          ~(MSG_DONTWAIT|MSG_OOB|MSG_PEEK|MSG_TRUNC|MSG_WAITALL)) {
+        goto deny;
+      }
+      goto accept_complex;
+    default:
+    deny:
+      SecureMem::abandonSystemCall(threadFd, rc);
+      return false;
+  }
+}
+
+#endif
+
+} // namespace
diff --git a/sandbox/linux/seccomp/stat.cc b/sandbox/linux/seccomp/stat.cc
new file mode 100644
index 0000000..cdf7e4c
--- /dev/null
+++ b/sandbox/linux/seccomp/stat.cc
@@ -0,0 +1,197 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "debug.h"
+#include "sandbox_impl.h"
+
+namespace playground {
+
+// Sandboxed-side handler for stat(). Packages the request (system call
+// number, cookie, Stat header and the raw path bytes without a trailing NUL)
+// into one message, writes it to processFdPub() and then waits for the
+// result on threadFdPub(). Dies if either transfer is incomplete.
+long Sandbox::sandbox_stat(const char *path, void *buf) {
+  long long startTime;
+  Debug::syscall(&startTime, __NR_stat, "Executing handler");
+
+  // Wire format: a packed fixed-size header immediately followed by the
+  // path name.
+  struct Request {
+    int       sysnum;
+    long long cookie;
+    Stat      stat_req;
+    char      pathname[0];
+  } __attribute__((packed));
+
+  const size_t pathLen = strlen(path);
+  char message[sizeof(struct Request) + pathLen];
+  struct Request* const req = reinterpret_cast<struct Request*>(message);
+  req->sysnum               = __NR_stat;
+  req->cookie               = cookie();
+  req->stat_req.sysnum      = __NR_stat;
+  req->stat_req.path_length = pathLen;
+  req->stat_req.buf         = buf;
+  memcpy(req->pathname, path, pathLen);
+
+  // Send the request; only wait for the reply if it was sent completely.
+  long result;
+  SysCalls sys;
+  bool ok = write(sys, processFdPub(), req, sizeof(message)) ==
+            (int)sizeof(message);
+  ok      = ok && read(sys, threadFdPub(), &result, sizeof(result)) ==
+            sizeof(result);
+  if (!ok) {
+    die("Failed to forward stat() request [sandbox]");
+  }
+  Debug::elapsed(startTime, __NR_stat);
+  return result;
+}
+
+// Sandboxed-side handler for lstat(). Packages the request (system call
+// number, cookie, Stat header and the raw path bytes without a trailing NUL)
+// into one message, writes it to processFdPub() and then waits for the
+// result on threadFdPub(). Dies if either transfer is incomplete.
+long Sandbox::sandbox_lstat(const char *path, void *buf) {
+  long long startTime;
+  Debug::syscall(&startTime, __NR_lstat, "Executing handler");
+
+  // Wire format: a packed fixed-size header immediately followed by the
+  // path name.
+  struct Request {
+    int       sysnum;
+    long long cookie;
+    Stat      stat_req;
+    char      pathname[0];
+  } __attribute__((packed));
+
+  const size_t pathLen = strlen(path);
+  char message[sizeof(struct Request) + pathLen];
+  struct Request* const req = reinterpret_cast<struct Request*>(message);
+  req->sysnum               = __NR_lstat;
+  req->cookie               = cookie();
+  req->stat_req.sysnum      = __NR_lstat;
+  req->stat_req.path_length = pathLen;
+  req->stat_req.buf         = buf;
+  memcpy(req->pathname, path, pathLen);
+
+  // Send the request; only wait for the reply if it was sent completely.
+  long result;
+  SysCalls sys;
+  bool ok = write(sys, processFdPub(), req, sizeof(message)) ==
+            (int)sizeof(message);
+  ok      = ok && read(sys, threadFdPub(), &result, sizeof(result)) ==
+            sizeof(result);
+  if (!ok) {
+    die("Failed to forward lstat() request [sandbox]");
+  }
+  Debug::elapsed(startTime, __NR_lstat);
+  return result;
+}
+
+#if defined(__NR_stat64)
+// Sandboxed-side handler for stat64(). Packages the request (system call
+// number, cookie, Stat header and the raw path bytes without a trailing NUL)
+// into one message, writes it to processFdPub() and then waits for the
+// result on threadFdPub(). Dies if either transfer is incomplete.
+long Sandbox::sandbox_stat64(const char *path, void *buf) {
+  long long startTime;
+  Debug::syscall(&startTime, __NR_stat64, "Executing handler");
+
+  // Wire format: a packed fixed-size header immediately followed by the
+  // path name.
+  struct Request {
+    int       sysnum;
+    long long cookie;
+    Stat      stat_req;
+    char      pathname[0];
+  } __attribute__((packed));
+
+  const size_t pathLen = strlen(path);
+  char message[sizeof(struct Request) + pathLen];
+  struct Request* const req = reinterpret_cast<struct Request*>(message);
+  req->sysnum               = __NR_stat64;
+  req->cookie               = cookie();
+  req->stat_req.sysnum      = __NR_stat64;
+  req->stat_req.path_length = pathLen;
+  req->stat_req.buf         = buf;
+  memcpy(req->pathname, path, pathLen);
+
+  // Send the request; only wait for the reply if it was sent completely.
+  long result;
+  SysCalls sys;
+  bool ok = write(sys, processFdPub(), req, sizeof(message)) ==
+            (int)sizeof(message);
+  ok      = ok && read(sys, threadFdPub(), &result, sizeof(result)) ==
+            sizeof(result);
+  if (!ok) {
+    die("Failed to forward stat64() request [sandbox]");
+  }
+  Debug::elapsed(startTime, __NR_stat64);
+  return result;
+}
+
+// Sandboxed-side handler for lstat64(). Packages the request (system call
+// number, cookie, Stat header and the raw path bytes without a trailing NUL)
+// into one message, writes it to processFdPub() and then waits for the
+// result on threadFdPub(). Dies if either transfer is incomplete.
+long Sandbox::sandbox_lstat64(const char *path, void *buf) {
+  long long startTime;
+  Debug::syscall(&startTime, __NR_lstat64, "Executing handler");
+
+  // Wire format: a packed fixed-size header immediately followed by the
+  // path name.
+  struct Request {
+    int       sysnum;
+    long long cookie;
+    Stat      stat_req;
+    char      pathname[0];
+  } __attribute__((packed));
+
+  const size_t pathLen = strlen(path);
+  char message[sizeof(struct Request) + pathLen];
+  struct Request* const req = reinterpret_cast<struct Request*>(message);
+  req->sysnum               = __NR_lstat64;
+  req->cookie               = cookie();
+  req->stat_req.sysnum      = __NR_lstat64;
+  req->stat_req.path_length = pathLen;
+  req->stat_req.buf         = buf;
+  memcpy(req->pathname, path, pathLen);
+
+  // Send the request; only wait for the reply if it was sent completely.
+  long result;
+  SysCalls sys;
+  bool ok = write(sys, processFdPub(), req, sizeof(message)) ==
+            (int)sizeof(message);
+  ok      = ok && read(sys, threadFdPub(), &result, sizeof(result)) ==
+            sizeof(result);
+  if (!ok) {
+    die("Failed to forward lstat64() request [sandbox]");
+  }
+  Debug::elapsed(startTime, __NR_lstat64);
+  return result;
+}
+#endif
+
+// Trusted-process side of the stat() family of system calls.
+//
+// Reads the Stat request header from sandboxFd, followed by the path bytes
+// (which the sandboxed caller sends without a terminating NUL). Over-long
+// paths are drained and answered with -ENAMETOOLONG. If the policy does not
+// allow access to the file namespace, the call is abandoned with -EACCES.
+// Otherwise the path is copied into the securely shared memory and the
+// trusted thread is told to execute the system call.
+//
+// Returns true if a system call was handed to the trusted thread, and false
+// if the request was rejected. Dies on malformed or truncated requests.
+bool Sandbox::process_stat(int parentMapsFd, int sandboxFd, int threadFdPub,
+                           int threadFd, SecureMem::Args* mem) {
+  // Read request
+  SysCalls sys;
+  Stat stat_req;
+  if (read(sys, sandboxFd, &stat_req, sizeof(stat_req)) != sizeof(stat_req)) {
+ read_parm_failed:
+    die("Failed to read parameters for stat() [process]");
+  }
+  // The sandboxed caller reads back a "long"-sized result (c.f.
+  // sandbox_stat()), so the error code must be written with that same width.
+  // An "int" would be too short on x86-64.
+  long  rc                  = -ENAMETOOLONG;
+  if (stat_req.path_length >= (int)sizeof(mem->pathname)) {
+    // The path does not fit into the secure memory. Drain it from the socket
+    // so that the stream stays in sync, then report the error.
+    char buf[32];
+    while (stat_req.path_length > 0) {
+      size_t len            = stat_req.path_length > sizeof(buf) ?
+                              sizeof(buf) : stat_req.path_length;
+      ssize_t i             = read(sys, sandboxFd, buf, len);
+      if (i <= 0) {
+        goto read_parm_failed;
+      }
+      stat_req.path_length -= i;
+    }
+    if (write(sys, threadFd, &rc, sizeof(rc)) != sizeof(rc)) {
+      die("Failed to return data from stat() [process]");
+    }
+    return false;
+  }
+  if (stat_req.sysnum != __NR_stat && stat_req.sysnum != __NR_lstat
+    #ifdef __NR_stat64
+      && stat_req.sysnum != __NR_stat64
+    #endif
+    #ifdef __NR_lstat64
+      && stat_req.sysnum != __NR_lstat64
+    #endif
+     ) {
+    die("Corrupted stat() request");
+  }
+
+  if (!g_policy.allow_file_namespace) {
+    // After locking the mutex, we can no longer abandon the system call. So,
+    // perform checks before clobbering the securely shared memory.
+    char tmp[stat_req.path_length];
+    if (read(sys, sandboxFd, tmp, stat_req.path_length) !=
+        (ssize_t)stat_req.path_length) {
+      goto read_parm_failed;
+    }
+    // "tmp" holds exactly path_length bytes and is not NUL terminated, so
+    // the length has to be passed explicitly when building the string.
+    Debug::message(("Denying access to \"" +
+                    std::string(tmp, stat_req.path_length) + "\"").c_str());
+    SecureMem::abandonSystemCall(threadFd, -EACCES);
+    return false;
+  }
+
+  SecureMem::lockSystemCall(parentMapsFd, mem);
+  if (read(sys, sandboxFd, mem->pathname, stat_req.path_length) !=
+      (ssize_t)stat_req.path_length) {
+    goto read_parm_failed;
+  }
+  // The range check above guarantees that there is room for the terminator.
+  mem->pathname[stat_req.path_length] = '\000';
+
+  // TODO(markus): Implement sandboxing policy
+  Debug::message(("Allowing access to \"" + std::string(mem->pathname) +
+                  "\"").c_str());
+
+  // Tell trusted thread to stat the file.
+  SecureMem::sendSystemCall(threadFdPub, true, parentMapsFd, mem,
+                            stat_req.sysnum,
+                            mem->pathname - (char*)mem + (char*)mem->self,
+                            stat_req.buf);
+  return true;
+}
+
+} // namespace
diff --git a/sandbox/linux/seccomp/syscall.cc b/sandbox/linux/seccomp/syscall.cc
new file mode 100644
index 0000000..681fec9
--- /dev/null
+++ b/sandbox/linux/seccomp/syscall.cc
@@ -0,0 +1,380 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "debug.h"
+#include "sandbox_impl.h"
+#include "syscall_table.h"
+
+namespace playground {
+
+// TODO(markus): change this into a function that returns the address of the assembly code. If that isn't possible for sandbox_clone, then move that function into a *.S file
+// Assembly entry points of the sandbox:
+//  - playground$sandbox_clone passes a pointer to the registers saved by the
+//    system call wrapper on to playground$sandbox__clone.
+//  - playground$syscallWrapper is called by the untrusted code in place of a
+//    system call. It saves the registers, looks up the handler in
+//    playground$syscallTable and either calls that handler or falls back to
+//    playground$defaultSystemCallHandler.
+asm(
+    ".pushsection .text, \"ax\", @progbits\n"
+
+    // This is the special wrapper for the clone() system call. The code
+    // relies on the stack layout of the system call wrapper (c.f. below). It
+    // passes the stack pointer as an additional argument to sandbox__clone(),
+    // so that upon starting the child, register values can be restored and
+    // the child can start executing at the correct IP, instead of trying to
+    // run in the trusted thread.
+    "playground$sandbox_clone:"
+    ".globl playground$sandbox_clone\n"
+    ".type playground$sandbox_clone, @function\n"
+    #if defined(__x86_64__)
+    // Skip the 8 byte return address into the system call wrapper. The
+    // following bytes are the saved register values that we need to restore
+    // upon return from clone() in the new thread.
+    "lea 8(%rsp), %r9\n"
+    "jmp playground$sandbox__clone\n"
+    #elif defined(__i386__)
+    // As i386 passes function arguments on the stack, we need to skip a few
+    // more values before we can get to the saved registers.
+    "lea 28(%esp), %eax\n"
+    "mov %eax, 24(%esp)\n"
+    "jmp playground$sandbox__clone\n"
+    #else
+    #error Unsupported target platform
+    #endif
+    ".size playground$sandbox_clone, .-playground$sandbox_clone\n"
+
+
+    // This is the wrapper which is called by the untrusted code, trying to
+    // make a system call.
+    "playground$syscallWrapper:"
+    ".internal playground$syscallWrapper\n"
+    ".globl playground$syscallWrapper\n"
+    ".type playground$syscallWrapper, @function\n"
+    #if defined(__x86_64__)
+    // Check for rt_sigreturn(). It needs to be handled specially.
+    "cmp  $15, %rax\n"             // NR_rt_sigreturn
+    "jnz  1f\n"
+    "add  $0x90, %rsp\n"           // pop return addresses and red zone
+  "0:syscall\n"                    // rt_sigreturn() is unrestricted
+    "mov  $66, %edi\n"             // rt_sigreturn() should never return
+    "mov  $231, %eax\n"            // NR_exit_group
+    "jmp  0b\n"
+
+    // Save all registers
+  "1:push %rbp\n"
+    "mov  %rsp, %rbp\n"
+    "push %rbx\n"
+    "push %rcx\n"
+    "push %rdx\n"
+    "push %rsi\n"
+    "push %rdi\n"
+    "push %r8\n"
+    "push %r9\n"
+    "push %r10\n"
+    "push %r11\n"
+    "push %r12\n"
+    "push %r13\n"
+    "push %r14\n"
+    "push %r15\n"
+
+    // Convert from syscall calling conventions to C calling conventions.
+    // System calls have a subtly different register ordering than the user-
+    // space x86-64 ABI.
+    "mov %r10, %rcx\n"
+
+    // Check range of system call
+    "cmp playground$maxSyscall(%rip), %eax\n"
+    "ja  3f\n"
+
+    // Retrieve function call from system call table (c.f. syscall_table.c).
+    // We have three different types of entries; zero for denied system calls,
+    // that should be handled by the defaultSystemCallHandler(); one
+    // (UNRESTRICTED_SYSCALL, c.f. the "cmp $1" check below) for unrestricted
+    // system calls that need to be forwarded to the trusted thread; and
+    // function pointers to specific handler functions.
+    "mov %rax, %r10\n"
+    "shl $4, %r10\n"
+    "lea playground$syscallTable(%rip), %r11\n"
+    "add %r11, %r10\n"
+    "mov 0(%r10), %r10\n"
+
+    // Jump to function if non-null and not UNRESTRICTED_SYSCALL, otherwise
+    // jump to fallback handler.
+    "cmp $1, %r10\n"
+    "jbe 3f\n"
+    "call *%r10\n"
+  "2:"
+
+    // Restore CPU registers, except for %rax which was set by the system call.
+    "pop %r15\n"
+    "pop %r14\n"
+    "pop %r13\n"
+    "pop %r12\n"
+    "pop %r11\n"
+    "pop %r10\n"
+    "pop %r9\n"
+    "pop %r8\n"
+    "pop %rdi\n"
+    "pop %rsi\n"
+    "pop %rdx\n"
+    "pop %rcx\n"
+    "pop %rbx\n"
+    "pop %rbp\n"
+
+    // Remove fake return address. This is added in the patching code in
+    // library.cc and it makes stack traces a little cleaner.
+    "add $8, %rsp\n"
+
+    // Return to caller
+    "ret\n"
+
+  "3:"
+    // If we end up calling a specific handler, we don't need to know the
+    // system call number. However, in the generic case, we do. Shift
+    // registers so that the system call number becomes visible as the
+    // first function argument.
+    "push %r9\n"
+    "mov  %r8, %r9\n"
+    "mov  %rcx, %r8\n"
+    "mov  %rdx, %rcx\n"
+    "mov  %rsi, %rdx\n"
+    "mov  %rdi, %rsi\n"
+    "mov  %rax, %rdi\n"
+
+    // Call default handler.
+    "call playground$defaultSystemCallHandler\n"
+    "pop  %r9\n"
+    "jmp 2b\n"
+    #elif defined(__i386__)
+    "cmp  $119, %eax\n"            // NR_sigreturn
+    "jnz  1f\n"
+    "add  $0x4, %esp\n"            // pop return address
+  "0:int  $0x80\n"                 // sigreturn() is unrestricted
+    // sigreturn() should never return. If it does, terminate with exit
+    // status 66. The status is passed in %ebx; %eax must hold the number of
+    // the exit() system call, which is 1 on i386 (66 would be setsid()).
+    "mov  $66, %ebx\n"             // exit status
+    "mov  $1, %eax\n"              // NR_exit
+    "jmp  0b\n"
+  "1:cmp  $173, %eax\n"            // NR_rt_sigreturn
+    "jnz  3f\n"
+
+    // Convert rt_sigframe into sigframe, allowing us to call sigreturn().
+    // This is possible since the first part of signal stack frames have
+    // stayed very stable since the earliest kernel versions. While never
+    // officially documented, lots of user space applications rely on this
+    // part of the ABI, and kernel developers have been careful to maintain
+    // backwards compatibility.
+    // In general, the rt_sigframe includes a lot of extra information that
+    // the signal handler can look at. Most notably, this means a complete
+    // siginfo record.
+    // Fortunately though, the kernel doesn't look at any of this extra data
+    // when returning from a signal handler. So, we can safely convert an
+    // rt_sigframe to a legacy sigframe, discarding the extra data in the
+    // process. Interestingly, the legacy signal frame is actually larger than
+    // the rt signal frame, as it includes a lot more padding.
+    "sub  $0x1C8, %esp\n"          // a legacy signal stack is much larger
+    "mov  0x1CC(%esp), %eax\n"     // push signal number
+    "push %eax\n"
+    "lea  0x270(%esp), %esi\n"     // copy siginfo register values
+    "lea  0x4(%esp), %edi\n"       //     into new location
+    "mov  $0x16, %ecx\n"
+    "cld\n"
+    "rep movsl\n"
+    "mov  0x2C8(%esp), %ebx\n"     // copy first half of signal mask
+    "mov  %ebx, 0x54(%esp)\n"
+    "lea  2f, %esi\n"
+    "push %esi\n"                  // push restorer function
+    "lea  0x2D4(%esp), %edi\n"     // patch up retcode magic numbers
+    "movb $2, %cl\n"
+    "rep movsl\n"
+    "ret\n"                        // return to restorer function
+  "2:pop  %eax\n"                  // remove dummy argument (signo)
+    "mov  $119, %eax\n"            // NR_sigreturn
+    "int  $0x80\n"
+
+
+    // Preserve all registers
+  "3:push %ebx\n"
+    "push %ecx\n"
+    "push %edx\n"
+    "push %esi\n"
+    "push %edi\n"
+    "push %ebp\n"
+
+    // Convert from syscall calling conventions to C calling conventions
+    "push %ebp\n"
+    "push %edi\n"
+    "push %esi\n"
+    "push %edx\n"
+    "push %ecx\n"
+    "push %ebx\n"
+    "push %eax\n"
+
+    // Check range of system call
+    "cmp playground$maxSyscall, %eax\n"
+    "ja  9f\n"
+
+    // We often have long sequences of calls to gettimeofday(). This is
+    // needlessly expensive. Coalesce them into a single call.
+    //
+    // We keep track of state in TLS storage that we can access through
+    // the %fs segment register. See trusted_thread.cc for the exact
+    // memory layout.
+    //
+    // TODO(markus): maybe, we should proactively call gettimeofday() and
+    //               clock_gettime(), whenever we talk to the trusted thread?
+    //               or maybe, if we have recently seen requests to compute
+    //               the time. There might be a repeated pattern of those.
+    "cmp  $78, %eax\n"             // __NR_gettimeofday
+    "jnz  6f\n"
+    "cmp  %eax, %fs:0x102C-0x58\n" // last system call
+    "jnz  4f\n"
+
+    // This system call and the last system call prior to this one both are
+    // calls to gettimeofday(). Try to avoid making the new call and just
+    // return the same result as in the previous call.
+    // Just in case the caller is spinning on the result from gettimeofday(),
+    // every so often, call the actual system call.
+    "decl %fs:0x1030-0x58\n"       // countdown calls to gettimeofday()
+    "jz   4f\n"
+
+    // Atomically read the 64bit word representing last-known timestamp and
+    // return it to the caller. On x86-32 this is a little more complicated and
+    // requires the use of the cmpxchg8b instruction.
+    "mov  %ebx, %eax\n"
+    "mov  %ecx, %edx\n"
+    "lock; cmpxchg8b 100f\n"
+    "mov  %eax, 0(%ebx)\n"
+    "mov  %edx, 4(%ebx)\n"
+    "xor  %eax, %eax\n"
+    "add  $28, %esp\n"
+    "jmp  8f\n"
+
+    // This is a call to gettimeofday(), but we don't have a valid cached
+    // result, yet.
+  "4:mov  %eax, %fs:0x102C-0x58\n" // remember syscall number
+    "movl $500, %fs:0x1030-0x58\n" // make system call, each 500 invocations
+    "call playground$defaultSystemCallHandler\n"
+
+    // Returned from gettimeofday(). Remember return value, in case the
+    // application calls us again right away.
+    // Again, this has to happen atomically and requires cmpxchg8b.
+    "mov 4(%ebx), %ecx\n"
+    "mov 0(%ebx), %ebx\n"
+    "mov 100f, %eax\n"
+    "mov 101f, %edx\n"
+  "5:lock; cmpxchg8b 100f\n"
+    "jnz 5b\n"
+    "xor %eax, %eax\n"
+    "jmp 10f\n"
+
+    // Remember the number of the last system call made. We deliberately do
+    // not remember calls to gettid(), as we have often seen long sequences
+    // of calls to just gettimeofday() and gettid(). In that situation, we
+    // would still like to coalesce the gettimeofday() calls.
+  "6:cmp $224, %eax\n"             // __NR_gettid
+    "jz  7f\n"
+    "mov  %eax, %fs:0x102C-0x58\n" // remember syscall number
+
+    // Retrieve function call from system call table (c.f. syscall_table.c).
+    // We have three different types of entries; zero for denied system calls,
+    // that should be handled by the defaultSystemCallHandler(); one
+    // (UNRESTRICTED_SYSCALL, c.f. the "cmp $1" check below) for unrestricted
+    // system calls that need to be forwarded to the trusted thread; and
+    // function pointers to specific handler functions.
+  "7:shl  $3, %eax\n"
+    "lea  playground$syscallTable, %ebx\n"
+    "add  %ebx, %eax\n"
+    "mov  0(%eax), %eax\n"
+
+    // Jump to function if non-null and not UNRESTRICTED_SYSCALL, otherwise
+    // jump to fallback handler.
+    "cmp  $1, %eax\n"
+    "jbe  9f\n"
+    "add  $4, %esp\n"
+    "call *%eax\n"
+    "add  $24, %esp\n"
+
+    // Restore CPU registers, except for %eax which was set by the system call.
+  "8:pop  %ebp\n"
+    "pop  %edi\n"
+    "pop  %esi\n"
+    "pop  %edx\n"
+    "pop  %ecx\n"
+    "pop  %ebx\n"
+
+    // Return to caller
+    "ret\n"
+
+    // Call default handler.
+  "9:call playground$defaultSystemCallHandler\n"
+ "10:add  $28, %esp\n"
+    "jmp 8b\n"
+
+    // Storage for the cached 64bit gettimeofday() result (labels 100/101
+    // are its low and high halves).
+    ".pushsection \".bss\"\n"
+    ".balign 8\n"
+"100:.byte 0, 0, 0, 0\n"
+"101:.byte 0, 0, 0, 0\n"
+    ".popsection\n"
+
+    #else
+    #error Unsupported target platform
+    #endif
+    ".size playground$syscallWrapper, .-playground$syscallWrapper\n"
+    ".popsection\n"
+);
+
+
+// Fallback handler for system calls that have no dedicated handler in the
+// system call table. read() and write() are executed directly. System calls
+// marked UNRESTRICTED_SYSCALL are forwarded over threadFdPub() and the reply
+// is returned to the caller. Everything else fails with -ENOSYS, unless
+// debugging is enabled, in which case the call is forwarded as well.
+void* Sandbox::defaultSystemCallHandler(int syscallNum, void* arg0, void* arg1,
+                                        void* arg2, void* arg3, void* arg4,
+                                        void* arg5) {
+  // TODO(markus): The following comment is currently not true, we do
+  //               intercept these system calls. Try to fix that.
+
+  // We try to avoid intercepting read(), and write(), as these system calls
+  // are not restricted in Seccomp mode. But depending on the exact
+  // instruction sequence in libc, we might not be able to reliably
+  // filter out these system calls at the time when we instrument the code.
+  SysCalls  sys;
+  long long startTime;
+
+  if (syscallNum == __NR_read || syscallNum == __NR_write) {
+    Debug::syscall(&startTime, syscallNum,
+                   "Allowing unrestricted system call");
+    long result;
+    if (syscallNum == __NR_read) {
+      result = sys.read((long)arg0, arg1, (size_t)arg2);
+    } else {
+      result = sys.write((long)arg0, arg1, (size_t)arg2);
+    }
+    // Translate failures into the kernel's negative errno convention.
+    if (result < 0) {
+      result = -sys.my_errno;
+    }
+    Debug::elapsed(startTime, syscallNum);
+    return (void *)result;
+  }
+
+  // In debug mode, prevent stderr from being closed
+  if (Debug::isEnabled() && syscallNum == __NR_close && arg0 == (void *)2) {
+    return 0;
+  }
+
+  // Decide whether this call may be forwarded.
+  const bool unrestricted =
+      (unsigned)syscallNum <= maxSyscall &&
+      syscallTable[syscallNum].handler == UNRESTRICTED_SYSCALL;
+  if (unrestricted) {
+    Debug::syscall(&startTime, syscallNum,
+                   "Allowing unrestricted system call");
+  } else if (Debug::isEnabled()) {
+    Debug::syscall(&startTime, syscallNum,
+                   "In production mode, this call would be disallowed");
+  } else {
+    return (void *)-ENOSYS;
+  }
+
+  // Forward the system call number and all six arguments, then wait for the
+  // result.
+  struct {
+    int          sysnum;
+    void*        unrestricted_req[6];
+  } __attribute__((packed)) request = {
+    syscallNum, { arg0, arg1, arg2, arg3, arg4, arg5 } };
+
+  const int fd = threadFdPub();
+  void* reply;
+  if (write(sys, fd, &request, sizeof(request)) != sizeof(request) ||
+      read(sys, fd, &reply, sizeof(reply)) != sizeof(reply)) {
+    die("Failed to forward unrestricted system call");
+  }
+  Debug::elapsed(startTime, syscallNum);
+  return reply;
+}
+
+} // namespace
diff --git a/sandbox/linux/seccomp/syscall.h b/sandbox/linux/seccomp/syscall.h
new file mode 100644
index 0000000..1315e12
--- /dev/null
+++ b/sandbox/linux/seccomp/syscall.h
@@ -0,0 +1,22 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SYSCALL_H__
+#define SYSCALL_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Entry point that untrusted code calls in order to make a system call. The
+// asm label binds this declaration to the assembler-level symbol
+// "playground$syscallWrapper", which is implemented in assembly in
+// syscall.cc. On x86-64 the symbol is declared with internal visibility.
+void syscallWrapper() asm("playground$syscallWrapper")
+#if defined(__x86_64__)
+                      __attribute__((visibility("internal")))
+#endif
+;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // SYSCALL_H__
diff --git a/sandbox/linux/seccomp/syscall_table.c b/sandbox/linux/seccomp/syscall_table.c
new file mode 100644
index 0000000..c9dd7a4
--- /dev/null
+++ b/sandbox/linux/seccomp/syscall_table.c
@@ -0,0 +1,153 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <asm/unistd.h>
+#include "sandbox_impl.h"
+#include "syscall_table.h"
+
+// Fallback system call numbers for kernel headers that do not define them
+// yet. Each value is only used when <asm/unistd.h> left the name undefined.
+#if defined(__x86_64__)
+#ifndef __NR_set_robust_list
+#define __NR_set_robust_list 273
+#endif
+#ifndef __NR_accept4
+#define __NR_accept4         288
+#endif
+#elif defined(__i386__)
+#ifndef __NR_set_robust_list
+#define __NR_set_robust_list 311
+#endif
+#else
+#error Unsupported target platform
+#endif
+
+// TODO(markus): This is an incredibly dirty hack to make the syscallTable
+//               live in r/o memory.
+//               Unfortunately, gcc doesn't give us a clean option to do
+//               this. Ultimately, we should probably write some code that
+//               parses /usr/include/asm/unistd*.h and generates a *.S file.
+//               But we then need to figure out how to integrate this code
+//               with our build system.
+
+// System call policy table, indexed by system call number through designated
+// initializers. Each entry is { handler, trustedProcess }:
+//   - handler is either UNRESTRICTED_SYSCALL or the address of a sandbox_*()
+//     function;
+//   - trustedProcess is the matching process_*() callback, or 0 if there is
+//     none.
+// Indices that are not listed are zero-initialized. Entries wrapped in
+// "#if defined(__NR_...)" exist only on architectures whose headers define
+// that system call number.
+// NOTE(review): the section name string presumably ends in "\n#" so that
+// whatever the compiler appends after the section name is commented out in
+// the generated assembly -- confirm against the compiler output.
+const struct SyscallTable syscallTable[] __attribute__((
+    section(".rodata, \"a\", @progbits\n#"))) ={
+
+  #if defined(__NR_accept)
+  [ __NR_accept          ] = { UNRESTRICTED_SYSCALL,     0                   },
+  [ __NR_accept4         ] = { UNRESTRICTED_SYSCALL,     0                   },
+  #endif
+  [ __NR_access          ] = { (void*)&sandbox_access,   process_access      },
+  [ __NR_brk             ] = { UNRESTRICTED_SYSCALL,     0                   },
+  [ __NR_clock_gettime   ] = { UNRESTRICTED_SYSCALL,     0                   },
+  [ __NR_clone           ] = { (void*)&sandbox_clone,    process_clone       },
+  [ __NR_close           ] = { UNRESTRICTED_SYSCALL,     0                   },
+  [ __NR_dup             ] = { UNRESTRICTED_SYSCALL,     0                   },
+  [ __NR_dup2            ] = { UNRESTRICTED_SYSCALL,     0                   },
+  [ __NR_epoll_create    ] = { UNRESTRICTED_SYSCALL,     0                   },
+  [ __NR_epoll_ctl       ] = { UNRESTRICTED_SYSCALL,     0                   },
+  [ __NR_epoll_wait      ] = { UNRESTRICTED_SYSCALL,     0                   },
+  [ __NR_exit            ] = { (void*)&sandbox_exit,     process_exit        },
+  [ __NR_exit_group      ] = { UNRESTRICTED_SYSCALL,     0                   },
+  [ __NR_fcntl           ] = { UNRESTRICTED_SYSCALL,     0                   },
+  #if defined(__NR_fcntl64)
+  [ __NR_fcntl64         ] = { UNRESTRICTED_SYSCALL,     0                   },
+  #endif
+  [ __NR_fstat           ] = { UNRESTRICTED_SYSCALL,     0                   },
+  #if defined(__NR_fstat64)
+  [ __NR_fstat64         ] = { UNRESTRICTED_SYSCALL,     0                   },
+  #endif
+  [ __NR_futex           ] = { UNRESTRICTED_SYSCALL,     0                   },
+  [ __NR_getdents        ] = { UNRESTRICTED_SYSCALL,     0                   },
+  [ __NR_getdents64      ] = { UNRESTRICTED_SYSCALL,     0                   },
+  #if defined(__NR_getpeername)
+  [ __NR_getpeername     ] = { UNRESTRICTED_SYSCALL,     0                   },
+  #endif
+  [ __NR_getpid          ] = { (void*)&sandbox_getpid,   0                   },
+  #if defined(__NR_getsockname)
+  [ __NR_getsockname     ] = { UNRESTRICTED_SYSCALL,     0                   },
+  [ __NR_getsockopt      ] = { (void*)&sandbox_getsockopt,process_getsockopt },
+  #endif
+  [ __NR_gettid          ] = { (void*)&sandbox_gettid,   0                   },
+  [ __NR_gettimeofday    ] = { UNRESTRICTED_SYSCALL,     0                   },
+  [ __NR_ioctl           ] = { (void*)&sandbox_ioctl,    process_ioctl       },
+  #if defined(__NR_ipc)
+  [ __NR_ipc             ] = { (void*)&sandbox_ipc,      process_ipc         },
+  #endif
+  #if defined(__NR__llseek)
+  [ __NR__llseek         ] = { UNRESTRICTED_SYSCALL,     0                   },
+  #endif
+  [ __NR_lseek           ] = { UNRESTRICTED_SYSCALL,     0                   },
+  [ __NR_lstat           ] = { (void*)&sandbox_lstat,    process_stat        },
+  #if defined(__NR_lstat64)
+  [ __NR_lstat64         ] = { (void*)&sandbox_lstat64,  process_stat        },
+  #endif
+  [ __NR_madvise         ] = { (void*)&sandbox_madvise,  process_madvise     },
+  #if defined(__NR_mmap2)
+  [ __NR_mmap2           ] =
+  #else
+  [ __NR_mmap            ] =
+  #endif
+                             { (void*)&sandbox_mmap,     process_mmap        },
+  [ __NR_mprotect        ] = { (void*)&sandbox_mprotect, process_mprotect    },
+  [ __NR_munmap          ] = { (void*)&sandbox_munmap,   process_munmap      },
+  [ __NR_open            ] = { (void*)&sandbox_open,     process_open        },
+  [ __NR_pipe            ] = { UNRESTRICTED_SYSCALL,     0                   },
+  [ __NR_poll            ] = { UNRESTRICTED_SYSCALL,     0                   },
+  #if defined(__NR_recvfrom)
+  [ __NR_recvfrom        ] = { (void*)&sandbox_recvfrom, process_recvfrom    },
+  [ __NR_recvmsg         ] = { (void*)&sandbox_recvmsg,  process_recvmsg     },
+  #endif
+  #if defined(__NR_rt_sigaction)
+  [ __NR_rt_sigaction    ] = { (void*)&sandbox_rt_sigaction,process_sigaction},
+  #endif
+  #if defined(__NR_rt_sigprocmask)
+  [ __NR_rt_sigprocmask  ] = { (void*)&sandbox_rt_sigprocmask, 0             },
+  #endif
+  #if defined(__NR_sendmsg)
+  [ __NR_sendmsg         ] = { (void*)&sandbox_sendmsg,  process_sendmsg     },
+  [ __NR_sendto          ] = { (void*)&sandbox_sendto,   process_sendto      },
+  #endif
+  [ __NR_set_robust_list ] = { UNRESTRICTED_SYSCALL,     0                   },
+  #if defined(__NR_setsockopt)
+  [ __NR_setsockopt      ] = { (void*)&sandbox_setsockopt,process_setsockopt },
+  #endif
+  #if defined(__NR_shmat)
+  [ __NR_shmat           ] = { (void*)&sandbox_shmat,    process_shmat       },
+  [ __NR_shmctl          ] = { (void*)&sandbox_shmctl,   process_shmctl      },
+  [ __NR_shmdt           ] = { (void*)&sandbox_shmdt,    process_shmdt       },
+  [ __NR_shmget          ] = { (void*)&sandbox_shmget,   process_shmget      },
+  #endif
+  #if defined(__NR_shutdown)
+  [ __NR_shutdown        ] = { UNRESTRICTED_SYSCALL,     0                   },
+  #endif
+  #if defined(__NR_sigaction)
+  [ __NR_sigaction       ] = { (void*)&sandbox_sigaction,process_sigaction   },
+  #endif
+  #if defined(__NR_signal)
+  [ __NR_signal          ] = { (void*)&sandbox_signal,   process_sigaction   },
+  #endif
+  #if defined(__NR_sigprocmask)
+  [ __NR_sigprocmask     ] = { (void*)&sandbox_sigprocmask, 0                },
+  #endif
+  #if defined(__NR_socketpair)
+  [ __NR_socketpair      ] = { UNRESTRICTED_SYSCALL,     0                   },
+  #endif
+  #if defined(__NR_socketcall)
+  [ __NR_socketcall      ] = { (void*)&sandbox_socketcall,process_socketcall },
+  #endif
+  [ __NR_stat            ] = { (void*)&sandbox_stat,     process_stat        },
+  #if defined(__NR_stat64)
+  [ __NR_stat64          ] = { (void*)&sandbox_stat64,   process_stat        },
+  #endif
+  [ __NR_time            ] = { UNRESTRICTED_SYSCALL,     0                   },
+  [ __NR_uname           ] = { UNRESTRICTED_SYSCALL,     0                   },
+};
+// Highest valid index into syscallTable. The system call dispatcher
+// bounds-checks with "syscallNum <= maxSyscall" before indexing the table,
+// so this has to be the number of entries minus one; storing the raw entry
+// count would let an index one past the end of the table through.
+const unsigned maxSyscall __attribute__((section(".rodata"))) =
+    sizeof(syscallTable)/sizeof(struct SyscallTable) - 1;
+
+// One 4096-byte, 4096-byte-aligned array of ints, emitted into .rodata under
+// the assembler name "playground$syscall_mutex" (internal visibility on
+// x86-64). Only the first element is given an explicit value (0x80000000);
+// the rest are zero.
+// NOTE(review): presumably this page is remapped at run time so that it can
+// serve as a lock word -- confirm against the code that uses it.
+const int syscall_mutex_[4096/sizeof(int)] asm("playground$syscall_mutex")
+    __attribute__((section(".rodata"),aligned(4096)
+#if defined(__x86_64__)
+                   ,visibility("internal")
+#endif
+                   )) = { 0x80000000 };
diff --git a/sandbox/linux/seccomp/syscall_table.h b/sandbox/linux/seccomp/syscall_table.h
new file mode 100644
index 0000000..5bd6791
--- /dev/null
+++ b/sandbox/linux/seccomp/syscall_table.h
@@ -0,0 +1,43 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SYSCALL_TABLE_H__
+#define SYSCALL_TABLE_H__
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+#include "securemem.h"
+extern "C" {
+namespace playground {
+#define SecureMemArgs SecureMem::Args
+#else
+#define SecureMemArgs void
+#define bool          int
+#endif
+  // Sentinel stored in SyscallTable::handler for system calls that carry no
+  // dedicated sandbox_*() handler.
+  #define UNRESTRICTED_SYSCALL ((void *)1)
+
+  // One entry of the system call table.
+  struct SyscallTable {
+    // Either UNRESTRICTED_SYSCALL, the address of a handler function, or
+    // null for system calls that have no table entry.
+    void   *handler;
+    // Optional callback (may be null) taking the file descriptors named in
+    // the parameter list plus a pointer to the secure memory arguments.
+    bool  (*trustedProcess)(int parentMapsFd, int sandboxFd, int threadFdPub,
+                            int threadFd, SecureMemArgs* mem);
+  };
+  // The system call table and its bound. Both are bound to fixed
+  // assembler-level symbol names ("playground$...") and, on x86-64, given
+  // internal visibility.
+  extern const struct SyscallTable syscallTable[]
+    asm("playground$syscallTable")
+#if defined(__x86_64__)
+    __attribute__((visibility("internal")))
+#endif
+    ;
+  extern const unsigned maxSyscall
+    asm("playground$maxSyscall")
+#if defined(__x86_64__)
+    __attribute__((visibility("internal")))
+#endif
+    ;
+#ifdef __cplusplus
+} // namespace
+}
+#endif
+
+#endif // SYSCALL_TABLE_H__
diff --git a/sandbox/linux/seccomp/tests/list_tests.py b/sandbox/linux/seccomp/tests/list_tests.py
new file mode 100644
index 0000000..011a52e
--- /dev/null
+++ b/sandbox/linux/seccomp/tests/list_tests.py
@@ -0,0 +1,22 @@
+# Copyright (c) 2010 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import re
+import sys
+
+
+def get_tests(filename):
+    for line in open(filename):
+        match = re.match(r"TEST\((\w+)\)", line)
+        if match is not None:
+            yield match.group(1)
+
+
+def main(args):
+    for name in get_tests(args[0]):
+        print '  { "%s", %s },' % (name, name)
+
+
+# Script entry point: pass the command line arguments (without the program
+# name) to main().
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff --git a/sandbox/linux/seccomp/tests/test_syscalls.cc b/sandbox/linux/seccomp/tests/test_syscalls.cc
new file mode 100644
index 0000000..3e6acd5
--- /dev/null
+++ b/sandbox/linux/seccomp/tests/test_syscalls.cc
@@ -0,0 +1,758 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <assert.h>
+#include <dirent.h>
+#include <pthread.h>
+#include <pty.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "sandbox_impl.h"
+
+// Diagnostic output: MSG() forwards to printf() when DEBUG is defined and
+// expands to an empty statement otherwise.
+#ifdef DEBUG
+#define MSG(fmt, ...) printf(fmt, ##__VA_ARGS__)
+#else
+#define MSG(fmt, ...) do { } while (0)
+#endif
+
+// Write end of the pipe to the test runner, or -1 when a test is run
+// without forking.
+int g_intended_status_fd = -1;
+
+// Announces the wait() status that the test subprocess expects to terminate
+// with. |val| is a signal number when |is_signal| is set, and an exit code
+// otherwise.
+void intend_exit_status(int val, bool is_signal) {
+  val = is_signal ? W_EXITCODE(0, val) : W_EXITCODE(val, 0);
+  if (g_intended_status_fd == -1) {
+    // Nobody is listening (a single test is being run without forking), so
+    // just report the status on stdout.
+    printf("Intending to exit with status %i...\n", val);
+    return;
+  }
+  int sent = write(g_intended_status_fd, &val, sizeof(val));
+  assert(sent == sizeof(val));
+}
+
+
+// This is basically a marker to grep for.
+#define TEST(name) void name()
+
+// Checks that dup() and close() keep working once the sandbox is active.
+TEST(test_dup) {
+  StartSeccompSandbox();
+  // Test a simple syscall that is marked as UNRESTRICTED_SYSCALL.
+  int fd = dup(1);
+  assert(fd >= 0);
+  int rc = close(fd);
+  assert(rc == 0);
+}
+
+// Expects the process to be killed by SIGSEGV when it executes "hlt" inside
+// the sandbox.
+TEST(test_segfault) {
+  StartSeccompSandbox();
+  // Check that the sandbox's SIGSEGV handler does not stop the
+  // process from dying cleanly in the event of a real segfault.
+  intend_exit_status(SIGSEGV, true);
+  asm("hlt");
+}
+
+// Expects _exit(123) issued inside the sandbox to produce exit code 123.
+TEST(test_exit) {
+  StartSeccompSandbox();
+  intend_exit_status(123, false);
+  _exit(123);
+}
+
+// Returns the number of entries in /proc/self/fd. The result is three too
+// high because it includes ".", ".." and the descriptor that is used to read
+// the directory itself. Callers only compare two results against each
+// other, so the constant offset is harmless.
+static int count_fds() {
+  DIR *dir = opendir("/proc/self/fd");
+  assert(dir != NULL);
+  int count = 0;
+  for (struct dirent *d = readdir(dir); d != NULL; d = readdir(dir)) {
+    ++count;
+  }
+  int rc = closedir(dir);
+  assert(rc == 0);
+  return count;
+}
+
+// Thread body used by test_thread: stores 123 through the int pointer that
+// is passed in as |x| and hands back 456 as the thread's result.
+static void *thread_func(void *x) {
+  *((int *) x) = 123;
+  MSG("In new thread\n");
+  return (void *) 456;
+}
+
+// Creates and joins a thread inside the sandbox, and checks that the thread
+// ran, that its result is passed back, and that no file descriptors were
+// leaked in the process.
+TEST(test_thread) {
+  playground::g_policy.allow_file_namespace = true;  // To allow count_fds()
+  StartSeccompSandbox();
+  int fd_count1 = count_fds();
+  pthread_t tid;
+  int x = 999;
+  void *result;
+  // Joining a thread that was never created is undefined behaviour, so fail
+  // right here if thread creation is refused.
+  int rc = pthread_create(&tid, NULL, thread_func, &x);
+  assert(rc == 0);
+  MSG("Waiting for thread\n");
+  rc = pthread_join(tid, &result);
+  assert(rc == 0);
+  assert(result == (void *) 456);
+  assert(x == 123);
+  // Check that the process has not leaked FDs.
+  int fd_count2 = count_fds();
+  assert(fd_count2 == fd_count1);
+}
+
+// Entry point of the thread created by test_clone: stores 124 through the
+// int pointer passed in as |x| and then terminates via a direct __NR_exit
+// system call.
+static int clone_func(void *x) {
+  int *ptr = (int *) x;
+  *ptr = 124;
+  MSG("In thread\n");
+  // On x86-64, returning from this function calls the __NR_exit_group
+  // syscall instead of __NR_exit.
+  syscall(__NR_exit, 100);
+  // Not reached.
+  return 200;
+}
+
+#if defined(__i386__)
+// Returns the current value of the %gs segment register.
+static int get_gs() {
+  int gs;
+  asm volatile("mov %%gs, %0" : "=r"(gs));
+  return gs;
+}
+#endif
+
+// Returns the word stored at offset 0 of the thread-local segment (%fs on
+// x86-64, %gs on i386).
+// NOTE(review): presumably that word is the thread's own TLS base address --
+// confirm against the platform's TLS ABI.
+static void *get_tls_base() {
+  void *base;
+#if defined(__x86_64__)
+  asm volatile("mov %%fs:0, %0" : "=r"(base));
+#elif defined(__i386__)
+  asm volatile("mov %%gs:0, %0" : "=r"(base));
+#else
+#error Unsupported target platform
+#endif
+  return base;
+}
+
+// Creates a thread with a raw clone() call inside the sandbox, waits for it
+// to exit, and checks that it ran and that no file descriptors were leaked.
+TEST(test_clone) {
+  playground::g_policy.allow_file_namespace = true;  // To allow count_fds()
+  StartSeccompSandbox();
+  int fd_count1 = count_fds();
+  int stack_size = 0x1000;
+  char *stack = (char *) malloc(stack_size);
+  assert(stack != NULL);
+  int flags = CLONE_VM | CLONE_FS | CLONE_FILES |
+    CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM |
+    CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID;
+  int tid = -1;
+  int x = 999;
+
+  // The sandbox requires us to pass CLONE_SETTLS.  Pass settings that
+  // are enough to copy the parent thread's TLS setup.  This allows us
+  // to invoke libc in the child thread.
+#if defined(__x86_64__)
+  void *tls = get_tls_base();
+#elif defined(__i386__)
+  struct user_desc tls_desc, *tls = &tls_desc;
+  tls_desc.entry_number = get_gs() >> 3;
+  tls_desc.base_addr = (long) get_tls_base();
+  tls_desc.limit = 0xfffff;
+  tls_desc.seg_32bit = 1;
+  tls_desc.contents = 0;
+  tls_desc.read_exec_only = 0;
+  tls_desc.limit_in_pages = 1;
+  tls_desc.seg_not_present = 0;
+  tls_desc.useable = 1;
+#else
+#error Unsupported target platform
+#endif
+
+  // The stack pointer handed to clone() is the end of the allocation.
+  int rc = clone(clone_func, (void *) (stack + stack_size), flags, &x,
+                 &tid, tls, &tid);
+  assert(rc > 0);
+  // |tid| is passed as both the parent-tid and child-tid pointer. Wait on it
+  // as a futex for as long as it still holds the new thread's id.
+  while (tid == rc) {
+    syscall(__NR_futex, &tid, FUTEX_WAIT, rc, NULL);
+  }
+  assert(tid == 0);
+  assert(x == 124);
+  // Check that the process has not leaked FDs.
+  int fd_count2 = count_fds();
+  assert(fd_count2 == fd_count1);
+}
+
+// Thread entry point for a clone() call that is expected to be rejected; it
+// only prints a message if it is ever reached.
+static int uncalled_clone_func(void *x) {
+  printf("In thread func, which shouldn't happen\n");
+  return 1;
+}
+
+// Expects clone() with an incomplete flag set to fail with EPERM inside the
+// sandbox.
+TEST(test_clone_disallowed_flags) {
+  StartSeccompSandbox();
+  int stack_size = 4096;
+  char *stack = (char *) malloc(stack_size);
+  assert(stack != NULL);
+  /* We omit the flags CLONE_SETTLS, CLONE_PARENT_SETTID and
+     CLONE_CHILD_CLEARTID; leaving them out is disallowed by the sandbox. */
+  int flags = CLONE_VM | CLONE_FS | CLONE_FILES |
+    CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM;
+  int rc = clone(uncalled_clone_func, (void *) (stack + stack_size),
+                 flags, NULL, NULL, NULL, NULL);
+  assert(rc == -1);
+  assert(errno == EPERM);
+}
+
+// Thread body used by test_fp_regs: copies the low 32 bits of %xmm0 into a
+// local and prints them in debug builds.
+static void *fp_thread(void *x) {
+  int val;
+  asm("movss %%xmm0, %0" : "=m"(val));
+  MSG("val=%i\n", val);
+  return NULL;
+}
+
+// Loads a value into %xmm0 and then runs a thread that reads %xmm0, to
+// check that thread creation inside the sandbox copes with SSE register use.
+TEST(test_fp_regs) {
+  StartSeccompSandbox();
+  int val = 1234;
+  // The instruction reads |val| and writes %xmm0, so |val| has to be an
+  // input operand and %xmm0 has to be listed as clobbered. (Declaring |val|
+  // as an output would permit the compiler to drop the store of 1234.)
+  asm volatile("movss %0, %%xmm0" : : "m"(val) : "xmm0");
+  pthread_t tid;
+  pthread_create(&tid, NULL, fp_thread, NULL);
+  pthread_join(tid, NULL);
+  MSG("thread done OK\n");
+}
+
+// Executes the rdtsc instruction and returns the 64-bit time stamp counter.
+static long long read_tsc() {
+  // rdtsc delivers its result in %edx:%eax. Both registers are therefore
+  // declared as outputs: an asm statement must not modify registers that it
+  // only lists as inputs, and letting the compiler do the final store avoids
+  // writing to memory behind its back.
+  unsigned int lo, hi;
+  asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
+  return (long long) (((unsigned long long) hi << 32) | lo);
+}
+
+// Executes rdtsc inside the sandbox; the value that is read is ignored.
+TEST(test_rdtsc) {
+  StartSeccompSandbox();
+  // Just check that we can do the instruction.
+  read_tsc();
+}
+
+// Checks that getpid() reports the same process id before and after the
+// sandbox is started, through both the libc wrapper and the raw system call.
+TEST(test_getpid) {
+  int pid1 = getpid();
+  StartSeccompSandbox();
+  int pid2 = getpid();
+  assert(pid1 == pid2);
+  // Bypass any caching that glibc's getpid() wrapper might do.
+  int pid3 = syscall(__NR_getpid);
+  assert(pid1 == pid3);
+}
+
+// Checks that gettid reports the same thread id before and after the sandbox
+// is started.
+TEST(test_gettid) {
+  // glibc doesn't provide a gettid() wrapper.
+  int tid1 = syscall(__NR_gettid);
+  assert(tid1 > 0);
+  StartSeccompSandbox();
+  int tid2 = syscall(__NR_gettid);
+  assert(tid1 == tid2);
+}
+
+// Maps 0x1000 bytes of private, anonymous, read-only memory and returns the
+// address; asserts that the mapping succeeded.
+static void *map_something() {
+  const int prot = PROT_READ;
+  const int flags = MAP_PRIVATE | MAP_ANONYMOUS;
+  void *addr = mmap(NULL, 0x1000, prot, flags, -1, 0);
+  assert(addr != MAP_FAILED);
+  return addr;
+}
+
+// Expects a MAP_FIXED mmap() over a mapping that predates the sandbox to
+// fail with EINVAL.
+TEST(test_mmap_disallows_remapping) {
+  void *addr = map_something();
+  StartSeccompSandbox();
+  // Overwriting a mapping that was created before the sandbox was
+  // enabled is not allowed.
+  void *result = mmap(addr, 0x1000, PROT_READ,
+                      MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+  assert(result == MAP_FAILED);
+  assert(errno == EINVAL);
+}
+
+// Expects a MAP_FIXED mmap() at address 0 to fail with EINVAL inside the
+// sandbox.
+TEST(test_mmap_disallows_low_address) {
+  StartSeccompSandbox();
+  // Mapping pages at low addresses is not allowed because this helps
+  // with exploiting buggy kernels.
+  void *result = mmap(NULL, 0x1000, PROT_READ,
+                      MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+  assert(result == MAP_FAILED);
+  assert(errno == EINVAL);
+}
+
+// Expects munmap() to succeed on memory that was mapped after the sandbox
+// was started.
+TEST(test_munmap_allowed) {
+  StartSeccompSandbox();
+  void *addr = map_something();
+  int result = munmap(addr, 0x1000);
+  assert(result == 0);
+}
+
+// Expects munmap() to fail with EINVAL on memory that was mapped before the
+// sandbox was started.
+TEST(test_munmap_disallowed) {
+  void *addr = map_something();
+  StartSeccompSandbox();
+  int result = munmap(addr, 0x1000);
+  assert(result == -1);
+  assert(errno == EINVAL);
+}
+
+// Expects mprotect() to succeed on memory that was mapped after the sandbox
+// was started.
+TEST(test_mprotect_allowed) {
+  StartSeccompSandbox();
+  void *addr = map_something();
+  int result = mprotect(addr, 0x1000, PROT_READ | PROT_WRITE);
+  assert(result == 0);
+}
+
+// Expects mprotect() to fail with EINVAL on memory that was mapped before
+// the sandbox was started.
+TEST(test_mprotect_disallowed) {
+  void *addr = map_something();
+  StartSeccompSandbox();
+  int result = mprotect(addr, 0x1000, PROT_READ | PROT_WRITE);
+  assert(result == -1);
+  assert(errno == EINVAL);
+}
+
+// Opens a new pseudo-terminal pair and returns the descriptor of its tty
+// side. The master side stays open and is not returned.
+// NOTE(review): presumably the master is left open on purpose so that the
+// tty side remains usable -- confirm.
+static int get_tty_fd() {
+  int master_fd, tty_fd;
+  int rc = openpty(&master_fd, &tty_fd, NULL, NULL, NULL);
+  assert(rc == 0);
+  return tty_fd;
+}
+
+// Expects the TIOCGWINSZ ioctl on a tty to succeed inside the sandbox.
+TEST(test_ioctl_tiocgwinsz_allowed) {
+  int tty_fd = get_tty_fd();
+  StartSeccompSandbox();
+  int size[2];
+  // Get terminal width and height.
+  int result = ioctl(tty_fd, TIOCGWINSZ, size);
+  assert(result == 0);
+}
+
+// Expects the TIOCSTI ioctl on a tty to fail with EINVAL inside the sandbox.
+TEST(test_ioctl_disallowed) {
+  int tty_fd = get_tty_fd();
+  StartSeccompSandbox();
+  // This ioctl call inserts a character into the tty's input queue,
+  // which provides a way to send commands to an interactive shell.
+  char c = 'x';
+  int result = ioctl(tty_fd, TIOCSTI, &c);
+  assert(result == -1);
+  assert(errno == EINVAL);
+}
+
+// Expects socket() to fail inside the sandbox; either EINVAL or ENOSYS is
+// accepted as the error.
+TEST(test_socket) {
+  StartSeccompSandbox();
+  int fd = socket(AF_UNIX, SOCK_STREAM, 0);
+  assert(fd == -1);
+  // TODO: Make it consistent between i386 and x86-64.
+  assert(errno == EINVAL || errno == ENOSYS);
+}
+
+// Expects open() to fail with EACCES when the sandbox was started without
+// allow_file_namespace, and setting the flag afterwards to have no effect.
+TEST(test_open_disabled) {
+  StartSeccompSandbox();
+  int fd = open("/dev/null", O_RDONLY);
+  assert(fd == -1);
+  assert(errno == EACCES);
+
+  // Writing to the policy flag does not change this.
+  playground::g_policy.allow_file_namespace = true;
+  fd = open("/dev/null", O_RDONLY);
+  assert(fd == -1);
+  assert(errno == EACCES);
+}
+
+// With allow_file_namespace set before the sandbox starts, expects a
+// read-only open() to succeed and a write-only open() to fail with EACCES.
+TEST(test_open_enabled) {
+  playground::g_policy.allow_file_namespace = true;
+  StartSeccompSandbox();
+  int fd = open("/dev/null", O_RDONLY);
+  assert(fd >= 0);
+  int rc = close(fd);
+  assert(rc == 0);
+  fd = open("/dev/null", O_WRONLY);
+  assert(fd == -1);
+  assert(errno == EACCES);
+}
+
+// Expects access() to fail with EACCES when the sandbox was started without
+// allow_file_namespace.
+TEST(test_access_disabled) {
+  StartSeccompSandbox();
+  int rc = access("/dev/null", R_OK);
+  assert(rc == -1);
+  assert(errno == EACCES);
+}
+
+// With allow_file_namespace set before the sandbox starts, expects access()
+// to succeed for an existing path and to report ENOENT for a missing one.
+TEST(test_access_enabled) {
+  playground::g_policy.allow_file_namespace = true;
+  StartSeccompSandbox();
+  int rc = access("/dev/null", R_OK);
+  assert(rc == 0);
+  rc = access("path-that-does-not-exist", R_OK);
+  assert(rc == -1);
+  assert(errno == ENOENT);
+}
+
+// Expects stat() to fail with EACCES when the sandbox was started without
+// allow_file_namespace.
+TEST(test_stat_disabled) {
+  StartSeccompSandbox();
+  struct stat st;
+  int rc = stat("/dev/null", &st);
+  assert(rc == -1);
+  assert(errno == EACCES);
+}
+
+// With allow_file_namespace set before the sandbox starts, expects stat() to
+// succeed for an existing path and to report ENOENT for a missing one.
+TEST(test_stat_enabled) {
+  playground::g_policy.allow_file_namespace = true;
+  StartSeccompSandbox();
+  struct stat st;
+  int rc = stat("/dev/null", &st);
+  assert(rc == 0);
+  rc = stat("path-that-does-not-exist", &st);
+  assert(rc == -1);
+  assert(errno == ENOENT);
+}
+
+// Scratch value shared between the signal tests and their handlers. Tests
+// set it before raising a signal and inspect it afterwards.
+static int g_value;
+
+// signal()-style handler: records that it ran by setting g_value to 300.
+static void signal_handler(int sig) {
+  g_value = 300;
+  MSG("In signal handler\n");
+}
+
+// SA_SIGINFO-style handler: records that it ran by setting g_value to 300.
+static void sigaction_handler(int sig, siginfo_t *a, void *b) {
+  g_value = 300;
+  MSG("In sigaction handler\n");
+}
+
+// Handler that the assembly trampoline in generic_signal_handler() invokes.
+// It is bound to the unmangled symbol name "g_sig_handler_ptr" so that the
+// inline assembly can reference it directly.
+static void (*g_sig_handler_ptr)(int sig, void *addr) asm("g_sig_handler_ptr");
+
+// Records that it ran by setting g_value to 300. |addr| is the instruction
+// address that the trampoline passes along.
+static void non_fatal_sig_handler(int sig, void *addr) {
+  g_value = 300;
+  MSG("Caught signal %d at %p\n", sig, addr);
+}
+
+// Handler that deliberately raises the same kind of fault again from inside
+// the handler, the first time it is entered after g_value was reset to 0.
+static void fatal_sig_handler(int sig, void *addr) {
+  // Recursively trigger another segmentation fault while already in the SEGV
+  // handler. This should terminate the program if SIGSEGV is marked as a
+  // deferred signal.
+  // Only do this on the first entry to this function. Otherwise, the signal
+  // handler was probably marked as SA_NODEFER and we want to continue
+  // execution.
+  if (!g_value++) {
+    MSG("Caught signal %d at %p\n", sig, addr);
+    if (sig == SIGSEGV) {
+      asm volatile("hlt");
+    } else {
+      asm volatile("int3");
+    }
+  }
+}
+
+// Returns the address of an assembly trampoline that is suitable for use as
+// a sigaction handler. The trampoline code is embedded in this function
+// between the labels "0" and "999"; a normal call loads the address of label
+// "0" into the return value and jumps over the trampoline body.
+//
+// When invoked as a signal handler, the trampoline looks up the instruction
+// pointer that was saved at the time of the exception. If the instruction at
+// that address is "hlt" (opcode 0xF4), it advances the saved instruction
+// pointer by one byte, so that execution resumes after the faulting
+// instruction. It then transfers control to *g_sig_handler_ptr, passing the
+// original instruction address as the second argument.
+// NOTE(review): the fixed stack offsets (0xB0, 0x40, 0x9C, 0xA) presumably
+// match the kernel's signal frame layouts -- confirm before changing them.
+static void (*generic_signal_handler(void))
+  (int signo, siginfo_t *info, void *context) {
+  void (*hdl)(int, siginfo_t *, void *);
+  asm volatile(
+    "lea  0f, %0\n"
+    "jmp  999f\n"
+  "0:\n"
+
+#if defined(__x86_64__)
+    "mov  0xB0(%%rsp), %%rsi\n"    // Pass original %rip to signal handler
+    "cmpb $0xF4, 0(%%rsi)\n"       // hlt
+    "jnz   1f\n"
+    "addq $1, 0xB0(%%rsp)\n"       // Adjust %rip past failing instruction
+  "1:jmp  *g_sig_handler_ptr\n"    // Call actual signal handler
+#elif defined(__i386__)
+    // TODO(markus): We currently don't guarantee that signal handlers always
+    //               have the correct "magic" restorer function. If we fix
+    //               this, we should add a test for it (both for SEGV and
+    //               non-SEGV).
+    "cmpw $0, 0xA(%%esp)\n"
+    "lea  0x40(%%esp), %%eax\n"    // %eip at time of exception
+    "jz   1f\n"
+    "add  $0x9C, %%eax\n"          // %eip at time of exception
+  "1:mov  0(%%eax), %%ecx\n"
+    "cmpb $0xF4, 0(%%ecx)\n"       // hlt
+    "jnz   2f\n"
+    "addl $1, 0(%%eax)\n"          // Adjust %eip past failing instruction
+  "2:push %%ecx\n"                 // Pass original %eip to signal handler
+    "mov  8(%%esp), %%eax\n"
+    "push %%eax\n"                 // Pass signal number to signal handler
+    "call *g_sig_handler_ptr\n"    // Call actual signal handler
+    "pop  %%eax\n"
+    "pop  %%ecx\n"
+    "ret\n"
+#else
+#error Unsupported target platform
+#endif
+
+"999:\n"
+    : "=r"(hdl));
+  return hdl;
+}
+
+// Installs a SIGTRAP handler with signal() both before and after the sandbox
+// starts, then raises SIGTRAP with "int3" and checks that the handler ran.
+TEST(test_signal_handler) {
+  sighandler_t result = signal(SIGTRAP, signal_handler);
+  assert(result != SIG_ERR);
+
+  StartSeccompSandbox();
+
+  result = signal(SIGTRAP, signal_handler);
+  assert(result != SIG_ERR);
+
+  g_value = 200;
+  asm("int3");
+  assert(g_value == 300);
+}
+
+// Installs an SA_SIGINFO SIGTRAP handler with sigaction() both before and
+// after the sandbox starts, then raises SIGTRAP with "int3" and checks that
+// the handler ran.
+TEST(test_sigaction_handler) {
+  struct sigaction act;
+  act.sa_sigaction = sigaction_handler;
+  sigemptyset(&act.sa_mask);
+  act.sa_flags = SA_SIGINFO;
+  int rc = sigaction(SIGTRAP, &act, NULL);
+  assert(rc == 0);
+
+  StartSeccompSandbox();
+
+  rc = sigaction(SIGTRAP, &act, NULL);
+  assert(rc == 0);
+
+  g_value = 200;
+  asm("int3");
+  assert(g_value == 300);
+}
+
+// Blocks SIGTRAP with sigprocmask() inside the sandbox, checks that the mask
+// can be read back, and then expects "int3" to kill the process with SIGTRAP
+// instead of running the installed handler.
+TEST(test_blocked_signal) {
+  sighandler_t result = signal(SIGTRAP, signal_handler);
+  assert(result != SIG_ERR);
+  StartSeccompSandbox();
+
+  // Initially the signal should not be blocked.
+  sigset_t sigs;
+  sigfillset(&sigs);
+  int rc = sigprocmask(0, NULL, &sigs);
+  assert(rc == 0);
+  assert(!sigismember(&sigs, SIGTRAP));
+
+  sigemptyset(&sigs);
+  sigaddset(&sigs, SIGTRAP);
+  rc = sigprocmask(SIG_BLOCK, &sigs, NULL);
+  assert(rc == 0);
+
+  // Check that we can read back the blocked status.
+  sigemptyset(&sigs);
+  rc = sigprocmask(0, NULL, &sigs);
+  assert(rc == 0);
+  assert(sigismember(&sigs, SIGTRAP));
+
+  // Check that the signal handler really is blocked.
+  intend_exit_status(SIGTRAP, true);
+  asm("int3");
+}
+
+// Expects sigaltstack() to fail with ENOSYS inside the sandbox.
+TEST(test_sigaltstack) {
+  // The sandbox does not support sigaltstack() yet.  Just test that
+  // it returns an error.
+  StartSeccompSandbox();
+  stack_t st;
+  st.ss_size = 0x4000;
+  st.ss_sp = malloc(st.ss_size);
+  assert(st.ss_sp != NULL);
+  st.ss_flags = 0;
+  int rc = sigaltstack(&st, NULL);
+  assert(rc == -1);
+  assert(errno == ENOSYS);
+}
+
+// For each of four sa_flags combinations, installs the generic trampoline
+// for SIGSEGV and for SIGTRAP, raises the signal, and checks that
+// non_fatal_sig_handler ran and that execution continued afterwards.
+TEST(test_sa_flags) {
+  StartSeccompSandbox();
+  int flags[4] = { 0, SA_NODEFER, SA_SIGINFO, SA_SIGINFO | SA_NODEFER };
+  for (int i = 0; i < 4; ++i) {
+    struct sigaction sa;
+    memset(&sa, 0, sizeof(sa));
+    sa.sa_sigaction = generic_signal_handler();
+    g_sig_handler_ptr = non_fatal_sig_handler;
+    sa.sa_flags = flags[i];
+
+    // Test SEGV handling
+    g_value = 200;
+    sigaction(SIGSEGV, &sa, NULL);
+    asm volatile("hlt");
+    assert(g_value == 300);
+
+    // Test non-SEGV handling
+    g_value = 200;
+    sigaction(SIGTRAP, &sa, NULL);
+    asm volatile("int3");
+    assert(g_value == 300);
+  }
+}
+
+// Raises SIGSEGV from within the SIGSEGV handler, first with SA_NODEFER
+// (execution is expected to continue) and then without it (the process is
+// expected to die from SIGSEGV).
+TEST(test_segv_defer) {
+  StartSeccompSandbox();
+  struct sigaction sa;
+  memset(&sa, 0, sizeof(sa));
+  sa.sa_sigaction = generic_signal_handler();
+  g_sig_handler_ptr = fatal_sig_handler;
+
+  // Test non-deferred SEGV (should continue execution)
+  sa.sa_flags = SA_NODEFER;
+  sigaction(SIGSEGV, &sa, NULL);
+  g_value = 0;
+  asm volatile("hlt");
+
+  // Test deferred SEGV (should terminate program)
+  sa.sa_flags = 0;
+  sigaction(SIGSEGV, &sa, NULL);
+  g_value = 0;
+  intend_exit_status(SIGSEGV, true);
+  asm volatile("hlt");
+}
+
+// Raises SIGTRAP from within the SIGTRAP handler, first with SA_NODEFER
+// (execution is expected to continue) and then without it (the process is
+// expected to die from SIGTRAP).
+TEST(test_trap_defer) {
+  StartSeccompSandbox();
+  struct sigaction sa;
+  memset(&sa, 0, sizeof(sa));
+  sa.sa_sigaction = generic_signal_handler();
+  g_sig_handler_ptr = fatal_sig_handler;
+
+  // Test non-deferred TRAP (should continue execution)
+  sa.sa_flags = SA_NODEFER;
+  sigaction(SIGTRAP, &sa, NULL);
+  g_value = 0;
+  asm volatile("int3");
+
+  // Test deferred TRAP (should terminate program)
+  sa.sa_flags = 0;
+  sigaction(SIGTRAP, &sa, NULL);
+  g_value = 0;
+  intend_exit_status(SIGTRAP, true);
+  asm volatile("int3");
+}
+
+// Installs a SIGSEGV handler with SA_RESETHAND: the first fault is expected
+// to be handled, and the second one is expected to kill the process.
+TEST(test_segv_resethand) {
+  StartSeccompSandbox();
+  struct sigaction sa;
+  memset(&sa, 0, sizeof(sa));
+  sa.sa_sigaction = generic_signal_handler();
+  g_sig_handler_ptr = non_fatal_sig_handler;
+  sa.sa_flags = SA_RESETHAND;
+  sigaction(SIGSEGV, &sa, NULL);
+
+  // Test first invocation of signal handler (should continue execution)
+  asm volatile("hlt");
+
+  // Test second invocation of signal handler (should terminate program)
+  intend_exit_status(SIGSEGV, true);
+  asm volatile("hlt");
+}
+
+// Installs a SIGTRAP handler with SA_RESETHAND: the first trap is expected
+// to be handled, and the second one is expected to kill the process.
+TEST(test_trap_resethand) {
+  StartSeccompSandbox();
+  struct sigaction sa;
+  memset(&sa, 0, sizeof(sa));
+  sa.sa_sigaction = generic_signal_handler();
+  g_sig_handler_ptr = non_fatal_sig_handler;
+  sa.sa_flags = SA_RESETHAND;
+  sigaction(SIGTRAP, &sa, NULL);
+
+  // Test first invocation of signal handler (should continue execution)
+  asm volatile("int3");
+
+  // Test second invocation of signal handler (should terminate program)
+  intend_exit_status(SIGTRAP, true);
+  asm volatile("int3");
+}
+
+// A named test function.
+struct testcase {
+  const char *test_name;
+  void (*test_func)();
+};
+
+// All tests. The rows come from the included "test-list.h", followed by a
+// terminating entry whose name is NULL.
+struct testcase all_tests[] = {
+#include "test-list.h"
+  { NULL, NULL },
+};
+
+// Runs |test| in a forked child process and compares the child's wait()
+// status (ignoring the core dump flag) with the status that the child
+// announced through intend_exit_status(). Returns 0 if the two match, and 1
+// if they differ or if the child never announced a status.
+static int run_test_forked(struct testcase *test) {
+  printf("** %s\n", test->test_name);
+  int pipe_fds[2];
+  int rc = pipe(pipe_fds);
+  assert(rc == 0);
+  int pid = fork();
+  // Without this check a failed fork() would fall through to waitpid(-1),
+  // which waits for an arbitrary child instead of reporting the failure.
+  assert(pid >= 0);
+  if (pid == 0) {
+    rc = close(pipe_fds[0]);
+    assert(rc == 0);
+    g_intended_status_fd = pipe_fds[1];
+
+    test->test_func();
+    intend_exit_status(0, false);
+    _exit(0);
+  }
+  rc = close(pipe_fds[1]);
+  assert(rc == 0);
+
+  // Only the first status that the child announces is taken into account.
+  int intended_status;
+  int got = read(pipe_fds[0], &intended_status, sizeof(intended_status));
+  bool got_intended_status = got == sizeof(intended_status);
+  if (!got_intended_status) {
+    printf("Test runner: Did not receive intended status\n");
+  }
+
+  int status;
+  int pid2 = waitpid(pid, &status, 0);
+  assert(pid2 == pid);
+  // Close the read end only after the child is gone, so that a late write by
+  // the child cannot hit a closed pipe; closing it at all keeps the runner
+  // from leaking one descriptor per test.
+  rc = close(pipe_fds[0]);
+  assert(rc == 0);
+  if (!got_intended_status) {
+    printf("Test returned exit status %i\n", status);
+    return 1;
+  }
+  else if ((status & ~WCOREFLAG) != intended_status) {
+    printf("Test failed with exit status %i, expected %i\n",
+           status, intended_status);
+    return 1;
+  }
+  else {
+    return 0;
+  }
+}
+
+// Runs the test called |name| in the current process. Returns 0 once the
+// test function has returned, or 1 if no test has that name.
+static int run_test_by_name(const char *name) {
+  // Advance to the first entry with a matching name, or to the terminator.
+  struct testcase *test = all_tests;
+  while (test->test_name != NULL && strcmp(name, test->test_name) != 0) {
+    test++;
+  }
+  if (test->test_name == NULL) {
+    fprintf(stderr, "Test '%s' not found\n", name);
+    return 1;
+  }
+  printf("Running test %s...\n", name);
+  test->test_func();
+  printf("OK\n");
+  return 0;
+}
+
+// Test driver. Without arguments every test is run in its own forked
+// process; with exactly one argument the named test is run in-process.
+// Returns 0 on success and 1 on any failure or usage error.
+int main(int argc, char **argv) {
+  setvbuf(stdout, NULL, _IONBF, 0);
+  setvbuf(stderr, NULL, _IONBF, 0);
+  if (argc == 2) {
+    // Run one test without forking, to aid debugging.
+    return run_test_by_name(argv[1]);
+  }
+  if (argc > 2) {
+    // TODO: run multiple tests.
+    fprintf(stderr, "Too many arguments\n");
+    return 1;
+  }
+
+  // Run all tests.
+  int failures = 0;
+  for (struct testcase *test = all_tests; test->test_name != NULL; ++test) {
+    failures += run_test_forked(test);
+  }
+  if (failures != 0) {
+    printf("%i FAILURE(S)\n", failures);
+    return 1;
+  }
+  printf("OK\n");
+  return 0;
+}
diff --git a/sandbox/linux/seccomp/timestats.cc b/sandbox/linux/seccomp/timestats.cc
new file mode 100644
index 0000000..5d9b66a
--- /dev/null
+++ b/sandbox/linux/seccomp/timestats.cc
@@ -0,0 +1,191 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Helper program to analyze the time that Chrome's renderers spend in system
+// calls. Start Chrome like this:
+//
+// SECCOMP_SANDBOX_DEBUGGING=1 chrome --enable-seccomp-sandbox 2>&1 | timestats
+//
+// The program prints CPU time (0-100%) spent within system calls. This gives
+// a general idea of where it is worthwhile to spend effort optimizing Chrome.
+//
+// Caveats:
+//  - there currently is no way to estimate what the overhead is for running
+//    inside of the sandbox vs. running without a sandbox.
+//  - we currently use a very simple heuristic to decide whether a system call
+//    is blocking or not. Blocking system calls should not be included in the
+//    computations. But it is quite possible for the numbers to be somewhat
+//    wrong, because the heuristic failed.
+//  - in order to collect this data, we have to turn on sandbox debugging.
+//    There is a measurable performance penalty to doing so. Production numbers
+//    are strictly better than the numbers reported by this tool.
+#include <set>
+#include <vector>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <time.h>
+
+static const int kAvgWindowSizeMs  =    500;
+static const int kPeakWindowSizeMs = 2*1000;
+
+// Record describing a single observed system call: the wall-clock time at
+// which the record was created, and the duration that was reported for the
+// call.
+class Datum {
+  friend class Data;
+ public:
+  // |name| is stored as a raw pointer; the string is not copied. |ms| is the
+  // reported duration of the call (presumably in milliseconds, going by the
+  // parameter name). The creation time is sampled with gettimeofday().
+  Datum(const char* name, double ms)
+    : name_(name), ms_(ms), timestamp_(nowMs()) {
+  }
+  virtual ~Datum() { }
+
+  // Difference between the creation times of two records, in milliseconds.
+  double operator-(const Datum& b) {
+    return timestamp_ - b.timestamp_;
+  }
+
+ protected:
+  const char* name_;       // Not owned and not copied
+  double      ms_;         // Reported duration of the call
+  double      timestamp_;  // Creation time, in milliseconds
+
+ private:
+  // Current time of day, converted to milliseconds.
+  static double nowMs() {
+    struct timeval now;
+    gettimeofday(&now, NULL);
+    return now.tv_sec*1000.0 + now.tv_usec/1000.0;
+  }
+};
+
+// Class containing data on the most recent system calls. It maintains
+// sliding averages for total CPU time used, and it also maintains a peak
+// CPU usage. The peak usage is usually updated slower than the average
+// usage, as that makes it easier to inspect visually.
+class Data {
+ public:
+  Data() { }
+  virtual ~Data() { }
+
+  // Records a system call that took |ms| milliseconds, discards records that
+  // have fallen out of the averaging and peak windows, and redraws the
+  // one-line usage display on stdout. The line is kLineWidth characters wide
+  // and ends in '\r', so that the next update overwrites it.
+  void addData(const char* name, double ms) {
+    // Use one record for both windows, so that they agree on the timestamp.
+    const Datum datum(name, ms);
+    average_.push_back(datum);
+    peak_.push_back(datum);
+
+    // Prune entries outside of the window. These loops always terminate, as
+    // the newest entry has a distance of zero from itself.
+    std::vector<Datum>::iterator iter;
+    for (iter = average_.begin();
+         *average_.rbegin() - *iter > kAvgWindowSizeMs;
+         ++iter) {
+    }
+    average_.erase(average_.begin(), iter);
+
+    for (iter = peak_.begin();
+         *peak_.rbegin() - *iter > kPeakWindowSizeMs;
+         ++iter){
+    }
+    peak_.erase(peak_.begin(), iter);
+
+    // Add the total usage of all system calls inside of the window
+    double total = 0;
+    for (iter = average_.begin(); iter != average_.end(); ++iter) {
+      total += iter->ms_;
+    }
+
+    // Compute the peak CPU usage during the last window. This slides a
+    // sub-window of kAvgWindowSizeMs over the peak window and remembers the
+    // largest sum that was seen.
+    double peak = 0;
+    double max = 0;
+    std::vector<Datum>::iterator tail = peak_.begin();
+    for (iter = tail; iter != peak_.end(); ++iter) {
+      while (*iter - *tail > kAvgWindowSizeMs) {
+        peak -= tail->ms_;
+        ++tail;
+      }
+      peak += iter->ms_;
+      if (peak > max) {
+        max = peak;
+      }
+    }
+
+    // Print the average CPU usage in the last window. The buffer holds
+    // kLineWidth visible characters plus the trailing '\r'. snprintf() is
+    // told to stop one character early, so that there always is room for
+    // the '|' marker.
+    char buf[kLineWidth + 1];
+    total *= 100.0/kAvgWindowSizeMs;
+    max   *= 100.0/kAvgWindowSizeMs;
+    snprintf(buf, kLineWidth, "%6.2f%% (peak=%6.2f%%) ", total, max);
+
+    // Animate the actual usage, displaying both average and peak values.
+    // The positions are clamped to the available space, because the usage
+    // can exceed 100% (e.g. a single expensive call that took longer than
+    // the averaging window), and the bar must never run past the buffer.
+    int len   = strlen(buf);
+    int space = kLineWidth - len - 1;
+    int mark  = barPosition(total, space);
+    int bar   = barPosition(max,   space);
+    for (int i = 0; i < mark; ++i) {
+      buf[len++] = '*';
+    }
+    if (mark == bar) {
+      // The peak marker replaces the last '*' of the average bar
+      if (bar) {
+        len--;
+      }
+    } else {
+      for (int i = 0; i < bar - mark - 1; ++i) {
+        buf[len++] = ' ';
+      }
+    }
+    buf[len++] = '|';
+    while (len < kLineWidth) {
+      buf[len++] = ' ';
+    }
+    buf[len++] = '\r';
+    fwrite(buf, len, 1, stdout);
+    fflush(stdout);
+  }
+
+ private:
+  // Number of visible characters in one line of the display.
+  enum { kLineWidth = 80 };
+
+  // Converts a usage percentage into a column offset within |space|
+  // available columns, rounding to the nearest column. The result is clamped
+  // to [0, space]; anything that does not compare greater than zero
+  // (including NaN) maps to column 0.
+  static int barPosition(double percent, int space) {
+    double pos = (percent * space + 50)/100;
+    if (!(pos > 0)) {
+      return 0;
+    }
+    if (pos > space) {
+      return space;
+    }
+    return static_cast<int>(pos);
+  }
+
+  std::vector<Datum> average_;
+  std::vector<Datum> peak_;
+};
+static Data data;
+
+
+// Reads the sandbox's debugging output from stdin, one line at a time, and
+// feeds the elapsed time of each reported system call into |data|, which
+// redraws the usage display. Lines that do not look like timing reports are
+// ignored. Returns 0 once stdin has been read to the end.
+int main(int argc, char *argv[]) {
+  char buf[80];
+  bool expensive = false;
+  while (fgets(buf, sizeof(buf), stdin)) {
+    // Allow longer delays for expensive system calls
+    if (strstr(buf, "This is an expensive system call")) {
+      expensive = true;
+      continue;
+    }
+
+    // Parse the string and extract the elapsed time
+    const char elapsed[] = "Elapsed time: ";
+    char* ms_string = strstr(buf, elapsed);
+    char* colon = strchr(buf, ':');
+
+    // If this string doesn't match, then it must be some other type of
+    // message. Just ignore it.
+    // It is quite likely that we will regularly encounter debug messages
+    // that either should be parsed by a completely different tool, or
+    // messages that were intended for humans to read.
+    if (!ms_string || !colon) {
+      continue;
+    }
+
+    // strtod() leaves endptr at the start of its input when no conversion
+    // could be performed. The input starts right after the "Elapsed time: "
+    // prefix, so that is the position that has to be compared against.
+    char* number = ms_string + sizeof(elapsed) - 1;
+    char* endptr;
+    double ms = strtod(number, &endptr);
+    if (endptr == number) {
+      continue;
+    }
+
+    // Filter out system calls that were probably just blocking
+    // TODO(markus): automatically compute the cut-off for blocking calls
+    if (!expensive && ms > 0.05) {
+      continue;
+    }
+    expensive = false;
+
+    // Extract the name of the system call. This is everything up to the
+    // first colon in the line.
+    *colon = '\000';
+
+    // Add the data point and update the display
+    data.addData(buf, ms);
+  }
+  puts("");
+  return 0;
+}
diff --git a/sandbox/linux/seccomp/tls.h b/sandbox/linux/seccomp/tls.h
new file mode 100644
index 0000000..7ec5a28
--- /dev/null
+++ b/sandbox/linux/seccomp/tls.h
@@ -0,0 +1,155 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef TLS_H__
+#define TLS_H__
+
+#include <asm/ldt.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+
+namespace playground {
+
+// Minimal thread-local storage that does not depend on the C library. The
+// storage is a single 4096 byte page that is addressed through the %gs
+// segment register on x86-64, and through %fs on i386. The page is treated
+// as an array of 8 byte slots, which are accessed by index.
+class TLS {
+ private:
+  // Private instantiation of the system call wrappers, with its own errno
+  // variable.
+  class SysCalls {
+   public:
+    #define SYS_CPLUSPLUS
+    #define SYS_ERRNO     my_errno
+    #define SYS_INLINE    inline
+    #define SYS_PREFIX    -1
+    #undef  SYS_LINUX_SYSCALL_SUPPORT_H
+    #include "linux_syscall_support.h"
+    SysCalls() : my_errno(0) { }
+    int my_errno;
+  };
+
+ public:
+  // Maps a fresh page and makes the segment register point to it. Returns
+  // the address of the page, or NULL if either the mapping or the segment
+  // setup failed. No mapping is left behind on failure.
+  static void *allocateTLS() {
+    SysCalls sys;
+    #if defined(__x86_64__)
+    void *addr = sys.mmap(0, 4096, PROT_READ|PROT_WRITE,
+                          MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+    if (addr == MAP_FAILED) {
+      return NULL;
+    }
+    if (sys.arch_prctl(ARCH_SET_GS, addr) < 0) {
+      sys.munmap(addr, 4096);
+      return NULL;
+    }
+    #elif defined(__i386__)
+    void *addr = sys.mmap2(0, 4096, PROT_READ|PROT_WRITE,
+                           MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+    if (addr == MAP_FAILED) {
+      return NULL;
+    }
+    struct user_desc u;
+    u.entry_number    = (typeof u.entry_number)-1;
+    u.base_addr       = (int)addr;
+    u.limit           = 0xfffff;
+    u.seg_32bit       = 1;
+    u.contents        = 0;
+    u.read_exec_only  = 0;
+    u.limit_in_pages  = 1;
+    u.seg_not_present = 0;
+    u.useable         = 1;
+    if (sys.set_thread_area(&u) < 0) {
+      sys.munmap(addr, 4096);
+      return NULL;
+    }
+    // Load %fs with the selector for the entry that was just set up
+    asm volatile(
+        "movw %w0, %%fs"
+        :
+        : "q"(8*u.entry_number+3));
+    #else
+    #error Unsupported target platform
+    #endif
+    return addr;
+  }
+
+  // Unmaps the page that the segment register currently points to. Does
+  // nothing if the address of the page cannot be determined.
+  static void freeTLS() {
+    SysCalls sys;
+    void *addr;
+    #if defined(__x86_64__)
+    if (sys.arch_prctl(ARCH_GET_GS, &addr) < 0) {
+      return;
+    }
+    #elif defined(__i386__)
+    // get_thread_area() expects the caller to fill in entry_number.
+    // allocateTLS() loaded %fs with the selector 8*entry_number+3, so the
+    // entry number can be recovered from the current value of %fs.
+    unsigned short selector;
+    asm volatile(
+        "movw %%fs, %0"
+        : "=r"(selector));
+    struct user_desc u;
+    u.entry_number = selector >> 3;
+    if (sys.get_thread_area(&u) < 0) {
+      return;
+    }
+    addr = (void *)u.base_addr;
+    #else
+    #error Unsupported target platform
+    #endif
+    sys.munmap(addr, 4096);
+  }
+
+  // Stores |val| in slot |idx|. Returns false, without storing anything, if
+  // |idx| is outside of [0, 4096/8).
+  template<class T> static inline bool setTLSValue(int idx, T val) {
+    #if defined(__x86_64__)
+    if (idx < 0 || idx >= 4096/8) {
+      return false;
+    }
+    asm volatile(
+        "movq %0, %%gs:(%1)\n"
+        :
+        : "q"((void *)val), "q"(8ll * idx));
+    #elif defined(__i386__)
+    if (idx < 0 || idx >= 4096/8) {
+      return false;
+    }
+    if (sizeof(T) == 8) {
+      // 64 bit values are stored as two 32 bit halves, low half first
+      asm volatile(
+          "movl %0, %%fs:(%1)\n"
+          :
+          : "r"((unsigned)val), "r"(8 * idx));
+      asm volatile(
+          "movl %0, %%fs:(%1)\n"
+          :
+          : "r"((unsigned)((unsigned long long)val >> 32)), "r"(8 * idx + 4));
+    } else {
+      asm volatile(
+          "movl %0, %%fs:(%1)\n"
+          :
+          : "r"(val), "r"(8 * idx));
+    }
+    #else
+    #error Unsupported target platform
+    #endif
+    return true;
+  }
+
+  // Returns the value stored in slot |idx|, converted to T. Returns 0 if
+  // |idx| is outside of [0, 4096/8).
+  template<class T> static inline T getTLSValue(int idx) {
+    #if defined(__x86_64__)
+    long long rc;
+    if (idx < 0 || idx >= 4096/8) {
+      return 0;
+    }
+    asm volatile(
+        "movq %%gs:(%1), %0\n"
+        : "=q"(rc)
+        : "q"(8ll * idx));
+    return (T)rc;
+    #elif defined(__i386__)
+    if (idx < 0 || idx >= 4096/8) {
+      return 0;
+    }
+    if (sizeof(T) == 8) {
+      // Reassemble the 64 bit value from its two 32 bit halves
+      unsigned lo, hi;
+      asm volatile(
+          "movl %%fs:(%1), %0\n"
+          : "=r"(lo)
+          : "r"(8 * idx));
+      asm volatile(
+          "movl %%fs:(%1), %0\n"
+          : "=r"(hi)
+          : "r"(8 * idx + 4));
+      return (T)((unsigned long long)lo + ((unsigned long long)hi << 32));
+    } else {
+      long rc;
+      asm volatile(
+          "movl %%fs:(%1), %0\n"
+          : "=r"(rc)
+          : "r"(8 * idx));
+      return (T)rc;
+    }
+    #else
+    #error Unsupported target platform
+    #endif
+  }
+
+};
+
+} // namespace
+#endif
diff --git a/sandbox/linux/seccomp/trusted_process.cc b/sandbox/linux/seccomp/trusted_process.cc
new file mode 100644
index 0000000..5c62b0f
--- /dev/null
+++ b/sandbox/linux/seccomp/trusted_process.cc
@@ -0,0 +1,268 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <dirent.h>
+#include <map>
+
+#include "debug.h"
+#include "sandbox_impl.h"
+#include "syscall_table.h"
+
+namespace playground {
+
+struct SandboxPolicy g_policy;
+
+// Bookkeeping that the trusted process keeps for each registered thread.
+struct Thread {
+  int              fdPub;  // Descriptors received from the thread via getFd()
+  int              fd;
+  SecureMem::Args* mem;    // The thread's entry in the secure memory arena
+};
+
+// Removes the last entry from secureMemPool_ and returns it, after zeroing
+// its scratch page. Returns NULL if the pool is empty.
+SecureMem::Args* Sandbox::getNewSecureMem() {
+  if (secureMemPool_.empty()) {
+    return NULL;
+  }
+  SecureMem::Args* const mem = secureMemPool_.back();
+  secureMemPool_.pop_back();
+  memset(mem->scratchPage, 0, sizeof(mem->scratchPage));
+  return mem;
+}
+
+// Main loop of the trusted process. It registers each new thread that
+// announces itself on cloneFd, and then services system call requests that
+// arrive on sandboxFd by dispatching them to the handlers in syscallTable.
+// There is no return statement; the loops are only ever left through die().
+//
+//   parentMapsFd: passed through to every system call handler.
+//   processFdPub: not referenced by this function.
+//   sandboxFd:    request headers are read from here; also passed through
+//                 to every system call handler.
+//   cloneFd:      new threads send their registration data here.
+//   secureArena:  first entry of the secure memory arena.
+void Sandbox::trustedProcess(int parentMapsFd, int processFdPub, int sandboxFd,
+                             int cloneFd, SecureMem::Args* secureArena) {
+  // The trusted process doesn't have access to TLS. Zero out the segment
+  // registers so that we can later test that we are in the trusted process.
+  #if defined(__x86_64__)
+  asm volatile("mov %0, %%gs\n" : : "r"(0));
+  #elif defined(__i386__)
+  asm volatile("mov %0, %%fs\n" : : "r"(0));
+  #else
+  #error Unsupported target platform
+  #endif
+
+  // Registered threads, keyed by the cookie that was assigned to them
+  std::map<long long, struct Thread> threads;
+  SysCalls  sys;
+  long long cookie               = 0;
+
+  // The very first entry in the secure memory arena has been assigned to the
+  // initial thread. The remaining entries are available for allocation.
+  SecureMem::Args* startAddress  = secureArena;
+  SecureMem::Args* nextThread    = startAddress;
+  for (int i = 0; i < kMaxThreads-1; i++) {
+    secureMemPool_.push_back(++startAddress);
+  }
+
+newThreadCreated:
+  // Receive information from newly created thread. Every thread is assigned
+  // the next cookie value, which is what later requests are looked up by.
+  Thread *newThread              = &threads[++cookie];
+  memset(newThread, 0, sizeof(Thread));
+  struct {
+    SecureMem::Args* self;
+    int              tid;
+    int              fdPub;
+  } __attribute__((packed)) data;
+
+  size_t dataLen                 = sizeof(data);
+  if (!getFd(cloneFd, &newThread->fdPub, &newThread->fd, &data, &dataLen) ||
+      dataLen != sizeof(data)) {
+    // We get here either because the sandbox got corrupted, or because our
+    // parent process has terminated.
+    if (newThread->fdPub || dataLen) {
+      die("Failed to receive new thread information");
+    }
+    die();
+  }
+  if (data.self != nextThread) {
+    // The only potentially security critical information received from the
+    // newly created thread is "self". The "tid" is for informational purposes
+    // (and for use in the new thread's TLS), and "fdPub" is uncritical as all
+    // file descriptors are considered untrusted.
+    // Thus, we only use "self" for a sanity check, but don't actually trust
+    // it beyond that.
+    die("Received corrupted thread information");
+  }
+  newThread->mem                 = nextThread;
+
+  // Set up TLS area and let thread know that the data is now ready
+  nextThread->cookie             = cookie;
+  nextThread->threadId           = data.tid;
+  nextThread->threadFdPub        = data.fdPub;
+  write(sys, newThread->fd, "", 1);
+
+  // Dispatch system calls that have been forwarded from the trusted thread(s).
+  for (;;) {
+    // Every request starts with a fixed-size header that names the system
+    // call, and that identifies the requesting thread by its cookie.
+    struct {
+      unsigned int sysnum;
+      long long    cookie;
+    } __attribute__((packed)) header;
+
+    int rc;
+    if ((rc = read(sys, sandboxFd, &header, sizeof(header))) !=sizeof(header)){
+      // A result of zero terminates without a message; anything else that
+      // is not a complete header is reported as an error.
+      if (rc) {
+        die("Failed to read system call number and thread id");
+      }
+      die();
+    }
+    std::map<long long, struct Thread>::iterator iter =
+                                                   threads.find(header.cookie);
+    if (iter == threads.end()) {
+      die("Received request from unknown thread");
+    }
+    struct Thread* currentThread = &iter->second;
+    // Only system calls that have a handler for the trusted process are
+    // acceptable.
+    if (header.sysnum > maxSyscall ||
+        !syscallTable[header.sysnum].trustedProcess) {
+      die("Trusted process encountered unexpected system call");
+    }
+
+    // Dispatch system call to handler function. Treat both exit() and clone()
+    // specially.
+    if (syscallTable[header.sysnum].trustedProcess(parentMapsFd,
+                                                   sandboxFd,
+                                                   currentThread->fdPub,
+                                                   currentThread->fd,
+                                                   currentThread->mem) &&
+        header.sysnum == __NR_clone) {
+      // The handler returned true for a clone() request. Go back and wait
+      // for the new thread to register, expecting it to use the secure
+      // memory that was recorded in newSecureMem.
+      nextThread = currentThread->mem->newSecureMem;
+      goto newThreadCreated;
+    } else if (header.sysnum == __NR_exit) {
+      // Close the thread's descriptors, forget about the thread, and return
+      // its secure memory to the pool.
+      NOINTR_SYS(sys.close(iter->second.fdPub));
+      NOINTR_SYS(sys.close(iter->second.fd));
+      SecureMem::Args* secureMem = currentThread->mem;
+      threads.erase(iter);
+      secureMemPool_.push_back(secureMem);
+    }
+  }
+}
+
+// Receives a file descriptor for the memory map over |fd|, and records the
+// address range of each mapping in protectedMap_ (start address -> length).
+// The descriptor number is then written back to |fd| to signal that parsing
+// has finished. Returns the received file descriptor. Any failure ends in
+// die().
+int Sandbox::initializeProtectedMap(int fd) {
+  int mapsFd;
+  if (!getFd(fd, &mapsFd, NULL, NULL, NULL)) {
+ maps_failure:
+    die("Cannot access /proc/self/maps");
+  }
+
+  // Read the memory mappings as they were before the sandbox takes effect.
+  // These mappings cannot be changed by the sandboxed process.
+  // NOTE(review): fp is never passed to fclose(). Doing so would also close
+  // mapsFd, which is returned to the caller.
+  char line[80];
+  FILE *fp = fdopen(mapsFd, "r");
+  if (fp == NULL) {
+    // Without a stream, there is nothing that fgets() could read from.
+    die("Cannot access /proc/self/maps");
+  }
+  for (bool truncated = false;;) {
+    if (fgets(line, sizeof(line), fp) == NULL) {
+      // Stop at the end of the file, and on any error other than EINTR
+      if (feof(fp) || errno != EINTR) {
+        break;
+      }
+      continue;
+    }
+    // A line that is longer than the buffer arrives in several pieces. Only
+    // the first piece starts with the address range.
+    if (!truncated) {
+      unsigned long start, stop;
+      char *ptr = line;
+      errno = 0;
+      start = strtoul(ptr, &ptr, 16);
+      if (errno || *ptr++ != '-') {
+     parse_failure:
+        die("Failed to parse /proc/self/maps");
+      }
+      stop = strtoul(ptr, &ptr, 16);
+      if (errno || *ptr++ != ' ') {
+        goto parse_failure;
+      }
+      protectedMap_[reinterpret_cast<void *>(start)] = stop - start;
+    }
+    truncated = strchr(line, '\n') == NULL;
+  }
+
+  // Prevent low address memory allocations. Some buggy kernels allow those
+  if (protectedMap_[0] < (64 << 10)) {
+    protectedMap_[0] = 64 << 10;
+  }
+
+  // Let the sandbox know that we are done parsing the memory map.
+  SysCalls sys;
+  if (write(sys, fd, &mapsFd, sizeof(mapsFd)) != sizeof(mapsFd)) {
+    goto maps_failure;
+  }
+
+  return mapsFd;
+}
+
+// Allocates the shared secure memory arena and the shared mutex page, and
+// then forks off the trusted process. The child closes all file descriptors
+// that it does not need, initializes the arena, and enters trustedProcess().
+// The parent revokes its own access to the arena and to the mutex page,
+// closes sandboxFd, and returns the address of the arena. Any failure ends
+// in die().
+SecureMem::Args* Sandbox::createTrustedProcess(int processFdPub, int sandboxFd,
+                                               int cloneFdPub, int cloneFd) {
+  // Allocate memory that will be used by an arena for storing the secure
+  // memory. While we allow this memory area to be empty at times (e.g. when
+  // not all threads are in use), we make sure that it never gets overwritten
+  // by user-allocated memory. This happens in initializeProtectedMap() and
+  // snapshotMemoryMappings().
+  SecureMem::Args* secureArena = reinterpret_cast<SecureMem::Args*>(
+      mmap(NULL, 8192*kMaxThreads, PROT_READ|PROT_WRITE,
+           MAP_SHARED|MAP_ANONYMOUS, -1, 0));
+  if (secureArena == MAP_FAILED) {
+    die("Failed to allocate secure memory arena");
+  }
+
+  // Set up the mutex to be accessible from the trusted process and from
+  // children of the trusted thread(s)
+  if (mmap(&syscall_mutex_, 4096, PROT_READ|PROT_WRITE,
+           MAP_SHARED|MAP_ANONYMOUS|MAP_FIXED, -1, 0) != &syscall_mutex_) {
+    die("Failed to initialize secure mutex");
+  }
+  syscall_mutex_ = 0x80000000;
+
+
+  // Create a trusted process that can evaluate system call parameters and
+  // decide whether a system call should execute. This process runs outside of
+  // the seccomp sandbox. It communicates with the sandbox'd process through
+  // a socketpair() and through securely shared memory.
+  pid_t pid                    = fork();
+  if (pid < 0) {
+    die("Failed to create trusted process");
+  }
+  if (!pid) {
+    // Close all file handles except for sandboxFd, cloneFd, and stdio
+    DIR *dir                   = opendir("/proc/self/fd");
+    if (dir == 0) {
+      // If we don't know the list of our open file handles, just try closing
+      // all valid ones.
+      for (int fd = sysconf(_SC_OPEN_MAX); --fd > 2; ) {
+        if (fd != sandboxFd && fd != cloneFd) {
+          close(fd);
+        }
+      }
+    } else {
+      // If available, it is much more efficient to just close the file
+      // handles that show up in /proc/self/fd/
+      struct dirent de, *res;
+      while (!readdir_r(dir, &de, &res) && res) {
+        // Skip entries that do not start with a digit, such as "." and ".."
+        if (res->d_name[0] < '0')
+          continue;
+        int fd                 = atoi(res->d_name);
+        if (fd > 2 &&
+            fd != sandboxFd && fd != cloneFd && fd != dirfd(dir)) {
+          close(fd);
+        }
+      }
+      closedir(dir);
+    }
+
+    // Initialize secure memory used for threads
+    for (int i = 0; i < kMaxThreads; i++) {
+      SecureMem::Args* args    = secureArena + i;
+      args->self               = args;
+      #ifndef NDEBUG
+      args->allowAllSystemCalls= Debug::isEnabled();
+      #endif
+    }
+
+    int parentMapsFd           = initializeProtectedMap(sandboxFd);
+    trustedProcess(parentMapsFd, processFdPub, sandboxFd,
+                   cloneFd, secureArena);
+    die();
+  }
+
+  // We are still in the untrusted code. Deny access to restricted resources.
+  // If access cannot be revoked, the untrusted code would keep the ability
+  // to modify the secure memory, so this must not be ignored.
+  if (mprotect(secureArena, 8192*kMaxThreads, PROT_NONE) ||
+      mprotect(&syscall_mutex_, 4096, PROT_NONE)) {
+    die("Failed to protect secure memory");
+  }
+  close(sandboxFd);
+
+  return secureArena;
+}
+
+} // namespace
diff --git a/sandbox/linux/seccomp/trusted_thread.cc b/sandbox/linux/seccomp/trusted_thread.cc
new file mode 100644
index 0000000..6d6a3f5
--- /dev/null
+++ b/sandbox/linux/seccomp/trusted_thread.cc
@@ -0,0 +1,1483 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox_impl.h"
+#include "syscall_table.h"
+
+namespace playground {
+
+void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
+                                  SecureMem::Args* secureMem) {
+  SecureMem::Args args                  = { { { { { 0 } } } } };
+  args.self                             = &args;
+  args.newSecureMem                     = secureMem;
+  args.processFdPub                     = processFdPub;
+  args.cloneFdPub                       = cloneFdPub;
+#if defined(__x86_64__)
+  asm volatile(
+      "push %%rbx\n"
+      "push %%rbp\n"
+      "mov  %0, %%rbp\n"           // %rbp = args
+      "xor  %%rbx, %%rbx\n"        // initial sequence number
+      "lea  999f(%%rip), %%r15\n"  // continue in same thread
+
+      // Signal handlers are process-wide. This means that for security
+      // reasons, we cannot allow that the trusted thread ever executes any
+      // signal handlers.
+      // We prevent the execution of signal handlers by setting a signal
+      // mask that blocks all signals. In addition, we make sure that the
+      // stack pointer is invalid.
+      // We cannot reset the signal mask until after we have enabled
+      // Seccomp mode. Our sigprocmask() wrapper would normally do this by
+      // raising a signal, modifying the signal mask in the kernel-generated
+      // signal frame, and then calling sigreturn(). This presents a bit of
+      // a Catch-22, as all signals are masked and we can therefore not
+      // raise any signal that would allow us to generate the signal stack
+      // frame.
+      // Instead, we have to create the signal stack frame prior to entering
+      // Seccomp mode. This incidentally also helps us to restore the
+      // signal mask to the same value that it had prior to entering the
+      // sandbox.
+      // The signal wrapper for clone() is the second entry point into this
+      // code (by means of sending an IPC to its trusted thread). It goes
+      // through the same steps of creating a signal stack frame on the
+      // newly created thread's stacks prior to cloning. See clone.cc for
+      // details.
+      "mov  $56+0xF000, %%eax\n"   // __NR_clone + 0xF000
+      "mov  %%rsp, %%rcx\n"
+      "int  $0\n"                  // push a signal stack frame (see clone.cc)
+      "mov  %%rcx, 0xA0(%%rsp)\n"  // pop stack upon call to sigreturn()
+      "mov  %%rsp, %%r9\n"
+      "mov  $2, %%rdi\n"           // how     = SIG_SETMASK
+      "pushq $-1\n"
+      "mov  %%rsp, %%rsi\n"        // set     = full mask
+      "xor  %%rdx, %%rdx\n"        // old_set = NULL
+      "mov  $8, %%r10\n"           // mask all 64 signals
+      "mov  $14, %%eax\n"          // NR_rt_sigprocmask
+      "syscall\n"
+      "xor  %%rsp, %%rsp\n"        // invalidate the stack in all trusted code
+      "jmp  20f\n"                 // create trusted thread
+
+      // TODO(markus): Coalesce the read() operations by reading into a bigger
+      // buffer.
+
+      // Parameters:
+      //   *%fs: secure memory region
+      //         the page following this one contains the scratch space
+      //   %r13: thread's side of threadFd
+      //   %r15: processFdPub
+
+      // Local variables:
+      //   %rbx: sequence number for trusted calls
+
+      // Temporary variables:
+      //   %r8: child stack
+      //   %r9: system call number, child stack
+      //  %rbp: secure memory of previous thread
+
+      // Layout of secure shared memory region (c.f. securemem.h):
+      //   0x00: pointer to the secure shared memory region (i.e. self)
+      //   0x08: sequence number; must match %rbx
+      //   0x10: call type; must match %eax, iff %eax == -1 || %eax == -2
+      //   0x18: system call number; passed to syscall in %rax
+      //   0x20: first argument; passed to syscall in %rdi
+      //   0x28: second argument; passed to syscall in %rsi
+      //   0x30: third argument; passed to syscall in %rdx
+      //   0x38: fourth argument; passed to syscall in %r10
+      //   0x40: fifth argument; passed to syscall in %r8
+      //   0x48: sixth argument; passed to syscall in %r9
+      //   0x50: stored return address for clone() system call
+      //   0x58: stored %rbp value for clone() system call
+      //   0x60: stored %rbx value for clone() system call
+      //   0x68: stored %rcx value for clone() system call
+      //   0x70: stored %rdx value for clone() system call
+      //   0x78: stored %rsi value for clone() system call
+      //   0x80: stored %rdi value for clone() system call
+      //   0x88: stored %r8 value for clone() system call
+      //   0x90: stored %r9 value for clone() system call
+      //   0x98: stored %r10 value for clone() system call
+      //   0xA0: stored %r11 value for clone() system call
+      //   0xA8: stored %r12 value for clone() system call
+      //   0xB0: stored %r13 value for clone() system call
+      //   0xB8: stored %r14 value for clone() system call
+      //   0xC0: stored %r15 value for clone() system call
+      //   0xC8: new shared memory for clone()
+      //   0xD0: processFdPub for talking to trusted process
+      //   0xD4: cloneFdPub for talking to trusted process
+      //   0xD8: set to non-zero, if in debugging mode
+      //   0xDC: most recent SHM id returned by shmget(IPC_PRIVATE)
+      //   0xE0: cookie assigned to us by the trusted process (TLS_COOKIE)
+      //   0xE8: thread id (TLS_TID)
+      //   0xF0: threadFdPub (TLS_THREAD_FD)
+      //   0x200-0x1000: securely passed verified file name(s)
+
+      // Layout of (untrusted) scratch space:
+      //   0x00: syscall number; passed in %rax
+      //   0x04: first argument; passed in %rdi
+      //   0x0C: second argument; passed in %rsi
+      //   0x14: third argument; passed in %rdx
+      //   0x1C: fourth argument; passed in %r10
+      //   0x24: fifth argument; passed in %r8
+      //   0x2C: sixth argument; passed in %r9
+      //   0x34: return value
+      //   0x3C: RDTSCP result (%eax)
+      //   0x40: RDTSCP result (%edx)
+      //   0x44: RDTSCP result (%ecx)
+      //   0x48: last system call (not used on x86-64)
+      //   0x4C: number of consecutive calls to a time fnc (not used on x86-64)
+      //   0x50: nesting level of system calls (for debugging purposes only)
+      //   0x54: signal mask
+      //   0x5C: in SEGV handler
+
+      // We use the %fs register for accessing the secure read-only page, and
+      // the untrusted scratch space immediately following it. The segment
+      // register and the local descriptor table is set up by passing
+      // appropriate arguments to clone().
+
+    "0:xor  %%rsp, %%rsp\n"
+      "mov  $2, %%ebx\n"           // %rbx  = initial sequence number
+
+      // Read request from untrusted thread, or from trusted process. In either
+      // case, the data that we read has to be considered untrusted.
+      // read(threadFd, &scratch, 4)
+    "1:xor  %%rax, %%rax\n"        // NR_read
+      "mov  %%r13, %%rdi\n"        // fd  = threadFd
+      "mov  %%fs:0x0, %%rsi\n"     // secure_mem
+      "add  $0x1000, %%rsi\n"      // buf = &scratch
+      "mov  $4, %%edx\n"           // len = 4
+    "2:syscall\n"
+      "cmp  $-4, %%rax\n"          // EINTR
+      "jz   2b\n"
+      "cmp  %%rdx, %%rax\n"
+      "jnz  25f\n"                 // exit process
+
+      // Retrieve system call number. It is crucial that we only dereference
+      // %fs:0x1000 exactly once. Afterwards, memory becomes untrusted and
+      // we must use the value that we have read the first time.
+      "mov  0(%%rsi), %%eax\n"
+
+      // If syscall number is -1, execute an unlocked system call from the
+      // secure memory area
+      "cmp  $-1, %%eax\n"
+      "jnz  5f\n"
+    "3:cmp  %%rbx, %%fs:0x8\n"
+      "jne  25f\n"                 // exit process
+      "cmp  %%fs:0x10, %%eax\n"
+      "jne  25f\n"                 // exit process
+      "mov  %%fs:0x18, %%rax\n"
+      "mov  %%fs:0x20, %%rdi\n"
+      "mov  %%fs:0x28, %%rsi\n"
+      "mov  %%fs:0x30, %%rdx\n"
+      "mov  %%fs:0x38, %%r10\n"
+      "mov  %%fs:0x40, %%r8\n"
+      "mov  %%fs:0x48, %%r9\n"
+      "cmp  %%rbx, %%fs:0x8\n"
+      "jne  25f\n"                 // exit process
+      "add  $2, %%rbx\n"
+
+      // shmget() gets some special treatment. Whenever we return from this
+      // system call, we remember the most recently returned SysV shm id.
+      "cmp  $29, %%eax\n"          // NR_shmget
+      "jnz  4f\n"
+      "syscall\n"
+      "mov  %%rax, %%r8\n"
+      "mov  $56, %%eax\n"          // NR_clone
+      "mov  $17, %%edi\n"          // flags = SIGCHLD
+      "mov  $1, %%esi\n"           // stack = 1
+      "syscall\n"
+      "test %%rax, %%rax\n"
+      "js   25f\n"                 // exit process
+      "mov  %%rax, %%rdi\n"
+      "jnz  8f\n"                  // wait for child, then return result
+      "mov  %%fs:0x0, %%rdi\n"     // start = secure_mem
+      "mov  $4096, %%esi\n"        // len   = 4096
+      "mov  $3, %%edx\n"           // prot  = PROT_READ | PROT_WRITE
+      "mov  $10, %%eax\n"          // NR_mprotect
+      "syscall\n"
+      "mov  %%r8d, 0xDC(%%rdi)\n"  // set most recently returned SysV shm id
+      "xor  %%rdi, %%rdi\n"
+
+      // When debugging messages are enabled, warn about expensive system calls
+      #ifndef NDEBUG
+      "cmpw $0, %%fs:0xD8\n"       // debug mode
+      "jz   27f\n"
+      "mov  $1, %%eax\n"           // NR_write
+      "mov  $2, %%edi\n"           // fd = stderr
+      "lea  101f(%%rip), %%rsi\n"  // "This is an expensive system call"
+      "mov  $102f-101f, %%edx\n"   // len = strlen(msg)
+      "syscall\n"
+      "xor  %%rdi, %%rdi\n"
+      #endif
+
+      "jmp  27f\n"                 // exit program, no message
+    "4:syscall\n"
+      "jmp  15f\n"                 // return result
+
+      // If syscall number is -2, execute locked system call from the
+      // secure memory area
+    "5:jg   12f\n"
+      "cmp  $-2, %%eax\n"
+      "jnz  9f\n"
+      "cmp  %%rbx, %%fs:0x8\n"
+      "jne  25f\n"                 // exit process
+      "cmp  %%eax, %%fs:0x10\n"
+      "jne  25f\n"                 // exit process
+
+      // When debugging messages are enabled, warn about expensive system calls
+      #ifndef NDEBUG
+      "cmpw $0, %%fs:0xD8\n"       // debug mode
+      "jz   6f\n"
+      "mov  $1, %%eax\n"           // NR_write
+      "mov  $2, %%edi\n"           // fd = stderr
+      "lea  101f(%%rip), %%rsi\n"  // "This is an expensive system call"
+      "mov  $102f-101f, %%edx\n"   // len = strlen(msg)
+      "syscall\n"
+    "6:"
+      #endif
+
+      "mov  %%fs:0x18, %%rax\n"
+      "mov  %%fs:0x20, %%rdi\n"
+      "mov  %%fs:0x28, %%rsi\n"
+      "mov  %%fs:0x30, %%rdx\n"
+      "mov  %%fs:0x38, %%r10\n"
+      "mov  %%fs:0x40, %%r8\n"
+      "mov  %%fs:0x48, %%r9\n"
+      "cmp  %%rbx, %%fs:0x8\n"
+      "jne  25f\n"                 // exit process
+
+      // clone() has unusual calling conventions and must be handled specially
+      "cmp  $56, %%rax\n"          // NR_clone
+      "jz   19f\n"
+
+      // exit() terminates trusted thread
+      "cmp  $60, %%eax\n"          // NR_exit
+      "jz   18f\n"
+
+      // Perform requested system call
+      "syscall\n"
+
+      // Unlock mutex
+    "7:cmp  %%rbx, %%fs:0x8\n"
+      "jne  25f\n"                 // exit process
+      "add  $2, %%rbx\n"
+      "mov  %%rax, %%r8\n"
+      "mov  $56, %%eax\n"          // NR_clone
+      "mov  $17, %%rdi\n"          // flags = SIGCHLD
+      "mov  $1, %%rsi\n"           // stack = 1
+      "syscall\n"
+      "test %%rax, %%rax\n"
+      "js   25f\n"                 // exit process
+      "jz   22f\n"                 // unlock and exit
+      "mov  %%rax, %%rdi\n"
+    "8:xor  %%rsi, %%rsi\n"
+      "xor  %%rdx, %%rdx\n"
+      "xor  %%r10, %%r10\n"
+      "mov  $61, %%eax\n"          // NR_wait4
+      "syscall\n"
+      "cmp  $-4, %%eax\n"          // EINTR
+      "jz   8b\n"
+      "mov  %%r8, %%rax\n"
+      "jmp  15f\n"                 // return result
+
+      // If syscall number is -3, read the time stamp counter
+    "9:cmp  $-3, %%eax\n"
+      "jnz  10f\n"
+      "rdtsc\n"                    // sets %edx:%eax
+      "xor  %%rcx, %%rcx\n"
+      "jmp  11f\n"
+    "10:cmp  $-4, %%eax\n"
+      "jnz  12f\n"
+      "rdtscp\n"                   // sets %edx:%eax and %ecx
+   "11:add  $0x3C, %%rsi\n"
+      "mov  %%eax, 0(%%rsi)\n"
+      "mov  %%edx, 4(%%rsi)\n"
+      "mov  %%ecx, 8(%%rsi)\n"
+      "mov  $12, %%edx\n"
+      "jmp  16f\n"                 // return result
+
+      // Check in syscallTable whether this system call is unrestricted
+   "12:mov  %%rax, %%r9\n"
+      #ifndef NDEBUG
+      "cmpw $0, %%fs:0xD8\n"       // debug mode
+      "jnz  13f\n"
+      #endif
+      "cmp  playground$maxSyscall(%%rip), %%eax\n"
+      "ja   25f\n"                 // exit process
+      "shl  $4, %%rax\n"
+      "lea  playground$syscallTable(%%rip), %%rdi\n"
+      "add  %%rdi, %%rax\n"
+      "mov  0(%%rax), %%rax\n"
+      "cmp  $1, %%rax\n"
+      "jne  25f\n"                 // exit process
+
+      // Default behavior for unrestricted system calls is to just execute
+      // them. Read the remaining arguments first.
+   "13:mov  %%rsi, %%r8\n"
+      "xor  %%rax, %%rax\n"        // NR_read
+      "mov  %%r13, %%rdi\n"        // fd  = threadFd
+      "add  $4, %%rsi\n"           // buf = &scratch + 4
+      "mov  $48, %%edx\n"          // len = 6*sizeof(void *)
+   "14:syscall\n"
+      "cmp  $-4, %%rax\n"          // EINTR
+      "jz   14b\n"
+      "cmp  %%rdx, %%rax\n"
+      "jnz  25f\n"                 // exit process
+      "mov  %%r9, %%rax\n"
+      "mov  0x04(%%r8), %%rdi\n"
+      "mov  0x0C(%%r8), %%rsi\n"
+      "mov  0x14(%%r8), %%rdx\n"
+      "mov  0x1C(%%r8), %%r10\n"
+      "mov  0x2C(%%r8), %%r9\n"
+      "mov  0x24(%%r8), %%r8\n"
+      "cmp  $231, %%rax\n"         // NR_exit_group
+      "jz   27f\n"                 // exit program, no message
+      "syscall\n"
+
+      // Return result of system call to sandboxed thread
+   "15:mov  %%fs:0x0, %%rsi\n"     // secure_mem
+      "add  $0x1034, %%rsi\n"      // buf   = &scratch + 52
+      "mov  %%rax, (%%rsi)\n"
+      "mov  $8, %%edx\n"           // len   = 8
+   "16:mov  %%r13, %%rdi\n"        // fd    = threadFd
+      "mov  $1, %%eax\n"           // NR_write
+   "17:syscall\n"
+      "cmp  %%rdx, %%rax\n"
+      "jz   1b\n"
+      "cmp  $-4, %%rax\n"          // EINTR
+      "jz   17b\n"
+      "jmp  25f\n"                 // exit process
+
+      // NR_exit:
+      // Exit trusted thread after cleaning up resources
+   "18:mov  %%fs:0x0, %%rsi\n"     // secure_mem
+      "mov  0xF0(%%rsi), %%rdi\n"  // fd     = threadFdPub
+      "mov  $3, %%eax\n"           // NR_close
+      "syscall\n"
+      "mov  %%rsi, %%rdi\n"        // start  = secure_mem
+      "mov  $8192, %%esi\n"        // length = 8192
+      "xor  %%rdx, %%rdx\n"        // prot   = PROT_NONE
+      "mov  $10, %%eax\n"          // NR_mprotect
+      "syscall\n"
+      "mov  %%r13, %%rdi\n"        // fd     = threadFd
+      "mov  $3, %%eax\n"           // NR_close
+      "syscall\n"
+      "mov  $56, %%eax\n"          // NR_clone
+      "mov  $17, %%rdi\n"          // flags = SIGCHLD
+      "mov  $1, %%rsi\n"           // stack = 1
+      "syscall\n"
+      "mov  %%rax, %%rdi\n"
+      "test %%rax, %%rax\n"
+      "js   27f\n"                 // exit process
+      "jne  21f\n"                 // reap helper, exit thread
+      "jmp  22f\n"                 // unlock mutex
+
+      // NR_clone:
+      // Original trusted thread calls clone() to create new nascent
+      // thread. This thread is (typically) fully privileged and shares all
+      // resources with the caller (i.e. the previous trusted thread),
+      // and by extension it shares all resources with the sandbox'd
+      // threads.
+   "19:mov  %%fs:0x0, %%rbp\n"     // %rbp  = old_shared_mem
+      "mov  %%rsi, %%r15\n"        // remember child stack
+      "mov  $1, %%rsi\n"           // stack = 1
+      "syscall\n"                  // calls NR_clone
+      "cmp  $-4095, %%rax\n"       // return codes -1..-4095 are errno values
+      "jae  7b\n"                  // unlock mutex, return result
+      "add  $2, %%rbx\n"
+      "test %%rax, %%rax\n"
+      "jne  15b\n"                 // return result
+
+      // In nascent thread, now.
+      "sub  $2, %%rbx\n"
+
+      // We want to maintain an invalid %rsp whenever we access untrusted
+      // memory. This ensures that even if an attacker can trick us into
+      // triggering a SIGSEGV, we will never successfully execute a signal
+      // handler.
+      // Signal handlers are inherently dangerous, as an attacker could trick
+      // us into returning to the wrong address by adjusting the signal stack
+      // right before the handler returns.
+      // N.B. While POSIX is curiously silent about this, it appears that on
+      // Linux, alternate signal stacks are a per-thread property. That is
+      // good. It means that this security mechanism works, even if the
+      // sandboxed thread manages to set up an alternate signal stack.
+      //
+      // TODO(markus): We currently do not support emulating calls to
+      // sys_clone() with a zero (i.e. copy) stack parameter. See clone.cc
+      // for a discussion on how to fix this, if this ever becomes necessary.
+      "mov  %%r15, %%r9\n"         // %r9 = child_stack
+      "xor  %%r15, %%r15\n"        // Request to return from clone() when done
+
+      // Get thread id of nascent thread
+   "20:mov  $186, %%eax\n"         // NR_gettid
+      "syscall\n"
+      "mov  %%rax, %%r14\n"
+
+      // Nascent thread creates socketpair() for sending requests to
+      // trusted thread.
+      // We can create the filehandles on the child's stack. Filehandles are
+      // always treated as untrusted.
+      // socketpair(AF_UNIX, SOCK_STREAM, 0, fds)
+      "sub  $0x10, %%r9\n"
+      "mov  %%r15, 8(%%r9)\n"      // preserve return address on child stack
+      "mov  $53, %%eax\n"          // NR_socketpair
+      "mov  $1, %%edi\n"           // domain = AF_UNIX
+      "mov  $1, %%esi\n"           // type = SOCK_STREAM
+      "xor  %%rdx, %%rdx\n"        // protocol = 0
+      "mov  %%r9, %%r10\n"         // sv = child_stack
+      "syscall\n"
+      "test %%rax, %%rax\n"
+      "jz   28f\n"
+
+      // If things went wrong, we don't have an (easy) way of signaling
+      // the parent. For our purposes, it is sufficient to fail with a
+      // fatal error.
+      "jmp  25f\n"                 // exit process
+   "21:xor  %%rsi, %%rsi\n"
+      "xor  %%rdx, %%rdx\n"
+      "xor  %%r10, %%r10\n"
+      "mov  $61, %%eax\n"          // NR_wait4
+      "syscall\n"
+      "cmp  $-4, %%eax\n"          // EINTR
+      "jz   21b\n"
+      "jmp  23f\n"                 // exit thread (no message)
+   "22:lea  playground$syscall_mutex(%%rip), %%rdi\n"
+      "mov  $4096, %%esi\n"
+      "mov  $3, %%edx\n"           // prot = PROT_READ | PROT_WRITE
+      "mov  $10, %%eax\n"          // NR_mprotect
+      "syscall\n"
+      "lock; addl $0x80000000, (%%rdi)\n"
+      "jz   23f\n"                 // exit thread
+      "mov  $1, %%edx\n"
+      "mov  %%rdx, %%rsi\n"        // FUTEX_WAKE
+      "mov  $202, %%eax\n"         // NR_futex
+      "syscall\n"
+   "23:mov  $60, %%eax\n"          // NR_exit
+      "mov  $1, %%edi\n"           // status = 1
+   "24:syscall\n"
+   "25:mov  $1, %%eax\n"           // NR_write
+      "mov  $2, %%edi\n"           // fd = stderr
+      "lea  100f(%%rip), %%rsi\n"  // "Sandbox violation detected"
+      "mov  $101f-100f, %%edx\n"   // len = strlen(msg)
+      "syscall\n"
+   "26:mov  $1, %%edi\n"
+   "27:mov  $231, %%eax\n"         // NR_exit_group
+      "jmp  24b\n"
+
+      // The first page is mapped read-only for use as securely shared memory
+   "28:mov  0xC8(%%rbp), %%r12\n"  // %r12 = secure shared memory
+      "cmp  %%rbx, 8(%%rbp)\n"
+      "jne  25b\n"                 // exit process
+      "mov  $10, %%eax\n"          // NR_mprotect
+      "mov  %%r12, %%rdi\n"        // addr = secure_mem
+      "mov  $4096, %%esi\n"        // len  = 4096
+      "mov  $1, %%edx\n"           // prot = PROT_READ
+      "syscall\n"
+
+      // The second page is used as scratch space by the trusted thread.
+      // Make it writable.
+      "mov  $10, %%eax\n"          // NR_mprotect
+      "add  $4096, %%rdi\n"        // addr = secure_mem + 4096
+      "mov  $3, %%edx\n"           // prot = PROT_READ | PROT_WRITE
+      "syscall\n"
+
+      // Call clone() to create the new trusted thread.
+      // clone(CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
+      //       CLONE_SYSVSEM|CLONE_UNTRACED|CLONE_SETTLS, stack, NULL, NULL,
+      //       tls)
+      "mov  4(%%r9), %%r13d\n"     // %r13  = threadFd (on child's stack)
+      "mov  $56, %%eax\n"          // NR_clone
+      "mov  $0x8D0F00, %%edi\n"    // flags = VM|FS|FILES|SIGH|THR|SYSV|UTR|TLS
+      "mov  $1, %%rsi\n"           // stack = 1
+      "mov  %%r12, %%r8\n"         // tls   = new_secure_mem
+      "mov  0xD0(%%rbp), %%r15d\n" // %r15  = processFdPub
+      "cmp  %%rbx, 8(%%rbp)\n"
+      "jne  25b\n"                 // exit process
+      "syscall\n"
+      "test %%rax, %%rax\n"
+      "js   25b\n"                 // exit process
+      "jz   0b\n"                  // invoke trustedThreadFnc()
+
+      // Copy the caller's signal mask
+      "mov  0x1054(%%rbp), %%rax\n"
+      "mov  %%rax, 0x1054(%%r12)\n"
+
+      // Done creating trusted thread. We can now get ready to return to caller
+      "mov  %%r9, %%r8\n"          // %r8 = child_stack
+      "mov  0(%%r9), %%r9d\n"      // %r9 = threadFdPub
+
+      // Set up thread local storage with information on how to talk to
+      // trusted thread and trusted process.
+      "lea  0xE0(%%r12), %%rsi\n"  // args   = &secure_mem.TLS;
+      "mov  $158, %%eax\n"         // NR_arch_prctl
+      "mov  $0x1001, %%edi\n"      // option = ARCH_SET_GS
+      "syscall\n"
+      "cmp  $-4095, %%rax\n"       // return codes -1..-4095 are errno values
+      "jae  25b\n"                 // exit process
+
+      // Check whether this is the initial thread, or a newly created one.
+      // At startup we run the same code as when we create a new thread. At
+      // the very top of this function, you will find that we push 999(%rip)
+      // on the stack. That is the signal that we should return on the same
+      // stack rather than return to where clone was called.
+      "mov  8(%%r8), %%r15\n"
+      "add  $0x10, %%r8\n"
+      "test %%r15, %%r15\n"
+      "jne  29f\n"
+
+      // Returning from clone() into the newly created thread is special. We
+      // cannot unroll the stack, as we just set up a new stack for this
+      // thread. We have to explicitly restore CPU registers to the values
+      // that they had when the program originally called clone().
+      // We patch the register values in the signal stack frame so that we
+      // can ask sigreturn() to restore all registers for us.
+      "sub  $0x8, %%r8\n"
+      "mov  0x50(%%rbp), %%rax\n"
+      "mov  %%rax, 0x00(%%r8)\n"   // return address
+      "xor  %%rax, %%rax\n"
+      "mov  %%rax, 0x98(%%r8)\n"   // %rax = 0
+      "mov  0x58(%%rbp), %%rax\n"
+      "mov  %%rax, 0x80(%%r8)\n"   // %rbp
+      "mov  0x60(%%rbp), %%rax\n"
+      "mov  %%rax, 0x88(%%r8)\n"   // %rbx
+      "mov  0x68(%%rbp), %%rax\n"
+      "mov  %%rax, 0xA0(%%r8)\n"   // %rcx
+      "mov  0x70(%%rbp), %%rax\n"
+      "mov  %%rax, 0x90(%%r8)\n"   // %rdx
+      "mov  0x78(%%rbp), %%rax\n"
+      "mov  %%rax, 0x78(%%r8)\n"   // %rsi
+      "mov  0x80(%%rbp), %%rax\n"
+      "mov  %%rax, 0x70(%%r8)\n"   // %rdi
+      "mov  0x88(%%rbp), %%rax\n"
+      "mov  %%rax, 0x30(%%r8)\n"   // %r8
+      "mov  0x90(%%rbp), %%rax\n"
+      "mov  %%rax, 0x38(%%r8)\n"   // %r9
+      "mov  0x98(%%rbp), %%rax\n"
+      "mov  %%rax, 0x40(%%r8)\n"   // %r10
+      "mov  0xA0(%%rbp), %%rax\n"
+      "mov  %%rax, 0x48(%%r8)\n"   // %r11
+      "mov  0xA8(%%rbp), %%rax\n"
+      "mov  %%rax, 0x50(%%r8)\n"   // %r12
+      "mov  0xB0(%%rbp), %%rax\n"
+      "mov  %%rax, 0x58(%%r8)\n"   // %r13
+      "mov  0xB8(%%rbp), %%rax\n"
+      "mov  %%rax, 0x60(%%r8)\n"   // %r14
+      "mov  0xC0(%%rbp), %%rax\n"
+      "mov  %%rax, 0x68(%%r8)\n"   // %r15
+      "cmp  %%rbx, 8(%%rbp)\n"
+      "jne  25b\n"                 // exit process
+
+      // Nascent thread launches a helper that doesn't share any of our
+      // resources, except for pages mapped as MAP_SHARED.
+      // clone(SIGCHLD, stack=1)
+   "29:mov  $56, %%eax\n"          // NR_clone
+      "mov  $17, %%rdi\n"          // flags = SIGCHLD
+      "mov  $1, %%rsi\n"           // stack = 1
+      "syscall\n"
+      "test %%rax, %%rax\n"
+      "js   25b\n"                 // exit process
+      "jne  31f\n"
+
+      // Use sendmsg() to send to the trusted process the file handles for
+      // communicating with the new trusted thread. We also send the address
+      // of the secure memory area (for sanity checks) and the thread id.
+      "mov  0xD4(%%rbp), %%edi\n"  // transport = Sandbox::cloneFdPub()
+      "cmp  %%rbx, 8(%%rbp)\n"
+      "jne  25b\n"                 // exit process
+
+      // 0x00 msg:
+      //   0x00 msg_name       ($0)
+      //   0x08 msg_namelen    ($0)
+      //   0x10 msg_iov        (%r8 + 0x44)
+      //   0x18 msg_iovlen     ($1)
+      //   0x20 msg_control    (%r8 + 0x54)
+      //   0x28 msg_controllen ($0x18)
+      // 0x30 data:
+      //   0x30 msg_flags/err  ($0)
+      //   0x34 secure_mem     (%r12)
+      //   0x3C threadId       (%r14d)
+      //   0x40 threadFdPub    (%r9d)
+      // 0x44 iov:
+      //   0x44 iov_base       (%r8 + 0x30)
+      //   0x4C iov_len        ($0x14)
+      // 0x54 cmsg:
+      //   0x54 cmsg_len       ($0x18)
+      //   0x5C cmsg_level     ($1, SOL_SOCKET)
+      //   0x60 cmsg_type      ($1, SCM_RIGHTS)
+      //   0x64 threadFdPub    (%r9d)
+      //   0x68 threadFd       (%r13d)
+      // 0x6C
+      "sub  $0x6C, %%r8\n"
+      "xor  %%rdx, %%rdx\n"        // flags = 0
+      "mov  %%rdx, 0x00(%%r8)\n"   // msg_name
+      "mov  %%edx, 0x08(%%r8)\n"   // msg_namelen
+      "mov  %%edx, 0x30(%%r8)\n"   // msg_flags
+      "mov  $1, %%r11d\n"
+      "mov  %%r11, 0x18(%%r8)\n"   // msg_iovlen
+      "mov  %%r11d, 0x5C(%%r8)\n"  // cmsg_level
+      "mov  %%r11d, 0x60(%%r8)\n"  // cmsg_type
+      "lea  0x30(%%r8), %%r11\n"
+      "mov  %%r11, 0x44(%%r8)\n"   // iov_base
+      "add  $0x14, %%r11\n"
+      "mov  %%r11, 0x10(%%r8)\n"   // msg_iov
+      "add  $0x10, %%r11\n"
+      "mov  %%r11, 0x20(%%r8)\n"   // msg_control
+      "mov  $0x14, %%r11d\n"
+      "mov  %%r11, 0x4C(%%r8)\n"   // iov_len
+      "add  $4, %%r11d\n"
+      "mov  %%r11, 0x28(%%r8)\n"   // msg_controllen
+      "mov  %%r11, 0x54(%%r8)\n"   // cmsg_len
+      "mov  %%r12, 0x34(%%r8)\n"   // secure_mem
+      "mov  %%r14d, 0x3C(%%r8)\n"  // threadId
+      "mov  %%r9d, 0x40(%%r8)\n"   // threadFdPub
+      "mov  %%r9d, 0x64(%%r8)\n"   // threadFdPub
+      "mov  %%r13d, 0x68(%%r8)\n"  // threadFd
+      "mov  $46, %%eax\n"          // NR_sendmsg
+      "mov  %%r8, %%rsi\n"         // msg
+      "syscall\n"
+
+      // Release syscall_mutex_. This signals the trusted process that
+      // it can write into the original thread's secure memory again.
+      "mov  $10, %%eax\n"          // NR_mprotect
+      "lea  playground$syscall_mutex(%%rip), %%rdi\n"
+      "mov  $4096, %%esi\n"
+      "mov  $3, %%edx\n"           // PROT_READ | PROT_WRITE
+      "syscall\n"
+      "cmp  %%rbx, 8(%%rbp)\n"
+      "jne  25b\n"                 // exit process
+      "lock; addl $0x80000000, (%%rdi)\n"
+      "jz   30f\n"                 // exit process (no error message)
+      "mov  $1, %%edx\n"
+      "mov  %%rdx, %%rsi\n"        // FUTEX_WAKE
+      "mov  $202, %%eax\n"         // NR_futex
+      "syscall\n"
+   "30:xor  %%rdi, %%rdi\n"
+      "jmp  27b\n"                 // exit process (no error message)
+
+      // Reap helper
+   "31:mov  %%rax, %%rdi\n"
+   "32:lea  -4(%%r8), %%rsi\n"
+      "xor  %%rdx, %%rdx\n"
+      "xor  %%r10, %%r10\n"
+      "mov  $61, %%eax\n"          // NR_wait4
+      "syscall\n"
+      "cmp  $-4, %%eax\n"          // EINTR
+      "jz   32b\n"
+      "mov  -4(%%r8), %%eax\n"
+      "test %%rax, %%rax\n"
+      "jnz  26b\n"                 // exit process (no error message)
+
+      // Release privileges by entering seccomp mode.
+      "mov  $157, %%eax\n"         // NR_prctl
+      "mov  $22, %%edi\n"          // PR_SET_SECCOMP
+      "mov  $1, %%esi\n"
+      "syscall\n"
+      "test %%rax, %%rax\n"
+      "jnz  25b\n"                 // exit process
+
+      // We can finally start using the stack. Signal handlers no longer pose
+      // a threat to us.
+      "mov  %%r8, %%rsp\n"
+
+      // Back in the newly created sandboxed thread, wait for trusted process
+      // to receive request. It is possible for an attacker to make us
+      // continue even before the trusted process is done. This is OK. It'll
+      // result in us putting stale values into the new thread's TLS. But that
+      // data is considered untrusted anyway.
+      "push %%rax\n"
+      "mov  $1, %%edx\n"           // len       = 1
+      "mov  %%rsp, %%rsi\n"        // buf       = %rsp
+      "mov  %%r9, %%rdi\n"         // fd        = threadFdPub
+   "33:xor  %%rax, %%rax\n"        // NR_read
+      "syscall\n"
+      "cmp  $-4, %%rax\n"          // EINTR
+      "jz   33b\n"
+      "cmp  %%rdx, %%rax\n"
+      "jne  25b\n"                 // exit process
+      "pop  %%rax\n"
+
+      // Return to caller. We are in the new thread, now.
+      "test %%r15, %%r15\n"
+      "jnz  34f\n"                 // Returning to createTrustedThread()
+
+      // Returning to the place where clone() had been called. We rely on
+      // using rt_sigreturn() for restoring our registers. The caller already
+      // created a signal stack frame, and we patched the register values
+      // with the ones that were in effect prior to calling sandbox_clone().
+      "pop %%r15\n"
+   "34:mov  %%r15, 0xA8(%%rsp)\n"  // compute new %rip
+      "mov  $15, %%eax\n"          // NR_rt_sigreturn
+      "syscall\n"
+
+      ".pushsection \".rodata\"\n"
+  "100:.ascii \"Sandbox violation detected, program aborted\\n\"\n"
+  "101:.ascii \"WARNING! This is an expensive system call\\n\"\n"
+  "102:\n"
+      ".popsection\n"
+
+  "999:pop  %%rbp\n"
+      "pop  %%rbx\n"
+      :
+      : "g"(&args)
+      : "rax", "rcx", "rdx", "rdi", "rsi", "r8", "r9", "r10", "r11", "r12",
+        "r13", "r14", "r15", "rsp", "memory"
+#elif defined(__i386__)
+  struct user_desc u;
+  u.entry_number    = (typeof u.entry_number)-1;
+  u.base_addr       = 0;
+  u.limit           = 0xfffff;
+  u.seg_32bit       = 1;
+  u.contents        = 0;
+  u.read_exec_only  = 0;
+  u.limit_in_pages  = 1;
+  u.seg_not_present = 0;
+  u.useable         = 1;
+  SysCalls sys;
+  if (sys.set_thread_area(&u) < 0) {
+    die("Cannot set up thread local storage");
+  }
+  asm volatile("movw %w0, %%fs"
+      :
+      : "q"(8*u.entry_number+3));
+  asm volatile(
+      "push %%ebx\n"
+      "push %%ebp\n"
+
+      // Signal handlers are process-wide. This means that for security
+      // reasons, we cannot allow that the trusted thread ever executes any
+      // signal handlers.
+      // We prevent the execution of signal handlers by setting a signal
+      // mask that blocks all signals. In addition, we make sure that the
+      // stack pointer is invalid.
+      // We cannot reset the signal mask until after we have enabled
+      // Seccomp mode. Our sigprocmask() wrapper would normally do this by
+      // raising a signal, modifying the signal mask in the kernel-generated
+      // signal frame, and then calling sigreturn(). This presents a bit of
+      // a Catch-22, as all signals are masked and we can therefore not
+      // raise any signal that would allow us to generate the signal stack
+      // frame.
+      // Instead, we have to create the signal stack frame prior to entering
+      // Seccomp mode. This incidentally also helps us to restore the
+      // signal mask to the same value that it had prior to entering the
+      // sandbox.
+      // The signal wrapper for clone() is the second entry point into this
+      // code (by means of sending an IPC to its trusted thread). It goes
+      // through the same steps of creating a signal stack frame on the
+      // newly created thread's stack prior to cloning. See clone.cc for
+      // details.
+      "mov  %0, %%edi\n"           // create signal stack before accessing MMX
+      "mov  $120+0xF000, %%eax\n"  // __NR_clone + 0xF000
+      "mov  %%esp, %%ebp\n"
+      "int  $0\n"                  // push a signal stack frame (see clone.cc)
+      "mov  %%ebp, 0x1C(%%esp)\n"  // pop stack upon call to sigreturn()
+      "mov  %%esp, %%ebp\n"
+      "mov  $2, %%ebx\n"           // how     = SIG_SETMASK
+      "pushl $-1\n"
+      "pushl $-1\n"
+      "mov  %%esp, %%ecx\n"        // set     = full mask
+      "xor  %%edx, %%edx\n"        // old_set = NULL
+      "mov  $8, %%esi\n"           // mask all 64 signals
+      "mov  $175, %%eax\n"         // NR_rt_sigprocmask
+      "int  $0x80\n"
+      "mov  $126, %%eax\n"         // NR_sigprocmask
+      "int  $0x80\n"
+      "xor  %%esp, %%esp\n"        // invalidate the stack in all trusted code
+      "movd %%edi, %%mm6\n"        // %mm6 = args
+      "lea  999f, %%edi\n"         // continue in same thread
+      "movd %%edi, %%mm3\n"
+      "xor  %%edi, %%edi\n"        // initial sequence number
+      "movd %%edi, %%mm2\n"
+      "jmp  20f\n"                 // create trusted thread
+
+      // TODO(markus): Coalesce the read() operations by reading into a bigger
+      // buffer.
+
+      // Parameters:
+      //   %mm0: thread's side of threadFd
+      //   %mm1: processFdPub
+      //   %mm3: return address after creation of new trusted thread
+      //   %mm5: secure memory region
+      //         the page following this one contains the scratch space
+
+      // Local variables:
+      //   %mm2: sequence number for trusted calls
+      //   %mm4: thread id
+
+      // Temporary variables:
+      //   %ebp: system call number
+      //   %mm6: secure memory of previous thread
+      //   %mm7: temporary variable for spilling data
+
+      // Layout of secure shared memory region (c.f. securemem.h):
+      //   0x00: pointer to the secure shared memory region (i.e. self)
+      //   0x04: sequence number; must match %mm2
+      //   0x08: call type; must match %eax, iff %eax == -1 || %eax == -2
+      //   0x0C: system call number; passed to syscall in %eax
+      //   0x10: first argument; passed to syscall in %ebx
+      //   0x14: second argument; passed to syscall in %ecx
+      //   0x18: third argument; passed to syscall in %edx
+      //   0x1C: fourth argument; passed to syscall in %esi
+      //   0x20: fifth argument; passed to syscall in %edi
+      //   0x24: sixth argument; passed to syscall in %ebp
+      //   0x28: stored return address for clone() system call
+      //   0x2C: stored %ebp value for clone() system call
+      //   0x30: stored %edi value for clone() system call
+      //   0x34: stored %esi value for clone() system call
+      //   0x38: stored %edx value for clone() system call
+      //   0x3C: stored %ecx value for clone() system call
+      //   0x40: stored %ebx value for clone() system call
+      //   0x44: new shared memory for clone()
+      //   0x48: processFdPub for talking to trusted process
+      //   0x4C: cloneFdPub for talking to trusted process
+      //   0x50: set to non-zero, if in debugging mode
+      //   0x54: most recent SHM id returned by shmget(IPC_PRIVATE)
+      //   0x58: cookie assigned to us by the trusted process (TLS_COOKIE)
+      //   0x60: thread id (TLS_TID)
+      //   0x68: threadFdPub (TLS_THREAD_FD)
+      //   0x200-0x1000: securely passed verified file name(s)
+
+      // Layout of (untrusted) scratch space:
+      //   0x00: syscall number; passed in %eax
+      //   0x04: first argument; passed in %ebx
+      //   0x08: second argument; passed in %ecx
+      //   0x0C: third argument; passed in %edx
+      //   0x10: fourth argument; passed in %esi
+      //   0x14: fifth argument; passed in %edi
+      //   0x18: sixth argument; passed in %ebp
+      //   0x1C: return value
+      //   0x20: RDTSCP result (%eax)
+      //   0x24: RDTSCP result (%edx)
+      //   0x28: RDTSCP result (%ecx)
+      //   0x2C: last system call (updated in syscall.cc)
+      //   0x30: number of consecutive calls to a time fnc. (e.g. gettimeofday)
+      //   0x34: nesting level of system calls (for debugging purposes only)
+      //   0x38: signal mask
+      //   0x40: in SEGV handler
+
+    "0:xor  %%esp, %%esp\n"
+      "mov  $2, %%eax\n"           // %mm2 = initial sequence number
+      "movd %%eax, %%mm2\n"
+
+      // Read request from untrusted thread, or from trusted process. In either
+      // case, the data that we read has to be considered untrusted.
+      // read(threadFd, &scratch, 4)
+    "1:mov  $3, %%eax\n"           // NR_read
+      "movd %%mm0, %%ebx\n"        // fd  = threadFd
+      "movd %%mm5, %%ecx\n"        // secure_mem
+      "add  $0x1000, %%ecx\n"      // buf = &scratch
+      "mov  $4, %%edx\n"           // len = 4
+    "2:int  $0x80\n"
+      "cmp  $-4, %%eax\n"          // EINTR
+      "jz   2b\n"
+      "cmp  %%edx, %%eax\n"
+      "jnz  25f\n"                 // exit process
+
+      // Retrieve system call number. It is crucial that we only dereference
+      // 0x1000(%mm5) exactly once. Afterwards, memory becomes untrusted and
+      // we must use the value that we have read the first time.
+      "mov  0(%%ecx), %%eax\n"
+
+      // If syscall number is -1, execute an unlocked system call from the
+      // secure memory area
+      "cmp  $-1, %%eax\n"
+      "jnz  5f\n"
+    "3:movd %%mm2, %%ebp\n"
+      "cmp  %%ebp, 0x4-0x1000(%%ecx)\n"
+      "jne  25f\n"                 // exit process
+      "cmp  0x08-0x1000(%%ecx), %%eax\n"
+      "jne  25f\n"                 // exit process
+      "mov  0x0C-0x1000(%%ecx), %%eax\n"
+      "mov  0x10-0x1000(%%ecx), %%ebx\n"
+      "mov  0x18-0x1000(%%ecx), %%edx\n"
+      "mov  0x1C-0x1000(%%ecx), %%esi\n"
+      "mov  0x20-0x1000(%%ecx), %%edi\n"
+      "mov  0x24-0x1000(%%ecx), %%ebp\n"
+      "mov  0x14-0x1000(%%ecx), %%ecx\n"
+      "movd %%edi, %%mm4\n"
+      "movd %%ebp, %%mm7\n"
+      "movd %%mm2, %%ebp\n"
+      "movd %%mm5, %%edi\n"
+      "cmp  %%ebp, 4(%%edi)\n"
+      "jne  25f\n"                 // exit process
+      "add  $2, %%ebp\n"
+      "movd %%ebp, %%mm2\n"
+      "movd %%mm4, %%edi\n"
+      "movd %%mm7, %%ebp\n"
+
+      // shmget() gets some special treatment. Whenever we return from this
+      // system call, we remember the most recently returned SysV shm id.
+      "cmp  $117, %%eax\n"         // NR_ipc
+      "jnz  4f\n"
+      "cmp  $23, %%ebx\n"          // shmget()
+      "jnz  4f\n"
+      "int  $0x80\n"
+      "mov  %%eax, %%ebp\n"
+      "mov  $120, %%eax\n"         // NR_clone
+      "mov  $17, %%ebx\n"          // flags = SIGCHLD
+      "mov  $1, %%ecx\n"           // stack = 1
+      "int  $0x80\n"
+      "test %%eax, %%eax\n"
+      "js   25f\n"                 // exit process
+      "mov  %%eax, %%ebx\n"
+      "jnz  8f\n"                  // wait for child, then return result
+      "movd %%mm5, %%ebx\n"        // start = secure_mem
+      "mov  $4096, %%ecx\n"        // len   = 4096
+      "mov  $3, %%edx\n"           // prot  = PROT_READ | PROT_WRITE
+      "mov  $125, %%eax\n"         // NR_mprotect
+      "int  $0x80\n"
+      "mov  %%ebp, 0x54(%%ebx)\n"  // set most recently returned SysV shm id
+      "xor  %%ebx, %%ebx\n"
+
+      // When debugging messages are enabled, warn about expensive system calls
+      #ifndef NDEBUG
+      "movd %%mm5, %%ecx\n"
+      "cmpw $0, 0x50(%%ecx)\n"     // debug mode
+      "jz   27f\n"
+      "mov  $4, %%eax\n"           // NR_write
+      "mov  $2, %%ebx\n"           // fd = stderr
+      "lea  101f, %%ecx\n"         // "This is an expensive system call"
+      "mov  $102f-101f, %%edx\n"   // len = strlen(msg)
+      "int  $0x80\n"
+      "xor  %%ebx, %%ebx\n"
+      #endif
+
+      "jmp  27f\n"                 // exit program, no message
+    "4:int  $0x80\n"
+      "jmp  15f\n"                 // return result
+
+      // If syscall number is -2, execute locked system call from the
+      // secure memory area
+    "5:jg   12f\n"
+      "cmp  $-2, %%eax\n"
+      "jnz  9f\n"
+      "movd %%mm2, %%ebp\n"
+      "cmp  %%ebp, 0x4-0x1000(%%ecx)\n"
+      "jne  25f\n"                 // exit process
+      "cmp  %%eax, 0x8-0x1000(%%ecx)\n"
+      "jne  25f\n"                 // exit process
+
+      // When debugging messages are enabled, warn about expensive system calls
+      #ifndef NDEBUG
+      "cmpw $0, 0x50-0x1000(%%ecx)\n"
+      "jz   6f\n"                  // debug mode
+      "mov  %%ecx, %%ebp\n"
+      "mov  $4, %%eax\n"           // NR_write
+      "mov  $2, %%ebx\n"           // fd = stderr
+      "lea  101f, %%ecx\n"         // "This is an expensive system call"
+      "mov  $102f-101f, %%edx\n"   // len = strlen(msg)
+      "int  $0x80\n"
+      "mov  %%ebp, %%ecx\n"
+   "6:"
+      #endif
+
+      "mov  0x0C-0x1000(%%ecx), %%eax\n"
+      "mov  0x10-0x1000(%%ecx), %%ebx\n"
+      "mov  0x18-0x1000(%%ecx), %%edx\n"
+      "mov  0x1C-0x1000(%%ecx), %%esi\n"
+      "mov  0x20-0x1000(%%ecx), %%edi\n"
+      "mov  0x24-0x1000(%%ecx), %%ebp\n"
+      "mov  0x14-0x1000(%%ecx), %%ecx\n"
+      "movd %%edi, %%mm4\n"
+      "movd %%ebp, %%mm7\n"
+      "movd %%mm2, %%ebp\n"
+      "movd %%mm5, %%edi\n"
+      "cmp  %%ebp, 4(%%edi)\n"
+      "jne  25f\n"                 // exit process
+
+      // clone() has unusual calling conventions and must be handled specially
+      "cmp  $120, %%eax\n"         // NR_clone
+      "jz   19f\n"
+
+      // exit() terminates trusted thread
+      "cmp  $1, %%eax\n"           // NR_exit
+      "jz   18f\n"
+
+      // Perform requested system call
+      "movd %%mm4, %%edi\n"
+      "movd %%mm7, %%ebp\n"
+      "int  $0x80\n"
+
+      // Unlock mutex
+    "7:movd %%mm2, %%ebp\n"
+      "movd %%mm5, %%edi\n"
+      "cmp  %%ebp, 4(%%edi)\n"
+      "jne  25f\n"                 // exit process
+      "add  $2, %%ebp\n"
+      "movd %%ebp, %%mm2\n"
+      "mov  %%eax, %%ebp\n"
+      "mov  $120, %%eax\n"         // NR_clone
+      "mov  $17, %%ebx\n"          // flags = SIGCHLD
+      "mov  $1, %%ecx\n"           // stack = 1
+      "int  $0x80\n"
+      "test %%eax, %%eax\n"
+      "js   25f\n"                 // exit process
+      "jz   22f\n"                 // unlock and exit
+      "mov  %%eax, %%ebx\n"
+    "8:xor  %%ecx, %%ecx\n"
+      "xor  %%edx, %%edx\n"
+      "mov  $7, %%eax\n"           // NR_waitpid
+      "int  $0x80\n"
+      "cmp  $-4, %%eax\n"          // EINTR
+      "jz   8b\n"
+      "mov  %%ebp, %%eax\n"
+      "jmp  15f\n"                 // return result
+
+      // If syscall number is -3, read the time stamp counter
+    "9:cmp  $-3, %%eax\n"
+      "jnz  10f\n"
+      "rdtsc\n"                    // sets %edx:%eax
+      "xor  %%ecx, %%ecx\n"
+      "jmp  11f\n"
+   "10:cmp  $-4, %%eax\n"
+      "jnz  12f\n"
+      "rdtscp\n"                   // sets %edx:%eax and %ecx
+   "11:movd %%mm5, %%ebx\n"
+      "add  $0x1020, %%ebx\n"
+      "mov  %%eax, 0(%%ebx)\n"
+      "mov  %%edx, 4(%%ebx)\n"
+      "mov  %%ecx, 8(%%ebx)\n"
+      "mov  %%ebx, %%ecx\n"
+      "mov  $12, %%edx\n"
+      "jmp  16f\n"                 // return result
+
+      // Check in syscallTable whether this system call is unrestricted
+   "12:mov  %%eax, %%ebp\n"
+      #ifndef NDEBUG
+      "cmpw $0, 0x50-0x1000(%%ecx)\n"
+      "jnz  13f\n"                 // debug mode
+      #endif
+      "cmp  playground$maxSyscall, %%eax\n"
+      "ja   25f\n"                 // exit process
+      "shl  $3, %%eax\n"
+      "add  $playground$syscallTable, %%eax\n"
+      "mov  0(%%eax), %%eax\n"
+      "cmp  $1, %%eax\n"
+      "jne  25f\n"                 // exit process
+
+      // Default behavior for unrestricted system calls is to just execute
+      // them. Read the remaining arguments first.
+   "13:mov  $3, %%eax\n"           // NR_read
+      "movd %%mm0, %%ebx\n"        // fd  = threadFd
+      "add  $4, %%ecx\n"           // buf = &scratch + 4
+      "mov  $24, %%edx\n"          // len = 6*sizeof(void *)
+   "14:int  $0x80\n"
+      "cmp  $-4, %%eax\n"          // EINTR
+      "jz   14b\n"
+      "cmp  %%edx, %%eax\n"
+      "jnz  25f\n"                 // exit process
+      "mov  %%ebp, %%eax\n"
+      "mov  0x00(%%ecx), %%ebx\n"
+      "mov  0x08(%%ecx), %%edx\n"
+      "mov  0x0C(%%ecx), %%esi\n"
+      "mov  0x10(%%ecx), %%edi\n"
+      "mov  0x14(%%ecx), %%ebp\n"
+      "mov  0x04(%%ecx), %%ecx\n"
+      "cmp  $252, %%eax\n"         // NR_exit_group
+      "jz   27f\n"                 // exit program, no message
+      "int  $0x80\n"
+
+      // Return result of system call to sandboxed thread
+   "15:movd %%mm5, %%ecx\n"        // secure_mem
+      "add  $0x101C, %%ecx\n"      // buf   = &scratch + 28
+      "mov  %%eax, (%%ecx)\n"
+      "mov  $4, %%edx\n"           // len   = 4
+   "16:movd %%mm0, %%ebx\n"        // fd    = threadFd
+      "mov  $4, %%eax\n"           // NR_write
+   "17:int  $0x80\n"
+      "cmp  %%edx, %%eax\n"
+      "jz   1b\n"
+      "cmp  $-4, %%eax\n"          // EINTR
+      "jz   17b\n"
+      "jmp  25f\n"                 // exit process
+
+      // NR_exit:
+      // Exit trusted thread after cleaning up resources
+   "18:mov  %%edi, %%ecx\n"        // secure_mem
+      "mov  0x68(%%ecx), %%ebx\n"  // fd     = threadFdPub
+      "mov  $6, %%eax\n"           // NR_close
+      "int  $0x80\n"
+      "mov  %%ecx, %%ebx\n"        // start  = secure_mem
+      "mov  $8192, %%ecx\n"        // length = 8192
+      "xor  %%edx, %%edx\n"        // prot   = PROT_NONE
+      "mov  $125, %%eax\n"         // NR_mprotect
+      "int  $0x80\n"
+      "movd %%mm0, %%ebx\n"        // fd     = threadFd
+      "mov  $6, %%eax\n"           // NR_close
+      "int  $0x80\n"
+      "mov  $120, %%eax\n"         // NR_clone
+      "mov  $17, %%ebx\n"          // flags = SIGCHLD
+      "mov  $1, %%ecx\n"           // stack = 1
+      "int  $0x80\n"
+      "mov  %%eax, %%ebx\n"
+      "test %%eax, %%eax\n"
+      "js   25f\n"                 // exit process
+      "jne  21f\n"                 // reap helper, exit thread
+      "jmp  22f\n"                 // unlock mutex
+
+      // NR_clone:
+      // Original trusted thread calls clone() to create new nascent
+      // thread. This thread is (typically) fully privileged and shares all
+      // resources with the caller (i.e. the previous trusted thread),
+      // and by extension it shares all resources with the sandbox'd
+      // threads.
+   "19:movd %%edi, %%mm6\n"        // %mm6  = old_shared_mem
+      "movd %%mm4, %%edi\n"        // child_tidptr
+      "mov  %%ecx, %%ebp\n"        // remember child stack
+      "mov  $1, %%ecx\n"           // stack = 1
+      "int  $0x80\n"               // calls NR_clone
+      "cmp  $-4095, %%eax\n"       // return codes -1..-4095 are errno values
+      "jae  7b\n"                  // unlock mutex, return result
+      "movd %%mm2, %%edi\n"
+      "add  $2, %%edi\n"
+      "movd %%edi, %%mm2\n"
+      "test %%eax, %%eax\n"
+      "jne  15b\n"                 // return result
+
+      // In nascent thread, now.
+      "sub  $2, %%edi\n"
+      "movd %%edi, %%mm2\n"
+
+      // We want to maintain an invalid %esp whenever we access untrusted
+      // memory. This ensures that even if an attacker can trick us into
+      // triggering a SIGSEGV, we will never successfully execute a signal
+      // handler.
+      // Signal handlers are inherently dangerous, as an attacker could trick
+      // us into returning to the wrong address by adjusting the signal stack
+      // right before the handler returns.
+      // N.B. While POSIX is curiously silent about this, it appears that on
+      // Linux, alternate signal stacks are a per-thread property. That is
+      // good. It means that this security mechanism works, even if the
+      // sandboxed thread manages to set up an alternate signal stack.
+      //
+      // TODO(markus): We currently do not support emulating calls to
+      // sys_clone() with a zero (i.e. copy) stack parameter. See clone.cc
+      // for a discussion on how to fix this, if this ever becomes necessary.
+      "movd %%eax, %%mm3\n"        // Request to return from clone() when done
+
+      // Get thread id of nascent thread
+   "20:mov  $224, %%eax\n"         // NR_gettid
+      "int  $0x80\n"
+      "movd %%eax, %%mm4\n"
+
+      // Nascent thread creates socketpair() for sending requests to
+      // trusted thread.
+      // We can create the filehandles on the child's stack. Filehandles are
+      // always treated as untrusted.
+      // socketpair(AF_UNIX, SOCK_STREAM, 0, fds)
+      "mov  $102, %%eax\n"         // NR_socketcall
+      "mov  $8, %%ebx\n"           // socketpair
+      "sub  $8, %%ebp\n"           // sv       = child_stack
+      "mov  %%ebp, -0x04(%%ebp)\n"
+      "movl $0, -0x08(%%ebp)\n"    // protocol = 0
+      "movl $1, -0x0C(%%ebp)\n"    // type     = SOCK_STREAM
+      "movl $1, -0x10(%%ebp)\n"    // domain   = AF_UNIX
+      "lea  -0x10(%%ebp), %%ecx\n"
+      "int  $0x80\n"
+      "test %%eax, %%eax\n"
+      "jz   28f\n"
+
+      // If things went wrong, we don't have an (easy) way of signaling
+      // the parent. For our purposes, it is sufficient to fail with a
+      // fatal error.
+      "jmp  25f\n"                 // exit process
+   "21:xor  %%ecx, %%ecx\n"
+      "xor  %%edx, %%edx\n"
+      "mov  $7, %%eax\n"           // NR_waitpid
+      "int  $0x80\n"
+      "cmp  $-4, %%eax\n"          // EINTR
+      "jz   21b\n"
+      "jmp  23f\n"                 // exit thread (no message)
+   "22:lea  playground$syscall_mutex, %%ebx\n"
+      "mov  $4096, %%ecx\n"
+      "mov  $3, %%edx\n"           // prot = PROT_READ | PROT_WRITE
+      "mov  $125, %%eax\n"         // NR_mprotect
+      "int  $0x80\n"
+      "lock; addl $0x80000000, (%%ebx)\n"
+      "jz   23f\n"                 // exit thread
+      "mov  $1, %%edx\n"
+      "mov  %%edx, %%ecx\n"        // FUTEX_WAKE
+      "mov  $240, %%eax\n"         // NR_futex
+      "int  $0x80\n"
+   "23:mov  $1, %%eax\n"           // NR_exit
+      "mov  $1, %%ebx\n"           // status = 1
+   "24:int  $0x80\n"
+   "25:mov  $4, %%eax\n"           // NR_write
+      "mov  $2, %%ebx\n"           // fd = stderr
+      "lea  100f, %%ecx\n"         // "Sandbox violation detected"
+      "mov  $101f-100f, %%edx\n"   // len = strlen(msg)
+      "int  $0x80\n"
+   "26:mov  $1, %%ebx\n"
+   "27:mov  $252, %%eax\n"         // NR_exit_group
+      "jmp  24b\n"
+
+      // The first page is mapped read-only for use as securely shared memory
+   "28:movd %%mm6, %%edi\n"        // %edi = old_shared_mem
+      "mov  0x44(%%edi), %%ebx\n"  // addr = secure_mem
+      "movd %%ebx, %%mm5\n"        // %mm5 = secure_mem
+      "movd %%mm2, %%esi\n"
+      "cmp  %%esi, 4(%%edi)\n"
+      "jne  25b\n"                 // exit process
+      "mov  $125, %%eax\n"         // NR_mprotect
+      "mov  $4096, %%ecx\n"        // len  = 4096
+      "mov  $1, %%edx\n"           // prot = PROT_READ
+      "int  $0x80\n"
+
+      // The second page is used as scratch space by the trusted thread.
+      // Make it writable.
+      "mov  $125, %%eax\n"         // NR_mprotect
+      "add  $4096, %%ebx\n"        // addr = secure_mem + 4096
+      "mov  $3, %%edx\n"           // prot = PROT_READ | PROT_WRITE
+      "int  $0x80\n"
+
+      // Call clone() to create new trusted thread().
+      // clone(CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
+      //       CLONE_SYSVSEM|CLONE_UNTRACED, stack, NULL, NULL, NULL)
+      "mov  4(%%ebp), %%eax\n"     // threadFd (on child's stack)
+      "movd %%eax, %%mm0\n"        // %mm0  = threadFd
+      "mov  $120, %%eax\n"         // NR_clone
+      "mov  $0x850F00, %%ebx\n"    // flags = VM|FS|FILES|SIGH|THR|SYSV|UTR
+      "mov  $1, %%ecx\n"           // stack = 1
+      "movd 0x48(%%edi), %%mm1\n"  // %mm1  = processFdPub
+      "cmp  %%esi, 4(%%edi)\n"
+      "jne  25b\n"                 // exit process
+      "int  $0x80\n"
+      "test %%eax, %%eax\n"
+      "js   25b\n"                 // exit process
+      "jz   0b\n"                  // invoke trustedThreadFnc()
+
+      // Set up thread local storage
+      "mov  $0x51, %%eax\n"        // seg_32bit, limit_in_pages, useable
+      "mov  %%eax, -0x04(%%ebp)\n"
+      "mov  $0xFFFFF, %%eax\n"     // limit
+      "mov  %%eax, -0x08(%%ebp)\n"
+      "movd %%mm5, %%eax\n"
+      "add  $0x58, %%eax\n"
+      "mov  %%eax, -0x0C(%%ebp)\n" // base_addr = &secure_mem.TLS
+      "mov  %%fs, %%eax\n"
+      "shr  $3, %%eax\n"
+      "mov  %%eax, -0x10(%%ebp)\n" // entry_number
+      "mov  $243, %%eax\n"         // NR_set_thread_area
+      "lea  -0x10(%%ebp), %%ebx\n"
+      "int  $0x80\n"
+      "test %%eax, %%eax\n"
+      "jnz  25b\n"                 // exit process
+
+      // Copy the caller's signal mask
+      "movd %%mm5, %%edx\n"
+      "mov  0x1038(%%edi), %%eax\n"
+      "mov  %%eax, 0x1038(%%edx)\n"
+      "mov  0x103C(%%edi), %%eax\n"
+      "mov  %%eax, 0x103C(%%edx)\n"
+
+      // Done creating trusted thread. We can now get ready to return to caller
+      "mov  0(%%ebp), %%esi\n"     // %esi = threadFdPub
+      "add  $8, %%ebp\n"
+
+      // Check whether this is the initial thread, or a newly created one.
+      // At startup we run the same code as when we create a new thread. At
+      // the very top of this function, you will find that we store 999f
+      // in %%mm3. That is the signal that we should return on the same
+      // stack rather than return to where clone was called.
+      "movd %%mm3, %%eax\n"
+      "movd %%mm2, %%edx\n"
+      "test %%eax, %%eax\n"
+      "jne  29f\n"
+
+      // Returning from clone() into the newly created thread is special. We
+      // cannot unroll the stack, as we just set up a new stack for this
+      // thread. We have to explicitly restore CPU registers to the values
+      // that they had when the program originally called clone().
+      // We patch the register values in the signal stack frame so that we
+      // can ask sigreturn() to restore all registers for us.
+      "sub  $0x4, %%ebp\n"
+      "mov  0x28(%%edi), %%eax\n"
+      "mov  %%eax, 0x00(%%ebp)\n"  // return address
+      "xor  %%eax, %%eax\n"
+      "mov  %%eax, 0x30(%%ebp)\n"  // %eax = 0
+      "mov  0x2C(%%edi), %%eax\n"
+      "mov  %%eax, 0x1C(%%ebp)\n"  // %ebp
+      "mov  0x30(%%edi), %%eax\n"
+      "mov  %%eax, 0x14(%%ebp)\n"  // %edi
+      "mov  0x34(%%edi), %%eax\n"
+      "mov  %%eax, 0x18(%%ebp)\n"  // %esi
+      "mov  0x38(%%edi), %%eax\n"
+      "mov  %%eax, 0x28(%%ebp)\n"  // %edx
+      "mov  0x3C(%%edi), %%eax\n"
+      "mov  %%eax, 0x2C(%%ebp)\n"  // %ecx
+      "mov  0x40(%%edi), %%eax\n"
+      "mov  %%eax, 0x24(%%ebp)\n"  // %ebx
+      "cmp  %%edx, 4(%%edi)\n"
+      "jne  25b\n"                 // exit process
+
+      // Nascent thread launches a helper that doesn't share any of our
+      // resources, except for pages mapped as MAP_SHARED.
+      // clone(SIGCHLD, stack=1)
+   "29:mov  $120, %%eax\n"         // NR_clone
+      "mov  $17, %%ebx\n"          // flags = SIGCHLD
+      "mov  $1, %%ecx\n"           // stack = 1
+      "int  $0x80\n"
+      "test %%eax, %%eax\n"
+      "js   25b\n"                 // exit process
+      "jne  31f\n"
+
+      // Use sendmsg() to send to the trusted process the file handles for
+      // communicating with the new trusted thread. We also send the address
+      // of the secure memory area (for sanity checks) and the thread id.
+      "cmp  %%edx, 4(%%edi)\n"
+      "jne  25b\n"                 // exit process
+
+      // 0x00 socketcall:
+      //   0x00 socket         (0x4C(%edi))
+      //   0x04 msg            (%ecx + 0x0C)
+      //   0x08 flags          ($0)
+      // 0x0C msg:
+      //   0x0C msg_name       ($0)
+      //   0x10 msg_namelen    ($0)
+      //   0x14 msg_iov        (%ecx + 0x34)
+      //   0x18 msg_iovlen     ($1)
+      //   0x1C msg_control    (%ecx + 0x3C)
+      //   0x20 msg_controllen ($0x14)
+      // 0x24 data:
+      //   0x24 msg_flags/err  ($0)
+      //   0x28 secure_mem     (%mm5)
+      //   0x2C threadId       (%mm4)
+      //   0x30 threadFdPub    (%esi)
+      // 0x34 iov:
+      //   0x34 iov_base       (%ecx + 0x24)
+      //   0x38 iov_len        ($0x10)
+      // 0x3C cmsg:
+      //   0x3C cmsg_len       ($0x14)
+      //   0x40 cmsg_level     ($1, SOL_SOCKET)
+      //   0x44 cmsg_type      ($1, SCM_RIGHTS)
+      //   0x48 threadFdPub    (%esi)
+      //   0x4C threadFd       (%mm0)
+      // 0x50
+      "lea  -0x50(%%ebp), %%ecx\n"
+      "xor  %%eax, %%eax\n"
+      "mov  %%eax, 0x08(%%ecx)\n"  // flags
+      "mov  %%eax, 0x0C(%%ecx)\n"  // msg_name
+      "mov  %%eax, 0x10(%%ecx)\n"  // msg_namelen
+      "mov  %%eax, 0x24(%%ecx)\n"  // msg_flags
+      "inc  %%eax\n"
+      "mov  %%eax, 0x18(%%ecx)\n"  // msg_iovlen
+      "mov  %%eax, 0x40(%%ecx)\n"  // cmsg_level
+      "mov  %%eax, 0x44(%%ecx)\n"  // cmsg_type
+      "movl $0x10, 0x38(%%ecx)\n"  // iov_len
+      "mov  $0x14, %%eax\n"
+      "mov  %%eax, 0x20(%%ecx)\n"  // msg_controllen
+      "mov  %%eax, 0x3C(%%ecx)\n"  // cmsg_len
+      "mov  0x4C(%%edi), %%eax\n"  // cloneFdPub
+      "mov  %%eax, 0x00(%%ecx)\n"  // socket
+      "lea  0x0C(%%ecx), %%eax\n"
+      "mov  %%eax, 0x04(%%ecx)\n"  // msg
+      "add  $0x18, %%eax\n"
+      "mov  %%eax, 0x34(%%ecx)\n"  // iov_base
+      "add  $0x10, %%eax\n"
+      "mov  %%eax, 0x14(%%ecx)\n"  // msg_iov
+      "add  $8, %%eax\n"
+      "mov  %%eax, 0x1C(%%ecx)\n"  // msg_control
+      "mov  %%esi, 0x30(%%ecx)\n"  // threadFdPub
+      "mov  %%esi, 0x48(%%ecx)\n"  // threadFdPub
+      "movd %%mm5, %%eax\n"
+      "mov  %%eax, 0x28(%%ecx)\n"  // secure_mem
+      "movd %%mm4, %%eax\n"
+      "mov  %%eax, 0x2C(%%ecx)\n"  // threadId
+      "movd %%mm0, %%eax\n"
+      "mov  %%eax, 0x4C(%%ecx)\n"  // threadFd
+      "mov  $16, %%ebx\n"          // sendmsg()
+      "mov  $102, %%eax\n"         // NR_socketcall
+      "int  $0x80\n"
+
+      // Release syscall_mutex_. This signals the trusted process that
+      // it can write into the original thread's secure memory again.
+      "mov  $125, %%eax\n"         // NR_mprotect
+      "lea  playground$syscall_mutex, %%ebx\n"
+      "mov  $4096, %%ecx\n"
+      "mov  $3, %%edx\n"           // PROT_READ | PROT_WRITE
+      "int  $0x80\n"
+      "movd %%mm2, %%edx\n"
+      "cmp  %%edx, 0x4(%%edi)\n"
+      "jnz  25b\n"                 // exit process
+      "lock; addl $0x80000000, (%%ebx)\n"
+      "jz   30f\n"                 // exit process (no error message)
+      "mov  $1, %%edx\n"
+      "mov  %%edx, %%ecx\n"        // FUTEX_WAKE
+      "mov  $240, %%eax\n"         // NR_futex
+      "int  $0x80\n"
+   "30:xor  %%ebx, %%ebx\n"
+      "jmp  27b\n"                 // exit process (no error message)
+
+      // Reap helper
+   "31:mov  %%eax, %%ebx\n"
+   "32:lea  -4(%%ebp), %%ecx\n"
+      "xor  %%edx, %%edx\n"
+      "mov  $7, %%eax\n"           // NR_waitpid
+      "int  $0x80\n"
+      "cmp  $-4, %%eax\n"          // EINTR
+      "jz   32b\n"
+      "mov  -4(%%ebp), %%eax\n"
+      "test %%eax, %%eax\n"
+      "jnz  26b\n"                 // exit process (no error message)
+
+      // Release privileges by entering seccomp mode.
+   "33:mov  $172, %%eax\n"         // NR_prctl
+      "mov  $22, %%ebx\n"          // PR_SET_SECCOMP
+      "mov  $1, %%ecx\n"
+      "int  $0x80\n"
+      "test %%eax, %%eax\n"
+      "jnz  25b\n"                 // exit process
+
+      // We can finally start using the stack. Signal handlers no longer pose
+      // a threat to us.
+      "mov  %%ebp, %%esp\n"
+
+      // Back in the newly created sandboxed thread, wait for trusted process
+      // to receive request. It is possible for an attacker to make us
+      // continue even before the trusted process is done. This is OK. It'll
+      // result in us putting stale values into the new thread's TLS. But that
+      // data is considered untrusted anyway.
+      "push %%eax\n"
+      "mov  $1, %%edx\n"           // len       = 1
+      "mov  %%esp, %%ecx\n"        // buf       = %esp
+      "mov  %%esi, %%ebx\n"        // fd        = threadFdPub
+   "34:mov  $3, %%eax\n"           // NR_read
+      "int  $0x80\n"
+      "cmp  $-4, %%eax\n"          // EINTR
+      "jz   34b\n"
+      "cmp  %%edx, %%eax\n"
+      "jne  25b\n"                 // exit process
+      "pop  %%eax\n"
+
+      // Return to caller. We are in the new thread, now.
+      "movd %%mm3, %%ebx\n"
+      "test %%ebx, %%ebx\n"
+      "jnz  35f\n"                 // Returning to createTrustedThread()
+
+      // Returning to the place where clone() had been called. We rely on
+      // using sigreturn() for restoring our registers. The caller already
+      // created a signal stack frame, and we patched the register values
+      // with the ones that were in effect prior to calling sandbox_clone().
+      "pop  %%ebx\n"
+   "35:mov  %%ebx, 0x38(%%esp)\n"  // compute new %eip
+      "mov  $119, %%eax\n"         // NR_sigreturn
+      "int  $0x80\n"
+
+      ".pushsection \".rodata\"\n"
+  "100:.ascii \"Sandbox violation detected, program aborted\\n\"\n"
+  "101:.ascii \"WARNING! This is an expensive system call\\n\"\n"
+  "102:\n"
+      ".popsection\n"
+
+  "999:pop  %%ebp\n"
+      "pop  %%ebx\n"
+      :
+      : "g"(&args)
+      : "eax", "ecx", "edx", "edi", "esi", "esp", "memory"
+#else
+#error Unsupported target platform
+#endif
+);
+}
+
+} // namespace
diff --git a/sandbox/linux/seccomp/x86_decode.cc b/sandbox/linux/seccomp/x86_decode.cc
new file mode 100644
index 0000000..1b55139
--- /dev/null
+++ b/sandbox/linux/seccomp/x86_decode.cc
@@ -0,0 +1,339 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "x86_decode.h"
+
+namespace playground {
+
+#if defined(__x86_64__) || defined(__i386__)
+// Determines the length of the single x86 instruction that starts at *ip.
+//
+// This is not a full disassembler. It classifies the opcode just well enough
+// to step over prefix, ModR/M, SIB, displacement and immediate bytes.
+//
+// Arguments:
+//   ip          in/out; on return *ip points just past the decoded bytes.
+//   is64bit     decode as 64-bit code (REX prefixes, 8 byte addresses).
+//   has_prefix  optional out; true if a legacy or REX prefix was consumed.
+//   rex_ptr     optional out; the last REX prefix byte seen, or NULL.
+//   mod_rm_ptr  optional out; the ModR/M byte, or NULL if there is none.
+//   sib_ptr     optional out; the SIB byte, or NULL if there is none.
+//   is_group    optional out; true if the opcode's table entry has the GROUP
+//               flag, i.e. the ModR/M reg field selects the operation.
+//
+// Returns the opcode byte, or 0x0F00 | second_byte for two-byte opcodes.
+// Opcodes whose table entry is zero are not understood; for those *ip is
+// only advanced past the prefixes and the opcode bytes.
+unsigned short next_inst(const char **ip, bool is64bit, bool *has_prefix,
+                         char **rex_ptr, char **mod_rm_ptr, char **sib_ptr,
+                         bool *is_group) {
+  enum {
+    BYTE_OP      = (1<<1), // 0x02
+    IMM          = (1<<2), // 0x04
+    IMM_BYTE     = (2<<2), // 0x08
+    MEM_ABS      = (3<<2), // 0x0C
+    MODE_MASK    = (7<<2), // 0x1C
+    MOD_RM       = (1<<5), // 0x20
+    STACK        = (1<<6), // 0x40
+    GROUP        = (1<<7), // 0x80
+    GROUP_MASK   = 0x7F,
+  };
+
+  // Per-opcode flags, indexed by the opcode byte; entries 256..511 describe
+  // the two-byte opcodes that follow a 0x0F escape. A value of zero marks an
+  // opcode that this decoder does not understand.
+  static const unsigned char opcode_types[512] = {
+    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x01, // 0x00  -  0x07
+    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x00, // 0x08  -  0x0F
+    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x01, // 0x10  -  0x17
+    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x01, // 0x18  -  0x1F
+    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x20  -  0x27
+    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x28  -  0x2F
+    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x30  -  0x37
+    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x38  -  0x3F
+    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x40  -  0x47
+    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x48  -  0x4F
+    0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, // 0x50  -  0x57
+    0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, // 0x58  -  0x5F
+    0x01, 0x01, 0x21, 0x21, 0x00, 0x00, 0x00, 0x00, // 0x60  -  0x67
+    0x45, 0x25, 0x49, 0x29, 0x03, 0x01, 0x03, 0x01, // 0x68  -  0x6F
+    0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, // 0x70  -  0x77
+    0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, // 0x78  -  0x7F
+    0x27, 0x25, 0x27, 0x29, 0x23, 0x21, 0x23, 0x21, // 0x80  -  0x87
+    0x23, 0x21, 0x23, 0x21, 0x21, 0x21, 0x21, 0x80, // 0x88  -  0x8F
+    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x90  -  0x97
+    0x01, 0x01, 0x05, 0x01, 0x41, 0x41, 0x01, 0x01, // 0x98  -  0x9F
+    0x0F, 0x0D, 0x0F, 0x0D, 0x03, 0x01, 0x03, 0x01, // 0xA0  -  0xA7
+    0x09, 0x05, 0x03, 0x01, 0x03, 0x01, 0x03, 0x01, // 0xA8  -  0xAF
+    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, // 0xB0  -  0xB7
+    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0xB8  -  0xBF
+    0x27, 0x29, 0x01, 0x01, 0x21, 0x21, 0x27, 0x25, // 0xC0  -  0xC7
+    0x01, 0x01, 0x01, 0x01, 0x01, 0x09, 0x01, 0x01, // 0xC8  -  0xCF
+    0x23, 0x21, 0x23, 0x21, 0x09, 0x09, 0x01, 0x01, // 0xD0  -  0xD7
+    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xD8  -  0xDF
+    0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, // 0xE0  -  0xE7
+    0x05, 0x05, 0x05, 0x09, 0x03, 0x01, 0x03, 0x01, // 0xE8  -  0xEF
+    0x00, 0x01, 0x00, 0x00, 0x01, 0x01, 0x88, 0x90, // 0xF0  -  0xF7
+    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x98, 0xA0, // 0xF8  -  0xFF
+    0x00, 0xA8, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, // 0xF00 - 0xF07
+    0x01, 0x01, 0x00, 0x01, 0x00, 0x21, 0x01, 0x00, // 0xF08 - 0xF0F
+    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF10 - 0xF17
+    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF18 - 0xF1F
+    0x21, 0x21, 0x21, 0x21, 0x00, 0x00, 0x00, 0x00, // 0xF20 - 0xF27
+    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF28 - 0xF2F
+    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, // 0xF30 - 0xF37
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xF38 - 0xF3F
+    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF40 - 0xF47
+    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF48 - 0xF4F
+    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF50 - 0xF57
+    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF58 - 0xF5F
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xF60 - 0xF67
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xF68 - 0xF6F
+    0x21, 0x00, 0x00, 0x00, 0x21, 0x21, 0x21, 0x00, // 0xF70 - 0xF77
+    0x21, 0x21, 0x00, 0x00, 0x21, 0x21, 0x21, 0x21, // 0xF78 - 0xF7F
+    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xF80 - 0xF87
+    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xF88 - 0xF8F
+    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF90 - 0xF97
+    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF98 - 0xF9F
+    0x01, 0x01, 0x01, 0x21, 0x29, 0x21, 0x00, 0x00, // 0xFA0 - 0xFA7
+    0x01, 0x01, 0x01, 0x21, 0x29, 0x21, 0x21, 0x21, // 0xFA8 - 0xFAF
+    0x23, 0x21, 0x00, 0x21, 0x00, 0x00, 0x23, 0x21, // 0xFB0 - 0xFB7
+    0x21, 0x00, 0x29, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFB8 - 0xFBF
+    0x21, 0x21, 0x00, 0x21, 0x00, 0x00, 0x00, 0x21, // 0xFC0 - 0xFC7
+    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xFC8 - 0xFCF
+    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFD0 - 0xFD7
+    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFD8 - 0xFDF
+    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFE0 - 0xFE7
+    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFE8 - 0xFEF
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xFF0 - 0xFF7
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xFF8 - 0xFFF
+  };
+
+  // Flags for "group" opcodes, eight entries per group, selected by the reg
+  // field of the ModR/M byte (bits 3..5).
+  static const unsigned char group_table[56] = {
+    0x61, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Group 1A
+    0x27, 0x27, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, // Group 3 (Byte)
+    0x25, 0x25, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // Group 3
+    0x23, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Group 4
+    0x21, 0x21, 0x61, 0x21, 0x61, 0x21, 0x61, 0x00, // Group 5
+    0x00, 0x00, 0x21, 0x21, 0x21, 0x00, 0x21, 0x23, // Group 7
+    0x21, 0x00, 0x00, 0x21, 0x21, 0x00, 0x21, 0x00, // Group 7 (Alternate)
+  };
+
+  const unsigned char *insn_ptr = reinterpret_cast<const unsigned char *>(*ip);
+  int operand_width = 4;
+  int address_width = 4;
+  if (is64bit) {
+    address_width = 8;
+  }
+  unsigned char byte, rex = 0;
+  bool found_prefix = false;
+  if (rex_ptr) {
+    *rex_ptr = 0;
+  }
+  if (mod_rm_ptr) {
+    *mod_rm_ptr = 0;
+  }
+  if (sib_ptr) {
+    *sib_ptr = 0;
+  }
+  for (;; ++insn_ptr) {
+    switch (byte = *insn_ptr) {
+      case 0x66: // Operand width prefix
+        // Assign rather than toggle; a repeated prefix has no further effect.
+        operand_width = 2;
+        break;
+      case 0x67: // Address width prefix
+        address_width = is64bit ? 4 : 2;
+        break;
+      case 0x26: // Segment selector prefixes
+      case 0x2e:
+      case 0x36:
+      case 0x3e:
+      case 0x64:
+      case 0x65:
+      case 0xF0: // LOCK
+      case 0xF2: // REPNE
+      case 0xF3: // REP
+        break;
+      case 0x40: case 0x41: case 0x42: case 0x43: // 64 bit REX prefixes
+      case 0x44: case 0x45: case 0x46: case 0x47:
+      case 0x48: case 0x49: case 0x4A: case 0x4B:
+      case 0x4C: case 0x4D: case 0x4E: case 0x4F:
+        if (is64bit) {
+          if (rex_ptr) {
+            *rex_ptr = (char *)insn_ptr;
+          }
+          rex = byte;
+          found_prefix = true;
+          continue;
+        }
+        // fall through
+      default:
+        ++insn_ptr;
+        goto no_more_prefixes;
+    }
+    rex = 0;  // a REX prefix only counts if it directly precedes the opcode
+    found_prefix = true;
+  }
+no_more_prefixes:
+  if (has_prefix) {
+    *has_prefix = found_prefix;
+  }
+  if (rex & REX_W) {
+    operand_width = 8;
+  }
+  unsigned char type;
+  unsigned short insn = byte;
+  unsigned int idx = 0;
+  if (byte == 0x0F) {
+    byte = *insn_ptr++;
+    insn = (insn << 8) | byte;
+    idx  = 256;
+  }
+  type = opcode_types[idx + byte];
+  bool found_mod_rm = false;
+  bool found_group = false;
+  bool found_sib = false;
+  unsigned char mod_rm = 0;
+  unsigned char sib = 0;
+  if (type & GROUP) {
+    found_mod_rm = true;
+    found_group = true;
+    mod_rm = *insn_ptr;  // peek only; consumed by the MOD_RM handling below
+    if (mod_rm_ptr) {
+      *mod_rm_ptr = (char *)insn_ptr;
+    }
+    unsigned char group = (type & GROUP_MASK) + ((mod_rm >> 3) & 0x7);
+    // Group 7 has a second table row that applies when mod == 3.
+    if ((type & GROUP_MASK) == 40 && (mod_rm >> 6) == 3) {
+      group += 8;
+    }
+    type = group_table[group];
+  }
+  if (!type) {
+    // We know that we still don't decode some of the more obscure
+    // instructions, but for all practical purposes that doesn't matter.
+    // Compilers are unlikely to output them, and even if we encounter
+    // hand-coded assembly, we will soon synchronize to the instruction
+    // stream again.
+    //
+    // std::cerr << "Unsupported instruction at 0x" << std::hex <<
+    //     std::uppercase << reinterpret_cast<long>(*ip) << " [ ";
+    // for (const unsigned char *ptr =
+    //          reinterpret_cast<const unsigned char *>(*ip);
+    //      ptr < insn_ptr; ) {
+    //   std::cerr << std::hex << std::uppercase << std::setw(2) <<
+    //       std::setfill('0') << (unsigned int)*ptr++ << ' ';
+    // }
+    // std::cerr << "]" << std::endl;
+  } else {
+    if (is64bit && (type & STACK)) {
+      operand_width = 8;
+    }
+    if (type & MOD_RM) {
+      found_mod_rm = true;
+      if (mod_rm_ptr) {
+        *mod_rm_ptr = (char *)insn_ptr;
+      }
+      mod_rm = *insn_ptr++;
+      int mod = (mod_rm >> 6) & 0x3;
+      int rm  = 8*(rex & REX_B) + (mod_rm & 0x7);
+      if (mod != 3) {
+        if (address_width == 2) {
+          switch (mod) {
+            case 0:
+              if (rm != 6 /* disp16 */) {
+                break;
+              }
+              // fall through
+            case 2:
+              insn_ptr++;
+              // fall through
+            case 1:
+              insn_ptr++;
+              break;
+          }
+        } else {
+          if ((rm & 0x7) == 4) {
+            found_sib = true;
+            if (sib_ptr) {
+              *sib_ptr = (char *)insn_ptr;
+            }
+            sib = *insn_ptr++;
+            if (!mod && (sib & 0x7) == 5 /* BP */) {
+              insn_ptr += 4;
+            }
+          }
+          switch (mod) {
+            case 0:
+              // mod == 0 with r/m == 101b is disp32 (RIP-relative in 64-bit
+              // code). REX.B is not decoded for this form, so test only the
+              // low three bits.
+              if ((rm & 0x7) != 5 /* BP */) {
+                break;
+              }
+              // fall through
+            case 2:
+              insn_ptr += 3;
+              // fall through
+            case 1:
+              insn_ptr++;
+              break;
+          }
+        }
+      }
+    }
+    switch (insn) {
+      case 0xC8: // ENTER
+        insn_ptr++;
+        // fall through
+      case 0x9A: // CALL (far)
+      case 0xC2: // RET (near)
+      case 0xCA: // LRET
+      case 0xEA: // JMP (far)
+        insn_ptr += 2;
+        break;
+      case 0xF80: case 0xF81: case 0xF82: case 0xF83: // Jcc (rel)
+      case 0xF84: case 0xF85: case 0xF86: case 0xF87:
+      case 0xF88: case 0xF89: case 0xF8A: case 0xF8B:
+      case 0xF8C: case 0xF8D: case 0xF8E: case 0xF8F:
+        // The displacement is never wider than 32 bits, even with REX.W.
+        insn_ptr += (operand_width == 8) ? 4 : operand_width;
+        break;
+    }
+    switch (type & MODE_MASK) {
+      case IMM:
+        if (!(type & BYTE_OP)) {
+          switch (insn) {
+            case 0xB8: case 0xB9: case 0xBA: case 0xBB:
+            case 0xBC: case 0xBD: case 0xBE: case 0xBF:
+              // Allow MOV to/from 64bit addresses
+              insn_ptr += operand_width;
+              break;
+            default:
+              insn_ptr += (operand_width == 8) ? 4 : operand_width;
+              break;
+          }
+          break;
+        }
+        // fall through
+      case IMM_BYTE:
+        insn_ptr++;
+        break;
+      case MEM_ABS:
+        insn_ptr += address_width;
+        break;
+    }
+  }
+  if (is_group) {
+    *is_group = found_group;
+  }
+  *ip = reinterpret_cast<const char *>(insn_ptr);
+  return insn;
+}
+#endif
+
+} // namespace
diff --git a/sandbox/linux/seccomp/x86_decode.h b/sandbox/linux/seccomp/x86_decode.h
new file mode 100644
index 0000000..68f0ab5
--- /dev/null
+++ b/sandbox/linux/seccomp/x86_decode.h
@@ -0,0 +1,19 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef X86_DECODE_H__
+#define X86_DECODE_H__
+namespace playground {
+enum {  // Masks for the low nibble of an x86-64 REX prefix (0100WRXB).
+    REX_B        = 0x01,  // next_inst(): adds 8 to the ModR/M r/m number
+    REX_X        = 0x02,  // SIB index extension per the REX layout
+    REX_R        = 0x04,  // ModR/M reg extension per the REX layout
+    REX_W        = 0x08   // next_inst(): forces 8-byte operand width
+};
+// Decodes the instruction at *ip, advances *ip past it, returns its opcode.
+unsigned short next_inst(const char **ip, bool is64bit, bool *has_prefix = 0,
+                         char **rex_ptr    = 0, char **mod_rm_ptr = 0,
+                         char **sib_ptr    = 0, bool *is_group   = 0);
+} // namespace
+#endif // X86_DECODE_H__
author	nsylvain@chromium.org <nsylvain@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2010-08-31 01:16:35 +0000
committer	nsylvain@chromium.org <nsylvain@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2010-08-31 01:16:35 +0000
commit	fb7b5328a5fd3aecfc27f765dea94b961c657597 (patch)
tree	84adc617db0031a881265e95f9c569de66fa733d
parent	7302ea910ce937d482780649d6a84bbfff4ac521 (diff)
download	chromium_src-fb7b5328a5fd3aecfc27f765dea94b961c657597.zip chromium_src-fb7b5328a5fd3aecfc27f765dea94b961c657597.tar.gz chromium_src-fb7b5328a5fd3aecfc27f765dea94b961c657597.tar.bz2