summaryrefslogtreecommitdiffstats
path: root/sandbox
diff options
context:
space:
mode:
authormseaborn@chromium.org <mseaborn@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-08-30 22:22:07 +0000
committermseaborn@chromium.org <mseaborn@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-08-30 22:22:07 +0000
commit439764b703a5edd48aa878b86fbd07a117b6a3cc (patch)
treee2d6b02e2dd58e7dd01effc9311807318883a2a0 /sandbox
parent0429d943467575406d5fbc3ac859a6e9604d6d0d (diff)
downloadchromium_src-439764b703a5edd48aa878b86fbd07a117b6a3cc.zip
chromium_src-439764b703a5edd48aa878b86fbd07a117b6a3cc.tar.gz
chromium_src-439764b703a5edd48aa878b86fbd07a117b6a3cc.tar.bz2
Pull seccomp-sandbox in via DEPS rather than using an in-tree copy
This means changes to the sandbox won't have to be committed twice, to both trees. BUG=none TEST=smoke test of running chromium with --enable-seccomp-sandbox Review URL: http://codereview.chromium.org/3249003 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@57921 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'sandbox')
-rw-r--r--sandbox/linux/seccomp/Makefile59
-rw-r--r--sandbox/linux/seccomp/access.cc97
-rw-r--r--sandbox/linux/seccomp/allocator.cc136
-rw-r--r--sandbox/linux/seccomp/allocator.h88
-rw-r--r--sandbox/linux/seccomp/clone.cc179
-rw-r--r--sandbox/linux/seccomp/debug.cc363
-rw-r--r--sandbox/linux/seccomp/debug.h80
-rw-r--r--sandbox/linux/seccomp/exit.cc38
-rw-r--r--sandbox/linux/seccomp/getpid.cc17
-rw-r--r--sandbox/linux/seccomp/gettid.cc18
-rw-r--r--sandbox/linux/seccomp/ioctl.cc61
-rw-r--r--sandbox/linux/seccomp/ipc.cc351
-rw-r--r--sandbox/linux/seccomp/library.cc1208
-rw-r--r--sandbox/linux/seccomp/library.h199
-rw-r--r--sandbox/linux/seccomp/linux_syscall_support.h3208
-rw-r--r--sandbox/linux/seccomp/madvise.cc81
-rw-r--r--sandbox/linux/seccomp/maps.cc267
-rw-r--r--sandbox/linux/seccomp/maps.h94
-rw-r--r--sandbox/linux/seccomp/mmap.cc75
-rw-r--r--sandbox/linux/seccomp/mprotect.cc73
-rw-r--r--sandbox/linux/seccomp/munmap.cc70
-rw-r--r--sandbox/linux/seccomp/mutex.h153
-rw-r--r--sandbox/linux/seccomp/open.cc99
-rw-r--r--sandbox/linux/seccomp/sandbox.cc838
-rw-r--r--sandbox/linux/seccomp/sandbox.h12
-rw-r--r--sandbox/linux/seccomp/sandbox_impl.h715
-rw-r--r--sandbox/linux/seccomp/seccomp.gyp93
-rw-r--r--sandbox/linux/seccomp/securemem.cc105
-rw-r--r--sandbox/linux/seccomp/securemem.h205
-rw-r--r--sandbox/linux/seccomp/sigaction.cc177
-rw-r--r--sandbox/linux/seccomp/sigprocmask.cc120
-rw-r--r--sandbox/linux/seccomp/socketcall.cc1039
-rw-r--r--sandbox/linux/seccomp/stat.cc197
-rw-r--r--sandbox/linux/seccomp/syscall.cc380
-rw-r--r--sandbox/linux/seccomp/syscall.h22
-rw-r--r--sandbox/linux/seccomp/syscall_table.c153
-rw-r--r--sandbox/linux/seccomp/syscall_table.h43
-rw-r--r--sandbox/linux/seccomp/tests/list_tests.py22
-rw-r--r--sandbox/linux/seccomp/tests/test_syscalls.cc758
-rw-r--r--sandbox/linux/seccomp/timestats.cc191
-rw-r--r--sandbox/linux/seccomp/tls.h155
-rw-r--r--sandbox/linux/seccomp/trusted_process.cc268
-rw-r--r--sandbox/linux/seccomp/trusted_thread.cc1483
-rw-r--r--sandbox/linux/seccomp/x86_decode.cc310
-rw-r--r--sandbox/linux/seccomp/x86_decode.h19
45 files changed, 0 insertions, 14319 deletions
diff --git a/sandbox/linux/seccomp/Makefile b/sandbox/linux/seccomp/Makefile
deleted file mode 100644
index 141d8c3..0000000
--- a/sandbox/linux/seccomp/Makefile
+++ /dev/null
@@ -1,59 +0,0 @@
-# Copyright (c) 2010 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-# This Makefile temporarily has been checked into the source tree so that
-# we can run the tests. It will be replaced with a proper gyp file.
-
-CFLAGS = -g -O0 -Wall -Werror -Wextra -Wno-missing-field-initializers \
- -Wno-unused-parameter -I.
-LDFLAGS = -g
-CPPFLAGS =
-MODS := allocator library debug maps x86_decode securemem sandbox \
- syscall syscall_table trusted_thread trusted_process \
- access exit clone getpid gettid ioctl ipc madvise mmap mprotect \
- munmap open sigaction sigprocmask socketcall stat
-OBJS64 := $(shell echo ${MODS} | xargs -n 1 | sed -e 's/$$/.o64/')
-OBJS32 := $(shell echo ${MODS} | xargs -n 1 | sed -e 's/$$/.o32/')
-HEADERS:= $(shell for i in ${MODS}; do [ -r "$$i" ] && echo "$$i"; done)
-
-.SUFFIXES: .o64 .o32
-
-all: test
-
-clean:
- -rm -f *.o *.o32 *.o64 tests/*.o32 tests/*.o.64
- -rm -f core core.* vgcore vgcore.* strace.log*
- -rm -f run_tests_32 run_tests_64
- -rm -f tests/test_syscalls.o64 tests/test_syscalls.o32
- -rm -f tests/test-list.h
-
-test: run_tests_64 run_tests_32
- ./run_tests_64
- ./run_tests_32
-
-# TODO: Track header file dependencies properly
-tests/test_syscalls.o64 tests/test_syscalls.o32: tests/test-list.h
-
-tests/test-list.h: tests/list_tests.py tests/test_syscalls.cc
- python tests/list_tests.py tests/test_syscalls.cc > $@
-
-run_tests_64: $(OBJS64) tests/test_syscalls.o64 tests/test-list.h
- g++ -m64 tests/test_syscalls.o64 $(OBJS64) -lpthread -lutil -o $@
-run_tests_32: $(OBJS32) tests/test_syscalls.o32 tests/test-list.h
- g++ -m32 tests/test_syscalls.o32 $(OBJS32) -lpthread -lutil -o $@
-
-.cc.o: ${HEADERS}
- ${CXX} ${CFLAGS} ${CPPFLAGS} -c -o $@ $<
-
-.cc.o64: ${HEADERS}
- ${CXX} ${CFLAGS} ${CPPFLAGS} -fPIC -c -o $@ $<
-
-.c.o64: ${HEADERS}
- ${CC} ${CFLAGS} ${CPPFLAGS} --std=gnu99 -fPIC -c -o $@ $<
-
-.cc.o32: ${HEADERS}
- ${CXX} ${CFLAGS} ${CPPFLAGS} -m32 -fPIC -c -o $@ $<
-
-.c.o32: ${HEADERS}
- ${CC} ${CFLAGS} ${CPPFLAGS} -m32 --std=gnu99 -fPIC -c -o $@ $<
diff --git a/sandbox/linux/seccomp/access.cc b/sandbox/linux/seccomp/access.cc
deleted file mode 100644
index fbe7e53..0000000
--- a/sandbox/linux/seccomp/access.cc
+++ /dev/null
@@ -1,97 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "debug.h"
-#include "sandbox_impl.h"
-
-namespace playground {
-
-long Sandbox::sandbox_access(const char *pathname, int mode) {
- long long tm;
- Debug::syscall(&tm, __NR_access, "Executing handler");
- size_t len = strlen(pathname);
- struct Request {
- int sysnum;
- long long cookie;
- Access access_req;
- char pathname[0];
- } __attribute__((packed)) *request;
- char data[sizeof(struct Request) + len];
- request = reinterpret_cast<struct Request*>(data);
- request->sysnum = __NR_access;
- request->cookie = cookie();
- request->access_req.path_length = len;
- request->access_req.mode = mode;
- memcpy(request->pathname, pathname, len);
-
- long rc;
- SysCalls sys;
- if (write(sys, processFdPub(), request, sizeof(data)) != (int)sizeof(data) ||
- read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
- die("Failed to forward access() request [sandbox]");
- }
- Debug::elapsed(tm, __NR_access);
- return rc;
-}
-
-bool Sandbox::process_access(int parentMapsFd, int sandboxFd, int threadFdPub,
- int threadFd, SecureMem::Args* mem) {
- // Read request
- SysCalls sys;
- Access access_req;
- if (read(sys, sandboxFd, &access_req, sizeof(access_req)) !=
- sizeof(access_req)) {
- read_parm_failed:
- die("Failed to read parameters for access() [process]");
- }
- int rc = -ENAMETOOLONG;
- if (access_req.path_length >= sizeof(mem->pathname)) {
- char buf[32];
- while (access_req.path_length > 0) {
- size_t len = access_req.path_length > sizeof(buf) ?
- sizeof(buf) : access_req.path_length;
- ssize_t i = read(sys, sandboxFd, buf, len);
- if (i <= 0) {
- goto read_parm_failed;
- }
- access_req.path_length -= i;
- }
- if (write(sys, threadFd, &rc, sizeof(rc)) != sizeof(rc)) {
- die("Failed to return data from access() [process]");
- }
- return false;
- }
-
- if (!g_policy.allow_file_namespace) {
- // After locking the mutex, we can no longer abandon the system call. So,
- // perform checks before clobbering the securely shared memory.
- char tmp[access_req.path_length];
- if (read(sys, sandboxFd, tmp, access_req.path_length) !=
- (ssize_t)access_req.path_length) {
- goto read_parm_failed;
- }
- Debug::message(("Denying access to \"" + std::string(tmp) + "\"").c_str());
- SecureMem::abandonSystemCall(threadFd, -EACCES);
- return false;
- }
-
- SecureMem::lockSystemCall(parentMapsFd, mem);
- if (read(sys, sandboxFd, mem->pathname, access_req.path_length) !=
- (ssize_t)access_req.path_length) {
- goto read_parm_failed;
- }
- mem->pathname[access_req.path_length] = '\000';
-
- // TODO(markus): Implement sandboxing policy
- Debug::message(("Allowing access to \"" + std::string(mem->pathname) +
- "\"").c_str());
-
- // Tell trusted thread to access the file.
- SecureMem::sendSystemCall(threadFdPub, true, parentMapsFd, mem, __NR_access,
- mem->pathname - (char*)mem + (char*)mem->self,
- access_req.mode);
- return true;
-}
-
-} // namespace
diff --git a/sandbox/linux/seccomp/allocator.cc b/sandbox/linux/seccomp/allocator.cc
deleted file mode 100644
index 6e11a4a..0000000
--- a/sandbox/linux/seccomp/allocator.cc
+++ /dev/null
@@ -1,136 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// The allocator is very simplistic. It requests memory pages directly from
-// the system. Each page starts with a header describing the allocation. This
-// makes sure that we can return the memory to the system when it is
-// deallocated.
-// For allocations that are smaller than a single page, we try to squeeze
-// multiple of them into the same page.
-// We expect to use this allocator for a moderate number of small allocations.
-// In most cases, it will only need to ever make a single request to the
-// operating system for the lifetime of the STL container object.
-// We don't worry about memory fragmentation as the allocator is expected to
-// be short-lived.
-
-#include <stdint.h>
-#include <sys/mman.h>
-
-#include "allocator.h"
-#include "linux_syscall_support.h"
-
-namespace playground {
-
-class SysCalls {
- public:
- #define SYS_CPLUSPLUS
- #define SYS_ERRNO my_errno
- #define SYS_INLINE inline
- #define SYS_PREFIX -1
- #undef SYS_LINUX_SYSCALL_SUPPORT_H
- #include "linux_syscall_support.h"
- SysCalls() : my_errno(0) { }
- int my_errno;
-};
-#ifdef __NR_mmap2
- #define MMAP mmap2
- #define __NR_MMAP __NR_mmap2
-#else
- #define MMAP mmap
- #define __NR_MMAP __NR_mmap
-#endif
-
-// We only ever keep track of the very last partial page that was used for
-// allocations. This approach simplifies the code a lot. It can theoretically
-// lead to more memory fragmentation, but for our use case that is unlikely
-// to happen.
-struct Header {
- // The total amount of memory allocated for this chunk of memory. Typically,
- // this would be a single page.
- size_t total_len;
-
- // "used" keeps track of the number of bytes currently allocated in this
- // page. Note that as elements are freed from this page, "used" is updated
- // allowing us to track when the page is free. However, these holes in the
- // page are never re-used, so "tail" is the only way to find out how much
- // free space remains and when we need to request another chunk of memory
- // from the system.
- size_t used;
- void *tail;
-};
-static Header* last_alloc;
-
-void* SystemAllocatorHelper::sys_allocate(size_t size) {
- // Number of bytes that need to be allocated
- if (size + 3 < size) {
- return NULL;
- }
- size_t len = (size + 3) & ~3;
-
- if (last_alloc) {
- // Remaining space in the last chunk of memory allocated from system
- size_t remainder = last_alloc->total_len -
- (reinterpret_cast<char *>(last_alloc->tail) -
- reinterpret_cast<char *>(last_alloc));
-
- if (remainder >= len) {
- void* ret = last_alloc->tail;
- last_alloc->tail = reinterpret_cast<char *>(last_alloc->tail) + len;
- last_alloc->used += len;
- return ret;
- }
- }
-
- SysCalls sys;
- if (sizeof(Header) + len + 4095 < len) {
- return NULL;
- }
- size_t total_len = (sizeof(Header) + len + 4095) & ~4095;
- Header* mem = reinterpret_cast<Header *>(
- sys.MMAP(NULL, total_len, PROT_READ|PROT_WRITE,
- MAP_PRIVATE|MAP_ANONYMOUS, -1, 0));
- if (mem == MAP_FAILED) {
- return NULL;
- }
-
- // If we were only asked to allocate a single page, then we will use any
- // remaining space for other small allocations.
- if (total_len - sizeof(Header) - len >= 4) {
- last_alloc = mem;
- }
- mem->total_len = total_len;
- mem->used = len;
- char* ret = reinterpret_cast<char *>(mem) + sizeof(Header);
- mem->tail = ret + len;
-
- return ret;
-}
-
-void SystemAllocatorHelper::sys_deallocate(void* p, size_t size) {
- // Number of bytes in this allocation
- if (size + 3 < size) {
- return;
- }
- size_t len = (size + 3) & ~3;
-
- // All allocations (small and large) have starting addresses in the
- // first page that was allocated from the system. This page starts with
- // a header that keeps track of how many bytes are currently used. The
- // header can be found by truncating the last few bits of the address.
- Header* header = reinterpret_cast<Header *>(
- reinterpret_cast<uintptr_t>(p) & ~4095);
- header->used -= len;
-
- // After the last allocation has been freed, return the page(s) to the
- // system
- if (!header->used) {
- SysCalls sys;
- sys.munmap(header, header->total_len);
- if (last_alloc == header) {
- last_alloc = NULL;
- }
- }
-}
-
-} // namespace
diff --git a/sandbox/linux/seccomp/allocator.h b/sandbox/linux/seccomp/allocator.h
deleted file mode 100644
index 29e0065..0000000
--- a/sandbox/linux/seccomp/allocator.h
+++ /dev/null
@@ -1,88 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// Implement a very basic memory allocator that make direct system calls
-// instead of relying on libc.
-// This allocator is not thread-safe.
-
-#ifndef ALLOCATOR_H__
-#define ALLOCATOR_H__
-
-#include <cstddef>
-
-namespace playground {
-
-class SystemAllocatorHelper {
- protected:
- static void *sys_allocate(size_t size);
- static void sys_deallocate(void* p, size_t size);
-};
-
-template <class T>
-class SystemAllocator : SystemAllocatorHelper {
- public:
- typedef T value_type;
- typedef T* pointer;
- typedef const T* const_pointer;
- typedef T& reference;
- typedef const T& const_reference;
- typedef size_t size_type;
- typedef std::ptrdiff_t difference_type;
-
- template <class U>
- struct rebind {
- typedef SystemAllocator<U> other;
- };
-
- pointer address(reference value) const {
- return &value;
- }
-
- const_pointer address(const_reference value) const {
- return &value;
- }
-
- SystemAllocator() throw() { }
- SystemAllocator(const SystemAllocator& src) throw() { }
- template <class U> SystemAllocator(const SystemAllocator<U>& src) throw() { }
- ~SystemAllocator() throw() { }
-
- size_type max_size() const throw() {
- return (1 << 30) / sizeof(T);
- }
-
- pointer allocate(size_type num, const void* = 0) {
- if (num > max_size()) {
- return NULL;
- }
- return (pointer)sys_allocate(num * sizeof(T));
- }
-
- void construct(pointer p, const T& value) {
- new(reinterpret_cast<void *>(p))T(value);
- }
-
- void destroy(pointer p) {
- p->~T();
- }
-
- void deallocate(pointer p, size_type num) {
- sys_deallocate(p, num * sizeof(T));
- }
-};
-
-template <class T1, class T2>
-bool operator== (const SystemAllocator<T1>&, const SystemAllocator<T2>&)
- throw() {
- return true;
-}
-template <class T1, class T2>
-bool operator!= (const SystemAllocator<T1>&, const SystemAllocator<T2>&)
- throw() {
- return false;
-}
-
-} // namespace
-
-#endif // ALLOCATOR_H__
diff --git a/sandbox/linux/seccomp/clone.cc b/sandbox/linux/seccomp/clone.cc
deleted file mode 100644
index 0d35181..0000000
--- a/sandbox/linux/seccomp/clone.cc
+++ /dev/null
@@ -1,179 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "debug.h"
-#include "sandbox_impl.h"
-
-namespace playground {
-
-long Sandbox::sandbox_clone(int flags, char* stack, int* pid, int* ctid,
- void* tls, void *wrapper_sp) {
- long long tm;
- Debug::syscall(&tm, __NR_clone, "Executing handler");
- struct {
- int sysnum;
- long long cookie;
- Clone clone_req;
- } __attribute__((packed)) request;
- request.sysnum = __NR_clone;
- request.cookie = cookie();
- request.clone_req.flags = flags;
- request.clone_req.stack = stack;
- request.clone_req.pid = pid;
- request.clone_req.ctid = ctid;
- request.clone_req.tls = tls;
-
- // TODO(markus): Passing stack == 0 currently does not do the same thing
- // that the kernel would do without the sandbox. This is just going to
- // cause a crash. We should detect this case, and replace the stack pointer
- // with the correct value, instead.
- // This is complicated by the fact that we will temporarily be executing
- // both threads from the same stack. Some synchronization will be necessary.
- // Fortunately, this complication also explains why hardly anybody ever
- // does this.
- // See trusted_thread.cc for more information.
- long rc;
- if (stack == 0) {
- rc = -EINVAL;
- } else {
- // Pass along the address on the stack where syscallWrapper() stored the
- // original CPU registers. These registers will be restored in the newly
- // created thread prior to returning from the wrapped system call.
- #if defined(__x86_64__)
- memcpy(&request.clone_req.regs64, wrapper_sp,
- sizeof(request.clone_req.regs64) + sizeof(void *));
- #elif defined(__i386__)
- memcpy(&request.clone_req.regs32, wrapper_sp,
- sizeof(request.clone_req.regs32) + sizeof(void *));
- #else
- #error Unsupported target platform
- #endif
-
- // In order to unblock the signal mask in the newly created thread and
- // after entering Seccomp mode, we have to call sigreturn(). But that
- // requires access to a proper stack frame describing a valid signal.
- // We trigger a signal now and make sure the stack frame ends up on the
- // new stack. Our segv() handler (in sandbox.cc) does that for us.
- // See trusted_thread.cc for more details on how threads get created.
- //
- // In general we rely on the kernel for generating the signal stack
- // frame, as the exact binary format has been extended several times over
- // the course of the kernel's development. Fortunately, the kernel
- // developers treat the initial part of the stack frame as a stable part
- // of the ABI. So, we can rely on fixed, well-defined offsets for accessing
- // register values and for accessing the signal mask.
- #if defined(__x86_64__)
- // Red zone compensation. The instrumented system call will remove 128
- // bytes from the thread's stack prior to returning to the original
- // call site.
- stack -= 128;
- request.clone_req.stack = stack;
- void *dummy;
- asm volatile("mov %%rsp, %%rcx\n"
- "mov %3, %%rsp\n"
- "int $0\n"
- "mov %%rcx, %%rsp\n"
- : "=a"(request.clone_req.stack), "=&c"(dummy)
- : "a"(__NR_clone + 0xF000), "m"(request.clone_req.stack)
- : "memory");
- #elif defined(__i386__)
- void *dummy;
- asm volatile("mov %%esp, %%ecx\n"
- "mov %3, %%esp\n"
- "int $0\n"
- "mov %%ecx, %%esp\n"
- : "=a"(request.clone_req.stack), "=&c"(dummy)
- : "a"(__NR_clone + 0xF000), "m"(request.clone_req.stack)
- : "memory");
- #else
- #error Unsupported target platform
- #endif
-
- // Adjust the signal stack frame so that it contains the correct stack
- // pointer upon returning from sigreturn().
- #if defined(__x86_64__)
- *(char **)(request.clone_req.stack + 0xA0) = stack;
- #elif defined(__i386__)
- *(char **)(request.clone_req.stack + 0x1C) = stack;
- #else
- #error Unsupported target platform
- #endif
-
- SysCalls sys;
- if (write(sys, processFdPub(), &request, sizeof(request)) !=
- sizeof(request) ||
- read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
- die("Failed to forward clone() request [sandbox]");
- }
- }
- Debug::elapsed(tm, __NR_clone);
- return rc;
-}
-
-bool Sandbox::process_clone(int parentMapsFd, int sandboxFd, int threadFdPub,
- int threadFd, SecureMem::Args* mem) {
- // Read request
- Clone clone_req;
- SysCalls sys;
- if (read(sys, sandboxFd, &clone_req, sizeof(clone_req)) !=sizeof(clone_req)){
- die("Failed to read parameters for clone() [process]");
- }
-
- // TODO(markus): add policy restricting parameters for clone
- if ((clone_req.flags & ~CLONE_DETACHED) != (CLONE_VM|CLONE_FS|CLONE_FILES|
- CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|
- CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID)) {
- SecureMem::abandonSystemCall(threadFd, -EPERM);
- return false;
- } else {
- SecureMem::Args* newMem = getNewSecureMem();
- if (!newMem) {
- SecureMem::abandonSystemCall(threadFd, -ENOMEM);
- return false;
- } else {
- // clone() has unusual semantics. We don't want to return back into the
- // trusted thread, but instead we need to continue execution at the IP
- // where we got called initially.
- SecureMem::lockSystemCall(parentMapsFd, mem);
- mem->ret = clone_req.ret;
- #if defined(__x86_64__)
- mem->rbp = clone_req.regs64.rbp;
- mem->rbx = clone_req.regs64.rbx;
- mem->rcx = clone_req.regs64.rcx;
- mem->rdx = clone_req.regs64.rdx;
- mem->rsi = clone_req.regs64.rsi;
- mem->rdi = clone_req.regs64.rdi;
- mem->r8 = clone_req.regs64.r8;
- mem->r9 = clone_req.regs64.r9;
- mem->r10 = clone_req.regs64.r10;
- mem->r11 = clone_req.regs64.r11;
- mem->r12 = clone_req.regs64.r12;
- mem->r13 = clone_req.regs64.r13;
- mem->r14 = clone_req.regs64.r14;
- mem->r15 = clone_req.regs64.r15;
- #elif defined(__i386__)
- mem->ebp = clone_req.regs32.ebp;
- mem->edi = clone_req.regs32.edi;
- mem->esi = clone_req.regs32.esi;
- mem->edx = clone_req.regs32.edx;
- mem->ecx = clone_req.regs32.ecx;
- mem->ebx = clone_req.regs32.ebx;
- #else
- #error Unsupported target platform
- #endif
- newMem->sequence = 0;
- newMem->shmId = -1;
- mem->newSecureMem = newMem;
- mem->processFdPub = processFdPub_;
- mem->cloneFdPub = cloneFdPub_;
-
- SecureMem::sendSystemCall(threadFdPub, true, parentMapsFd, mem,
- __NR_clone, clone_req.flags, clone_req.stack,
- clone_req.pid, clone_req.ctid, clone_req.tls);
- return true;
- }
- }
-}
-
-} // namespace
diff --git a/sandbox/linux/seccomp/debug.cc b/sandbox/linux/seccomp/debug.cc
deleted file mode 100644
index 5d6de49..0000000
--- a/sandbox/linux/seccomp/debug.cc
+++ /dev/null
@@ -1,363 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef NDEBUG
-
-#include "debug.h"
-
-namespace playground {
-
-bool Debug::enabled_;
-int Debug::numSyscallNames_;
-const char **Debug::syscallNames_;
-std::map<int, std::string> Debug::syscallNamesMap_;
-
-Debug Debug::debug_;
-
-Debug::Debug() {
- // Logging is disabled by default, but can be turned on by setting an
- // appropriate environment variable. Initialize this code from a global
- // constructor, so that it runs before the sandbox is turned on.
- enabled_ = !!getenv("SECCOMP_SANDBOX_DEBUGGING");
-
- // Read names of system calls from header files, if available. Symbolic
- // names make debugging so much nicer.
- if (enabled_) {
- static const char *filenames[] = {
- #if __WORDSIZE == 64
- "/usr/include/asm/unistd_64.h",
- #elif __WORDSIZE == 32
- "/usr/include/asm/unistd_32.h",
- #endif
- "/usr/include/asm/unistd.h",
- NULL };
- numSyscallNames_ = 0;
- for (const char **fn = filenames; *fn; ++fn) {
- FILE *fp = fopen(*fn, "r");
- if (fp) {
- std::string baseName;
- int baseNum = -1;
- char buf[80];
- while (fgets(buf, sizeof(buf), fp)) {
- // Check if the line starts with "#define"
- static const char* whitespace = " \t\r\n";
- char *token, *save;
- token = strtok_r(buf, whitespace, &save);
- if (token && !strcmp(token, "#define")) {
-
- // Only parse identifiers that start with "__NR_"
- token = strtok_r(NULL, whitespace, &save);
- if (token) {
- if (strncmp(token, "__NR_", 5)) {
- continue;
- }
- std::string syscallName(token + 5);
-
- // Parse the value of the symbol. Try to be forgiving in what
- // we accept, as the file format might change over time.
- token = strtok_r(NULL, "\r\n", &save);
- if (token) {
- // Some values are defined relative to previous values, we
- // detect these examples by finding an earlier symbol name
- // followed by a '+' plus character.
- bool isRelative = false;
- char *base = strstr(token, baseName.c_str());
- if (baseNum >= 0 && base) {
- base += baseName.length();
- while (*base == ' ' || *base == '\t') {
- ++base;
- }
- if (*base == '+') {
- isRelative = true;
- token = base;
- }
- }
-
- // Skip any characters that are not part of the syscall number.
- while (*token < '0' || *token > '9') {
- token++;
- }
-
- // If we now have a valid datum, enter it into our map.
- if (*token) {
- int sysnum = atoi(token);
-
- // Deal with symbols that are defined relative to earlier
- // ones.
- if (isRelative) {
- sysnum += baseNum;
- } else {
- baseNum = sysnum;
- baseName = syscallName;
- }
-
- // Keep track of the highest syscall number that we know
- // about.
- if (sysnum >= numSyscallNames_) {
- numSyscallNames_ = sysnum + 1;
- }
-
- syscallNamesMap_[sysnum] = syscallName;
- }
- }
- }
- }
- }
- fclose(fp);
- break;
- }
- }
- if (numSyscallNames_) {
- // We cannot make system calls at the time, when we are looking up
- // the names. So, copy them into a data structure that can be
- // accessed without having to allocated memory (i.e. no more STL).
- syscallNames_ = reinterpret_cast<const char **>(
- calloc(sizeof(char *), numSyscallNames_));
- for (std::map<int, std::string>::const_iterator iter =
- syscallNamesMap_.begin();
- iter != syscallNamesMap_.end();
- ++iter) {
- syscallNames_[iter->first] = iter->second.c_str();
- }
- }
- }
-}
-
-bool Debug::enter() {
- // Increment the recursion level in TLS storage. This allows us to
- // make system calls from within our debugging functions, without triggering
- // additional debugging output.
- //
- // This function can be called from both the sandboxed process and from the
- // trusted process. Only the sandboxed process needs to worry about
- // recursively calling system calls. The trusted process doesn't intercept
- // system calls and thus doesn't have this problem. It also doesn't have
- // a TLS. We explicitly set the segment selector to zero, when in the
- // trusted process, so that we can avoid tracking recursion levels.
- int level;
- #if defined(__x86_64__)
- asm volatile("mov %%gs, %0\n"
- "test %0, %0\n"
- "jz 1f\n"
- "movl %%gs:0x1050-0xE0, %0\n"
- "incl %%gs:0x1050-0xE0\n"
- "1:\n"
- : "=r"(level)
- :
- : "memory");
- #elif defined(__i386__)
- asm volatile("mov %%fs, %0\n"
- "test %0, %0\n"
- "jz 1f\n"
- "movl %%fs:0x1034-0x58, %0\n"
- "incl %%fs:0x1034-0x58\n"
- "1:\n"
- : "=r"(level)
- :
- : "memory");
- #else
- #error "Unsupported target platform"
- #endif
- return !level;
-}
-
-bool Debug::leave() {
- // Decrement the recursion level in TLS storage. This allows us to
- // make system calls from within our debugging functions, without triggering
- // additional debugging output.
- //
- // This function can be called from both the sandboxed process and from the
- // trusted process. Only the sandboxed process needs to worry about
- // recursively calling system calls. The trusted process doesn't intercept
- // system calls and thus doesn't have this problem. It also doesn't have
- // a TLS. We explicitly set the segment selector to zero, when in the
- // trusted process, so that we can avoid tracking recursion levels.
- int level;
- #if defined(__x86_64__)
- asm volatile("mov %%gs, %0\n"
- "test %0, %0\n"
- "jz 1f\n"
- "decl %%gs:0x1050-0xE0\n"
- "movl %%gs:0x1050-0xE0, %0\n"
- "1:\n"
- : "=r"(level)
- :
- : "memory");
- #elif defined(__i386__)
- asm volatile("mov %%fs, %0\n"
- "test %0, %0\n"
- "jz 1f\n"
- "decl %%fs:0x1034-0x58\n"
- "movl %%fs:0x1034-0x58, %0\n"
- "1:\n"
- : "=r"(level)
- :
- : "memory");
- #else
- #error Unsupported target platform
- #endif
- return !level;
-}
-
-void Debug::_message(const char* msg) {
- if (enabled_) {
- Sandbox::SysCalls sys;
- size_t len = strlen(msg);
- if (len && msg[len-1] != '\n') {
- // Write operations should be atomic, so that we don't interleave
- // messages from multiple threads. Append a newline, if it is not
- // already there.
- char copy[len + 1];
- memcpy(copy, msg, len);
- copy[len] = '\n';
- Sandbox::write(sys, 2, copy, len + 1);
- } else {
- Sandbox::write(sys, 2, msg, len);
- }
- }
-}
-
-void Debug::message(const char* msg) {
- if (enabled_) {
- if (enter()) {
- _message(msg);
- }
- leave();
- }
-}
-
-void Debug::gettimeofday(long long* tm) {
- if (tm) {
- struct timeval tv;
- #if defined(__i386__)
- // Zero out the lastSyscallNum, so that we don't try to coalesce
- // calls to gettimeofday(). For debugging purposes, we need the
- // exact time.
- asm volatile("movl $0, %fs:0x102C-0x58");
- #elif !defined(__x86_64__)
- #error Unsupported target platform
- #endif
- ::gettimeofday(&tv, NULL);
- *tm = 1000ULL*1000ULL*static_cast<unsigned>(tv.tv_sec) +
- static_cast<unsigned>(tv.tv_usec);
- }
-}
-
-void Debug::syscall(long long* tm, int sysnum, const char* msg, int call) {
- // This function gets called from the system call wrapper. Avoid calling
- // any library functions that themselves need system calls.
- if (enabled_) {
- if (enter() || !tm) {
- gettimeofday(tm);
-
- const char *sysname = NULL;
- if (sysnum >= 0 && sysnum < numSyscallNames_) {
- sysname = syscallNames_[sysnum];
- }
- static const char kUnnamedMessage[] = "Unnamed syscall #";
- char unnamed[40];
- if (!sysname) {
- memcpy(unnamed, kUnnamedMessage, sizeof(kUnnamedMessage) - 1);
- itoa(unnamed + sizeof(kUnnamedMessage) - 1, sysnum);
- sysname = unnamed;
- }
- #if defined(__NR_socketcall) || defined(__NR_ipc)
- char extra[40];
- *extra = '\000';
- #if defined(__NR_socketcall)
- if (sysnum == __NR_socketcall) {
- static const char* socketcall_name[] = {
- 0, "socket", "bind", "connect", "listen", "accept", "getsockname",
- "getpeername", "socketpair", "send", "recv", "sendto","recvfrom",
- "shutdown", "setsockopt", "getsockopt", "sendmsg", "recvmsg",
- "accept4"
- };
- if (call >= 1 &&
- call < (int)(sizeof(socketcall_name)/sizeof(char *))) {
- strcat(strcpy(extra, " "), socketcall_name[call]);
- } else {
- itoa(strcpy(extra, " #") + 2, call);
- }
- }
- #endif
- #if defined(__NR_ipc)
- if (sysnum == __NR_ipc) {
- static const char* ipc_name[] = {
- 0, "semop", "semget", "semctl", "semtimedop", 0, 0, 0, 0, 0, 0,
- "msgsnd", "msgrcv", "msgget", "msgctl", 0, 0, 0, 0, 0, 0,
- "shmat", "shmdt", "shmget", "shmctl" };
- if (call >= 1 && call < (int)(sizeof(ipc_name)/sizeof(char *)) &&
- ipc_name[call]) {
- strcat(strcpy(extra, " "), ipc_name[call]);
- } else {
- itoa(strcpy(extra, " #") + 2, call);
- }
- }
- #endif
- #else
- static const char extra[1] = { 0 };
- #endif
- char buf[strlen(sysname) + strlen(extra) + (msg ? strlen(msg) : 0) + 4];
- strcat(strcat(strcat(strcat(strcpy(buf, sysname), extra), ": "),
- msg ? msg : ""), "\n");
- _message(buf);
- }
- leave();
- }
-}
-
-char* Debug::itoa(char* s, int n) {
- // Remember return value
- char *ret = s;
-
- // Insert sign for negative numbers
- if (n < 0) {
- *s++ = '-';
- n = -n;
- }
-
- // Convert to decimal (in reverse order)
- char *start = s;
- do {
- *s++ = '0' + (n % 10);
- n /= 10;
- } while (n);
- *s-- = '\000';
-
- // Reverse order of digits
- while (start < s) {
- char ch = *s;
- *s-- = *start;
- *start++ = ch;
- }
-
- return ret;
-}
-
-void Debug::elapsed(long long tm, int sysnum, int call) {
- if (enabled_) {
- if (enter()) {
- // Compute the time that has passed since the system call started.
- long long delta;
- gettimeofday(&delta);
- delta -= tm;
-
- // Format "Elapsed time: %d.%03dms" without using sprintf().
- char buf[80];
- itoa(strrchr(strcpy(buf, "Elapsed time: "), '\000'), delta/1000);
- delta %= 1000;
- strcat(buf, delta < 100 ? delta < 10 ? ".00" : ".0" : ".");
- itoa(strrchr(buf, '\000'), delta);
- strcat(buf, "ms");
-
- // Print system call name and elapsed time.
- syscall(NULL, sysnum, buf, call);
- }
- leave();
- }
-}
-
-} // namespace
-
-#endif // NDEBUG
diff --git a/sandbox/linux/seccomp/debug.h b/sandbox/linux/seccomp/debug.h
deleted file mode 100644
index eb5a194..0000000
--- a/sandbox/linux/seccomp/debug.h
+++ /dev/null
@@ -1,80 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef DEBUG_H__
-#define DEBUG_H__
-
-#include <map>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string>
-#include <string.h>
-
-#include "sandbox_impl.h"
-
-namespace playground {
-
-// Static-only debugging helpers. When NDEBUG is defined, message(),
-// syscall() and elapsed() are empty inline functions and isEnabled()
-// returns false; otherwise they are only declared here.
-class Debug {
- public:
- // If debugging is enabled, write a message to stderr.
- // In non-NDEBUG builds the function is given the assembler-level name
- // "playground$debugMessage" (with internal visibility on x86-64).
- static void message(const char* msg)
- #ifndef NDEBUG
- asm("playground$debugMessage")
- #if defined(__x86_64__)
- __attribute__((visibility("internal")))
- #endif
- ;
- #else
- { }
- #endif
-
- // If debugging is enabled, write the name of the syscall and an optional
- // message to stderr.
- // |call| defaults to -1 when the caller has no sub-call number to report.
- static void syscall(long long* tm, int sysnum,
- const char* msg, int call = -1)
- #ifndef NDEBUG
- ;
- #else
- { }
- #endif
-
- // Print how much wall-time has elapsed since the last call to syscall()
- static void elapsed(long long tm, int sysnum, int call = -1)
- #ifndef NDEBUG
- ;
- #else
- {
- }
- #endif
-
- // Check whether debugging is enabled.
- // Always false when NDEBUG is defined.
- static bool isEnabled() {
- #ifndef NDEBUG
- return enabled_;
- #else
- return false;
- #endif
- }
-
- private:
- // Internal helpers and state; these only exist in non-NDEBUG builds.
- #ifndef NDEBUG
- Debug();
- static bool enter();
- static bool leave();
- static void _message(const char* msg);
- static void gettimeofday(long long* tm);
- // Formats |n| as decimal text into |s| and returns |s|.
- static char* itoa(char* s, int n);
-
- static Debug debug_;
-
- static bool enabled_;
- static int numSyscallNames_;
- static const char **syscallNames_;
- static std::map<int, std::string> syscallNamesMap_;
- #endif
-};
-
-} // namespace
-
-#endif // DEBUG_H__
diff --git a/sandbox/linux/seccomp/exit.cc b/sandbox/linux/seccomp/exit.cc
deleted file mode 100644
index f4db643..0000000
--- a/sandbox/linux/seccomp/exit.cc
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "debug.h"
-#include "sandbox_impl.h"
-
-namespace playground {
-
-long Sandbox::sandbox_exit(int status) {
-  // Announces the exit on processFdPub() and then calls _exit(status)
-  // in a loop; this function does not return to its caller.
-  long long tm;
-  Debug::syscall(&tm, __NR_exit, "Executing handler");
-
-  // Packed message: system call number followed by the cookie.
-  struct {
-    int       sysnum;
-    long long cookie;
-  } __attribute__((packed)) msg;
-  msg.sysnum = __NR_exit;
-  msg.cookie = cookie();
-
-  SysCalls sys;
-  const bool sent =
-      write(sys, processFdPub(), &msg, sizeof(msg)) == sizeof(msg);
-  if (!sent) {
-    die("Failed to forward exit() request [sandbox]");
-  }
-
-  // Keep retrying in case _exit() ever returns.
-  while (true) {
-    sys._exit(status);
-  }
-}
-
-bool Sandbox::process_exit(int parentMapsFd, int sandboxFd, int threadFdPub,
-                           int threadFd, SecureMem::Args* mem) {
-  // Handles a forwarded exit() request: takes the system call lock and then
-  // sends __NR_exit with a zero argument via |threadFdPub|. Always reports
-  // success. |sandboxFd| and |threadFd| are not used here.
-  SecureMem::lockSystemCall(parentMapsFd, mem);
-  SecureMem::sendSystemCall(threadFdPub,
-                            true,
-                            parentMapsFd,
-                            mem,
-                            __NR_exit,
-                            0);
-  return true;
-}
-
-} // namespace
diff --git a/sandbox/linux/seccomp/getpid.cc b/sandbox/linux/seccomp/getpid.cc
deleted file mode 100644
index be5449b..0000000
--- a/sandbox/linux/seccomp/getpid.cc
+++ /dev/null
@@ -1,17 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "debug.h"
-#include "sandbox_impl.h"
-
-namespace playground {
-
-long Sandbox::sandbox_getpid() {
-  // Answers getpid() from the cached pid_ member; nothing is forwarded.
-  long long started;
-  Debug::syscall(&started, __NR_getpid, "Executing handler");
-  Debug::elapsed(started, __NR_getpid);
-
-  return pid_;
-}
-
-} // namespace
diff --git a/sandbox/linux/seccomp/gettid.cc b/sandbox/linux/seccomp/gettid.cc
deleted file mode 100644
index 699774a..0000000
--- a/sandbox/linux/seccomp/gettid.cc
+++ /dev/null
@@ -1,18 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "debug.h"
-#include "sandbox_impl.h"
-
-namespace playground {
-
-long Sandbox::sandbox_gettid() {
-  // Answers gettid() with the value returned by tid(); the lookup is
-  // bracketed by the debug timing calls.
-  long long started;
-  Debug::syscall(&started, __NR_gettid, "Executing handler");
-
-  const pid_t thread_id = tid();
-
-  Debug::elapsed(started, __NR_gettid);
-  return thread_id;
-}
-
-} // namespace
diff --git a/sandbox/linux/seccomp/ioctl.cc b/sandbox/linux/seccomp/ioctl.cc
deleted file mode 100644
index 4d2b3c5c5..0000000
--- a/sandbox/linux/seccomp/ioctl.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "debug.h"
-#include "sandbox_impl.h"
-
-namespace playground {
-
-long Sandbox::sandbox_ioctl(int d, int req, void *arg) {
-  // Packs the ioctl() arguments into a request, writes it to processFdPub()
-  // and returns the long result read back from threadFdPub(). Dies if
-  // either transfer is short.
-  long long tm;
-  Debug::syscall(&tm, __NR_ioctl, "Executing handler");
-
-  // Packed message: system call number, cookie, then the arguments.
-  struct {
-    int       sysnum;
-    long long cookie;
-    IOCtl     ioctl_req;
-  } __attribute__((packed)) msg;
-  msg.sysnum        = __NR_ioctl;
-  msg.cookie        = cookie();
-  msg.ioctl_req.d   = d;
-  msg.ioctl_req.req = req;
-  msg.ioctl_req.arg = arg;
-
-  long result;
-  SysCalls sys;
-  const bool sent =
-      write(sys, processFdPub(), &msg, sizeof(msg)) == sizeof(msg);
-  if (!sent ||
-      read(sys, threadFdPub(), &result, sizeof(result)) != sizeof(result)) {
-    die("Failed to forward ioctl() request [sandbox]");
-  }
-
-  Debug::elapsed(tm, __NR_ioctl);
-  return result;
-}
-
-bool Sandbox::process_ioctl(int parentMapsFd, int sandboxFd, int threadFdPub,
-                            int threadFd, SecureMem::Args* mem) {
-  // Reads an IOCtl request from |sandboxFd|. TCGETS and TIOCGWINSZ are
-  // passed on via sendSystemCall() (returns true); every other request is
-  // abandoned with -EINVAL (returns false).
-  IOCtl request;
-  SysCalls sys;
-  if (read(sys, sandboxFd, &request, sizeof(request)) != sizeof(request)) {
-    die("Failed to read parameters for ioctl() [process]");
-  }
-
-  int rc = -EINVAL;
-  if (request.req == TCGETS || request.req == TIOCGWINSZ) {
-    SecureMem::sendSystemCall(threadFdPub, false, -1, mem, __NR_ioctl,
-                              request.d, request.req, request.arg);
-    return true;
-  }
-
-  // Anything else is refused; mention the request code when debugging.
-  if (Debug::isEnabled()) {
-    char text[80];
-    sprintf(text, "Unsupported ioctl: 0x%04X\n", request.req);
-    Debug::message(text);
-  }
-  SecureMem::abandonSystemCall(threadFd, rc);
-  return false;
-}
-
-} // namespace
diff --git a/sandbox/linux/seccomp/ipc.cc b/sandbox/linux/seccomp/ipc.cc
deleted file mode 100644
index 67a4e34..0000000
--- a/sandbox/linux/seccomp/ipc.cc
+++ /dev/null
@@ -1,351 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "debug.h"
-#include "sandbox_impl.h"
-
-namespace playground {
-
-// Fallback definitions for SysV IPC constants, used only when the included
-// headers have not already defined them.
-#ifndef IPC_PRIVATE
-#define IPC_PRIVATE 0
-#endif
-#ifndef IPC_RMID
-#define IPC_RMID 0
-#endif
-#ifndef IPC_64
-#define IPC_64 256
-#endif
-
-#if defined(__NR_shmget)
-void* Sandbox::sandbox_shmat(int shmid, const void* shmaddr, int shmflg) {
-  // Packs the shmat() arguments into a request, writes it to processFdPub()
-  // and returns the long result read from threadFdPub(), reinterpreted as a
-  // pointer. Dies if either transfer is short.
-  long long tm;
-  Debug::syscall(&tm, __NR_shmat, "Executing handler");
-
-  // Packed message: system call number, cookie, then the arguments.
-  struct {
-    int       sysnum;
-    long long cookie;
-    ShmAt     shmat_req;
-  } __attribute__((packed)) msg;
-  msg.sysnum            = __NR_shmat;
-  msg.cookie            = cookie();
-  msg.shmat_req.shmid   = shmid;
-  msg.shmat_req.shmaddr = shmaddr;
-  msg.shmat_req.shmflg  = shmflg;
-
-  long result;
-  SysCalls sys;
-  const bool sent =
-      write(sys, processFdPub(), &msg, sizeof(msg)) == sizeof(msg);
-  if (!sent ||
-      read(sys, threadFdPub(), &result, sizeof(result)) != sizeof(result)) {
-    die("Failed to forward shmat() request [sandbox]");
-  }
-
-  Debug::elapsed(tm, __NR_shmat);
-  return reinterpret_cast<void *>(result);
-}
-
-long Sandbox::sandbox_shmctl(int shmid, int cmd, void* buf) {
-  // Packs the shmctl() arguments into a request, writes it to
-  // processFdPub() and returns the long result read from threadFdPub().
-  // Dies if either transfer is short.
-  long long tm;
-  Debug::syscall(&tm, __NR_shmctl, "Executing handler");
-
-  // Packed message: system call number, cookie, then the arguments.
-  struct {
-    int       sysnum;
-    long long cookie;
-    ShmCtl    shmctl_req;
-  } __attribute__((packed)) msg;
-  msg.sysnum           = __NR_shmctl;
-  msg.cookie           = cookie();
-  msg.shmctl_req.shmid = shmid;
-  msg.shmctl_req.cmd   = cmd;
-  msg.shmctl_req.buf   = buf;
-
-  long result;
-  SysCalls sys;
-  const bool sent =
-      write(sys, processFdPub(), &msg, sizeof(msg)) == sizeof(msg);
-  if (!sent ||
-      read(sys, threadFdPub(), &result, sizeof(result)) != sizeof(result)) {
-    die("Failed to forward shmctl() request [sandbox]");
-  }
-
-  Debug::elapsed(tm, __NR_shmctl);
-  return result;
-}
-
-long Sandbox::sandbox_shmdt(const void* shmaddr) {
-  // Packs the shmdt() argument into a request, writes it to processFdPub()
-  // and returns the long result read from threadFdPub(). Dies if either
-  // transfer is short.
-  long long tm;
-  Debug::syscall(&tm, __NR_shmdt, "Executing handler");
-
-  // Packed message: system call number, cookie, then the argument.
-  struct {
-    int       sysnum;
-    long long cookie;
-    ShmDt     shmdt_req;
-  } __attribute__((packed)) msg;
-  msg.sysnum            = __NR_shmdt;
-  msg.cookie            = cookie();
-  msg.shmdt_req.shmaddr = shmaddr;
-
-  long result;
-  SysCalls sys;
-  const bool sent =
-      write(sys, processFdPub(), &msg, sizeof(msg)) == sizeof(msg);
-  if (!sent ||
-      read(sys, threadFdPub(), &result, sizeof(result)) != sizeof(result)) {
-    die("Failed to forward shmdt() request [sandbox]");
-  }
-
-  Debug::elapsed(tm, __NR_shmdt);
-  return result;
-}
-
-long Sandbox::sandbox_shmget(int key, size_t size, int shmflg) {
-  // Packs the shmget() arguments into a request, writes it to
-  // processFdPub() and returns the long result read from threadFdPub().
-  // Dies if either transfer is short.
-  long long tm;
-  Debug::syscall(&tm, __NR_shmget, "Executing handler");
-
-  // Packed message: system call number, cookie, then the arguments.
-  struct {
-    int       sysnum;
-    long long cookie;
-    ShmGet    shmget_req;
-  } __attribute__((packed)) msg;
-  msg.sysnum            = __NR_shmget;
-  msg.cookie            = cookie();
-  msg.shmget_req.key    = key;
-  msg.shmget_req.size   = size;
-  msg.shmget_req.shmflg = shmflg;
-
-  long result;
-  SysCalls sys;
-  const bool sent =
-      write(sys, processFdPub(), &msg, sizeof(msg)) == sizeof(msg);
-  if (!sent ||
-      read(sys, threadFdPub(), &result, sizeof(result)) != sizeof(result)) {
-    die("Failed to forward shmget() request [sandbox]");
-  }
-
-  Debug::elapsed(tm, __NR_shmget);
-  return result;
-}
-
-bool Sandbox::process_shmat(int parentMapsFd, int sandboxFd, int threadFdPub,
-                            int threadFd, SecureMem::Args* mem) {
-  // Reads a ShmAt request from |sandboxFd| and decides whether to pass it
-  // on. mem->shmId is reset to -1 on both the accept and the deny path.
-  ShmAt request;
-  SysCalls sys;
-  if (read(sys, sandboxFd, &request, sizeof(request)) != sizeof(request)) {
-    die("Failed to read parameters for shmat() [process]");
-  }
-
-  // Attaching is only permitted for the identifier recorded in mem->shmId
-  // (the one returned by the most recent shmget(IPC_PRIVATE)), and only
-  // with a NULL address and no flags.
-  const bool permitted = !request.shmaddr &&
-                         !request.shmflg &&
-                         request.shmid == mem->shmId;
-  mem->shmId = -1;
-  if (!permitted) {
-    SecureMem::abandonSystemCall(threadFd, -EINVAL);
-    return false;
-  }
-
-  SecureMem::sendSystemCall(threadFdPub, false, -1, mem,
-                            __NR_shmat, request.shmid, request.shmaddr,
-                            request.shmflg);
-  return true;
-}
-
-bool Sandbox::process_shmctl(int parentMapsFd, int sandboxFd, int threadFdPub,
-                             int threadFd, SecureMem::Args* mem) {
-  // Reads a ShmCtl request from |sandboxFd| and decides whether to pass it
-  // on. mem->shmId is reset to -1 on both the accept and the deny path.
-  ShmCtl request;
-  SysCalls sys;
-  if (read(sys, sandboxFd, &request, sizeof(request)) != sizeof(request)) {
-    die("Failed to read parameters for shmctl() [process]");
-  }
-
-  // Removal is the only shmctl() operation that has to be supported: the
-  // command may contain nothing but IPC_64 / IPC_RMID bits and the buffer
-  // must be NULL.
-  const bool has_other_bits = (request.cmd & ~(IPC_64 | IPC_RMID)) != 0;
-  const bool rejected = has_other_bits || request.buf;
-  mem->shmId = -1;
-  if (rejected) {
-    SecureMem::abandonSystemCall(threadFd, -EINVAL);
-    return false;
-  }
-
-  SecureMem::sendSystemCall(threadFdPub, false, -1, mem,
-                            __NR_shmctl, request.shmid, request.cmd,
-                            request.buf);
-  return true;
-}
-
-// Reads a ShmDt request from |sandboxFd|. The request is abandoned with
-// -EINVAL (returning false) when the address lies inside an entry of
-// protectedMap_; otherwise it is passed on via sendSystemCall() (returning
-// true). mem->shmId is reset to -1 on both paths.
-bool Sandbox::process_shmdt(int parentMapsFd, int sandboxFd, int threadFdPub,
- int threadFd, SecureMem::Args* mem) {
- // Read request
- ShmDt shmdt_req;
- SysCalls sys;
- if (read(sys, sandboxFd, &shmdt_req, sizeof(shmdt_req)) !=
- sizeof(shmdt_req)) {
- die("Failed to read parameters for shmdt() [process]");
- }
-
- // Detaching shared memory segments is generally safe, but just in case
- // of a kernel bug, we make sure that the address does not fall into any
- // of the reserved memory regions.
- ProtectedMap::const_iterator iter = protectedMap_.lower_bound(
- (void *)shmdt_req.shmaddr);
- // Step back one entry so that a region starting below the address is
- // examined as well.
- if (iter != protectedMap_.begin()) {
- --iter;
- }
- for (; iter != protectedMap_.end() && iter->first <= shmdt_req.shmaddr;
- ++iter){
- // Each entry is treated as the half-open range
- // [iter->first, iter->first + iter->second).
- if (shmdt_req.shmaddr < reinterpret_cast<void *>(
- reinterpret_cast<char *>(iter->first) + iter->second) &&
- shmdt_req.shmaddr >= iter->first) {
- mem->shmId = -1;
- SecureMem::abandonSystemCall(threadFd, -EINVAL);
- return false;
- }
- }
-
- mem->shmId = -1;
- SecureMem::sendSystemCall(threadFdPub, false, -1, mem,
- __NR_shmdt, shmdt_req.shmaddr);
- return true;
-}
-
-bool Sandbox::process_shmget(int parentMapsFd, int sandboxFd, int threadFdPub,
-                             int threadFd, SecureMem::Args* mem) {
-  // Reads a ShmGet request from |sandboxFd| and decides whether to pass it
-  // on. mem->shmId is reset to -1 on both the accept and the deny path.
-  ShmGet request;
-  SysCalls sys;
-  if (read(sys, sandboxFd, &request, sizeof(request)) != sizeof(request)) {
-    die("Failed to read parameters for shmget() [process]");
-  }
-
-  // The sandboxed application must not reach arbitrary shared memory
-  // regions, only ones it creates itself: the key has to be IPC_PRIVATE
-  // and the flags may carry nothing beyond the 0777 permission bits.
-  const bool private_key = request.key == IPC_PRIVATE;
-  const bool extra_flags = (request.shmflg & ~0777) != 0;
-  mem->shmId = -1;
-  if (!private_key || extra_flags) {
-    SecureMem::abandonSystemCall(threadFd, -EINVAL);
-    return false;
-  }
-
-  SecureMem::sendSystemCall(threadFdPub, false, -1, mem,
-                            __NR_shmget, request.key, request.size,
-                            request.shmflg);
-  return true;
-}
-#endif
-
-#if defined(__NR_ipc)
-// Fallback definitions for the shared-memory call numbers that
-// Sandbox::process_ipc() switches on, used only when not already defined.
-#ifndef SHMAT
-#define SHMAT 21
-#endif
-#ifndef SHMDT
-#define SHMDT 22
-#endif
-#ifndef SHMGET
-#define SHMGET 23
-#endif
-#ifndef SHMCTL
-#define SHMCTL 24
-#endif
-
-long Sandbox::sandbox_ipc(unsigned call, int first, int second, int third,
-                          void* ptr, long fifth) {
-  // Packs the ipc() arguments into a request, writes it to processFdPub()
-  // and returns the long result read from threadFdPub(). Dies if either
-  // transfer is short. |call| is also passed to the debug output.
-  long long tm;
-  Debug::syscall(&tm, __NR_ipc, "Executing handler", call);
-
-  // Packed message: system call number, cookie, then the arguments.
-  struct {
-    int       sysnum;
-    long long cookie;
-    IPC       ipc_req;
-  } __attribute__((packed)) msg;
-  msg.sysnum         = __NR_ipc;
-  msg.cookie         = cookie();
-  msg.ipc_req.call   = call;
-  msg.ipc_req.first  = first;
-  msg.ipc_req.second = second;
-  msg.ipc_req.third  = third;
-  msg.ipc_req.ptr    = ptr;
-  msg.ipc_req.fifth  = fifth;
-
-  long result;
-  SysCalls sys;
-  const bool sent =
-      write(sys, processFdPub(), &msg, sizeof(msg)) == sizeof(msg);
-  if (!sent ||
-      read(sys, threadFdPub(), &result, sizeof(result)) != sizeof(result)) {
-    die("Failed to forward ipc() request [sandbox]");
-  }
-
-  Debug::elapsed(tm, __NR_ipc, call);
-  return result;
-}
-
-// Reads an IPC request from |sandboxFd| and applies a per-call policy.
-// Accepted requests are passed on via sendSystemCall() (returns true);
-// everything else is abandoned with -EINVAL (returns false). Both the
-// shared "accept" and "deny" paths reset mem->shmId to -1.
-bool Sandbox::process_ipc(int parentMapsFd, int sandboxFd, int threadFdPub,
- int threadFd, SecureMem::Args* mem) {
- // Read request
- IPC ipc_req;
- SysCalls sys;
- if (read(sys, sandboxFd, &ipc_req, sizeof(ipc_req)) != sizeof(ipc_req)) {
- die("Failed to read parameters for ipc() [process]");
- }
-
- // We do not support all of the SysV IPC calls. In fact, we only support
- // the minimum feature set necessary for Chrome's renderers to share memory
- // with the X server.
- switch (ipc_req.call) {
- case SHMAT: {
- // We only allow attaching to the shm identifier that was returned by
- // the most recent call to shmget(IPC_PRIVATE)
- if (ipc_req.ptr || ipc_req.second || ipc_req.first != mem->shmId) {
- goto deny;
- }
- // Shared success path; the other cases jump here once their checks pass.
- accept:
- mem->shmId = -1;
- SecureMem::sendSystemCall(threadFdPub, false, -1, mem,
- __NR_ipc, ipc_req.call, ipc_req.first,
- ipc_req.second, ipc_req.third, ipc_req.ptr,
- ipc_req.fifth);
- return true;
- }
- case SHMCTL:
- // The only shmctl() operation that we need to support is removal. This
- // operation is generally safe.
- if ((ipc_req.second & ~(IPC_64 | IPC_RMID)) || ipc_req.ptr) {
- goto deny;
- } else {
- goto accept;
- }
- case SHMDT: {
- // Detaching shared memory segments is generally safe, but just in case
- // of a kernel bug, we make sure that the address does not fall into any
- // of the reserved memory regions.
- ProtectedMap::const_iterator iter = protectedMap_.lower_bound(
- (void *)ipc_req.ptr);
- // Step back one entry so that a region starting below the address is
- // examined as well.
- if (iter != protectedMap_.begin()) {
- --iter;
- }
- for (; iter != protectedMap_.end() && iter->first <=ipc_req.ptr; ++iter){
- // Each entry is treated as the half-open range
- // [iter->first, iter->first + iter->second).
- if (ipc_req.ptr < reinterpret_cast<void *>(
- reinterpret_cast<char *>(iter->first) + iter->second) &&
- ipc_req.ptr >= iter->first) {
- goto deny;
- }
- }
- goto accept;
- }
- case SHMGET:
- // We do not want to allow the sandboxed application to access arbitrary
- // shared memory regions. We only allow it to access regions that it
- // created itself.
- if (ipc_req.first != IPC_PRIVATE || ipc_req.third & ~0777) {
- goto deny;
- } else {
- goto accept;
- }
- default:
- // Other than SysV shared memory, we do not actually need to support any
- // other SysV IPC calls.
- // Shared failure path; also the target of every "goto deny" above.
- deny:
- mem->shmId = -1;
- SecureMem::abandonSystemCall(threadFd, -EINVAL);
- return false;
- }
-}
-#endif
-
-} // namespace
diff --git a/sandbox/linux/seccomp/library.cc b/sandbox/linux/seccomp/library.cc
deleted file mode 100644
index 8dd9b93..0000000
--- a/sandbox/linux/seccomp/library.cc
+++ /dev/null
@@ -1,1208 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#define XOPEN_SOURCE 500
-#include <algorithm>
-#include <elf.h>
-#include <errno.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <linux/unistd.h>
-#include <set>
-#include <signal.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/ptrace.h>
-#include <sys/resource.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-
-#include "allocator.h"
-#include "debug.h"
-#include "library.h"
-#include "sandbox_impl.h"
-#include "syscall.h"
-#include "syscall_table.h"
-#include "x86_decode.h"
-
-// Select width-neutral ELF type and macro names for the target: the
-// Elf64_* / ELF64_* family on x86-64 and the Elf32_* / ELF32_* family on
-// i386. Any other target is a compile-time error.
-#if defined(__x86_64__)
-typedef Elf64_Phdr Elf_Phdr;
-typedef Elf64_Rela Elf_Rel;
-
-typedef Elf64_Half Elf_Half;
-typedef Elf64_Word Elf_Word;
-typedef Elf64_Sword Elf_Sword;
-typedef Elf64_Xword Elf_Xword;
-typedef Elf64_Sxword Elf_Sxword;
-typedef Elf64_Off Elf_Off;
-typedef Elf64_Section Elf_Section;
-typedef Elf64_Versym Elf_Versym;
-
-#define ELF_ST_BIND ELF64_ST_BIND
-#define ELF_ST_TYPE ELF64_ST_TYPE
-#define ELF_ST_INFO ELF64_ST_INFO
-#define ELF_R_SYM ELF64_R_SYM
-#define ELF_R_TYPE ELF64_R_TYPE
-#define ELF_R_INFO ELF64_R_INFO
-
-// Name of the PLT relocation section and its jump-slot relocation type.
-#define ELF_REL_PLT ".rela.plt"
-#define ELF_JUMP_SLOT R_X86_64_JUMP_SLOT
-#elif defined(__i386__)
-typedef Elf32_Phdr Elf_Phdr;
-typedef Elf32_Rel Elf_Rel;
-
-typedef Elf32_Half Elf_Half;
-typedef Elf32_Word Elf_Word;
-typedef Elf32_Sword Elf_Sword;
-typedef Elf32_Xword Elf_Xword;
-typedef Elf32_Sxword Elf_Sxword;
-typedef Elf32_Off Elf_Off;
-typedef Elf32_Section Elf_Section;
-typedef Elf32_Versym Elf_Versym;
-
-#define ELF_ST_BIND ELF32_ST_BIND
-#define ELF_ST_TYPE ELF32_ST_TYPE
-#define ELF_ST_INFO ELF32_ST_INFO
-#define ELF_R_SYM ELF32_R_SYM
-#define ELF_R_TYPE ELF32_R_TYPE
-#define ELF_R_INFO ELF32_R_INFO
-
-// Name of the PLT relocation section and its jump-slot relocation type.
-#define ELF_REL_PLT ".rel.plt"
-#define ELF_JUMP_SLOT R_386_JMP_SLOT
-#else
-#error Unsupported target platform
-#endif
-
-namespace playground {
-
-// Definitions of the static Library members holding kernel entry point
-// addresses; being statics without an initializer, they start out as NULL.
-char* Library::__kernel_vsyscall;
-char* Library::__kernel_sigreturn;
-char* Library::__kernel_rt_sigreturn;
-
-// Undoes the temporary image_ mapping, if one was set up (image_size_ is
-// non-zero): the first 4096 bytes are synchronised with the page at
-// memory_ranges_.rbegin()->second.start and the mapping is then moved back
-// onto that address.
-Library::~Library() {
- if (image_size_) {
- // We no longer need access to a full mapping of the underlying library
- // file. Move the temporarily extended mapping back to where we originally
- // found it. Make sure to preserve any changes that we might have made
- // since.
- Sandbox::SysCalls sys;
- // Make the first page writable so that it can be updated below.
- sys.mprotect(image_, 4096, PROT_READ | PROT_WRITE | PROT_EXEC);
- if (memcmp(image_, memory_ranges_.rbegin()->second.start, 4096)) {
- // Only copy data, if we made any changes in this data. Otherwise there
- // is no need to create another modified COW mapping.
- memcpy(image_, memory_ranges_.rbegin()->second.start, 4096);
- }
- sys.mprotect(image_, 4096, PROT_READ | PROT_EXEC);
- // Shrink the mapping to a single page and place it at the original
- // address (MREMAP_FIXED).
- sys.mremap(image_, image_size_, 4096, MREMAP_MAYMOVE | MREMAP_FIXED,
- memory_ranges_.rbegin()->second.start);
- }
-}
-
-// Copies up to |len| bytes from |src| to |dst| without risking a fault on
-// unmapped source memory. Returns |dst| on (possibly partial) success and
-// NULL if nothing could be copied or an unexpected error occurred.
-char* Library::getBytes(char* dst, const char* src, ssize_t len) {
- // Some kernels don't allow accessing the VDSO from write()
- if (isVDSO_ &&
- src >= memory_ranges_.begin()->second.start &&
- src <= memory_ranges_.begin()->second.stop) {
- // Clamp the request to the end of the range and copy directly.
- // NOTE(review): the check above also accepts src == stop, in which case
- // zero bytes are copied and |dst| is still returned -- confirm intent.
- ssize_t max =
- reinterpret_cast<char *>(memory_ranges_.begin()->second.stop) - src;
- if (len > max) {
- len = max;
- }
- memcpy(dst, src, len);
- return dst;
- }
-
- // Read up to "len" bytes from "src" and copy them to "dst". Short
- // copies are possible, if we are at the end of a mapping. Returns
- // NULL, if the operation failed completely.
- static int helper_socket[2];
- Sandbox::SysCalls sys;
- // The socket pair is created on first use and kept in a function-local
- // static. The result of socketpair() is not checked here.
- if (!helper_socket[0] && !helper_socket[1]) {
- // Copy data through a socketpair, as this allows us to access it
- // without incurring a segmentation fault.
- sys.socketpair(AF_UNIX, SOCK_STREAM, 0, helper_socket);
- }
- char* ptr = dst;
- // Chunking mode: 4096 means "up to the next 4096-byte boundary"; after
- // the first EFAULT it drops to 1 to copy byte by byte.
- int inc = 4096;
- while (len > 0) {
- ssize_t l = inc == 1 ? inc : 4096 - (reinterpret_cast<long>(src) & 0xFFF);
- if (l > len) {
- l = len;
- }
- l = NOINTR_SYS(sys.write(helper_socket[0], src, l));
- if (l == -1) {
- if (sys.my_errno == EFAULT) {
- if (inc == 1) {
- // Even a single byte is unreadable: fail if nothing was copied at
- // all, otherwise stop and report the short copy.
- if (ptr == dst) {
- return NULL;
- }
- break;
- }
- inc = 1;
- continue;
- } else {
- return NULL;
- }
- }
- // Pull the bytes that were just written back out into the destination.
- l = sys.read(helper_socket[1], ptr, l);
- if (l <= 0) {
- return NULL;
- }
- ptr += l;
- src += l;
- len -= l;
- }
- return dst;
-}
-
-char *Library::get(Elf_Addr offset, char *buf, size_t len) {
-  // Copies |len| bytes found at |offset| into |buf| and returns |buf|.
-  // On any failure NULL is returned; |buf| is always zero-filled first, so
-  // the caller never sees stale data.
-  memset(buf, 0, len);
-  if (!valid_) {
-    return NULL;
-  }
-  RangeMap::const_iterator iter = memory_ranges_.lower_bound(offset);
-  if (iter == memory_ranges_.end()) {
-    return NULL;
-  }
-
-  // Translate |offset| so that it is relative to the start of the range.
-  offset -= iter->first;
-  const long size = reinterpret_cast<char *>(iter->second.stop) -
-                    reinterpret_cast<char *>(iter->second.start);
-
-  // Reject requests that do not fit inside the range. |len| is compared
-  // against |size| first: "size - len" is evaluated in unsigned arithmetic
-  // and would wrap around for len > size, letting the request through.
-  if (size < 0 ||
-      len > static_cast<size_t>(size) ||
-      offset > static_cast<size_t>(size) - len) {
-    return NULL;
-  }
-
-  char *src = reinterpret_cast<char *>(iter->second.start) + offset;
-  if (!getBytes(buf, src, len)) {
-    return NULL;
-  }
-  return buf;
-}
-
-Library::string Library::get(Elf_Addr offset) {
-  // Returns the NUL-terminated string found at |offset|, truncated to at
-  // most 4095 characters. Returns "" if the library is not valid, no range
-  // matches, or |offset| lies at or beyond the end of the matching range.
-  if (!valid_) {
-    return "";
-  }
-  RangeMap::const_iterator iter = memory_ranges_.lower_bound(offset);
-  if (iter == memory_ranges_.end()) {
-    return "";
-  }
-
-  // Translate |offset| so that it is relative to the start of the range.
-  offset -= iter->first;
-  const char *start = reinterpret_cast<char *>(iter->second.start) + offset;
-  // The end of the range is fixed; it must not be shifted by |offset|, or
-  // the length computed below would always be the full range size and the
-  // read could run past the end of the mapping.
-  const char *stop = reinterpret_cast<char *>(iter->second.stop);
-  if (start >= stop) {
-    return "";
-  }
-
-  // Leave room for a terminator: the buffer starts out zeroed and at most
-  // sizeof(buf) - 1 bytes are ever copied into it.
-  char buf[4096] = { 0 };
-  ssize_t avail = stop - start;
-  if (avail >= static_cast<ssize_t>(sizeof(buf))) {
-    avail = sizeof(buf) - 1;
-  }
-  getBytes(buf, start, avail);
-
-  // Measure the string that was copied.
-  const char *last = buf;
-  while (*last) {
-    ++last;
-  }
-  string s = last > buf ? string(buf, last - buf) : "";
-  return s;
-}
-
-char *Library::getOriginal(Elf_Addr offset, char *buf, size_t len) {
-  // Copies |len| bytes of the original file contents at |offset| into |buf|
-  // and returns |buf|, or NULL on failure. On first use this tries to set
-  // up image_, an extended mapping of the library's first page. A NULL
-  // |buf| may be passed to only set up / extend that mapping; the result is
-  // then always NULL.
-  if (!valid_) {
-    memset(buf, 0, len);
-    return NULL;
-  }
-  Sandbox::SysCalls sys;
-  if (!image_ && !isVDSO_ && !memory_ranges_.empty() &&
-      memory_ranges_.rbegin()->first == 0) {
-    // Extend the mapping of the very first page of the underlying library
-    // file. This way, we can read the original file contents of the entire
-    // library.
-    // We have to be careful, because doing so temporarily removes the first
-    // 4096 bytes of the library from memory. And we don't want to accidentally
-    // unmap code that we are executing. So, only use functions that can be
-    // inlined.
-    void* start = memory_ranges_.rbegin()->second.start;
-    image_size_ = memory_ranges_.begin()->first +
-      (reinterpret_cast<char *>(memory_ranges_.begin()->second.stop) -
-       reinterpret_cast<char *>(memory_ranges_.begin()->second.start));
-    if (image_size_ < 8192) {
-      // It is possible to create a library that is only a single page in
-      // size. In that case, we have to make sure that we artificially map
-      // one extra page past the end of it, as our code relies on mremap()
-      // actually moving the mapping.
-      image_size_ = 8192;
-    }
-    image_ = reinterpret_cast<char *>(sys.mremap(start, 4096, image_size_,
-                                                 MREMAP_MAYMOVE));
-    if (image_size_ == 8192 && image_ == start) {
-      // We really mean it, when we say we want the memory to be moved.
-      image_ = reinterpret_cast<char *>(sys.mremap(start, 4096, image_size_,
-                                                   MREMAP_MAYMOVE));
-      sys.munmap(reinterpret_cast<char *>(start) + 4096, 4096);
-    }
-    if (image_ == MAP_FAILED) {
-      image_ = NULL;
-    } else {
-      // Put a fresh anonymous page where the first page used to be and copy
-      // the original contents back into it, one long at a time (no library
-      // calls, see above). The loop has to run down to and including index
-      // 0; stopping at index 1 would leave the first word of the page zeroed.
-      sys.MMAP(start, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
-               MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
-      for (int i = 4096 / sizeof(long); --i >= 0;
-           reinterpret_cast<long *>(start)[i] =
-             reinterpret_cast<long *>(image_)[i]);
-    }
-  }
-
-  if (image_) {
-    if (offset + len > image_size_) {
-      // It is quite likely that we initially did not map the entire file as
-      // we did not know how large it is. So, if necessary, try to extend the
-      // mapping.
-      size_t new_size = (offset + len + 4095) & ~4095;
-      char* tmp =
-        reinterpret_cast<char *>(sys.mremap(image_, image_size_, new_size,
-                                            MREMAP_MAYMOVE));
-      if (tmp != MAP_FAILED) {
-        image_ = tmp;
-        image_size_ = new_size;
-      }
-    }
-    if (buf && offset + len <= image_size_) {
-      return reinterpret_cast<char *>(memcpy(buf, image_ + offset, len));
-    }
-    return NULL;
-  }
-  // No image_ mapping available; fall back to the in-memory ranges.
-  return buf ? get(offset, buf, len) : NULL;
-}
-
-Library::string Library::getOriginal(Elf_Addr offset) {
-  // Returns the NUL-terminated string at |offset| in the original file
-  // contents. Falls back to get(offset) if no image_ mapping is available
-  // and returns "" if the library is not valid or |offset| is out of range.
-  if (!valid_) {
-    return "";
-  }
-  // Make sure we actually have a mapping that we can access. If the string
-  // is located at the end of the image, we might not yet have extended the
-  // mapping sufficiently.
-  if (!image_ || image_size_ <= offset) {
-    getOriginal(offset, NULL, 1);
-  }
-
-  if (image_) {
-    if (offset < image_size_) {
-      // Scan by offset rather than by pointer: extending the mapping below
-      // may move image_, which would leave raw pointers into the old
-      // mapping dangling.
-      Elf_Addr pos = offset;
-      while (pos < image_size_ && image_[pos]) {
-        ++pos;
-        if (pos >= image_size_) {
-          // Reached the end of what is mapped; try to extend the mapping.
-          getOriginal(pos, NULL, 1);
-        }
-      }
-      return string(image_ + offset, pos - offset);
-    }
-    return "";
-  }
-  return get(offset);
-}
-
-const Elf_Ehdr* Library::getEhdr() {
-  // Hands out the stored ELF header, or NULL while the library is not valid.
-  if (valid_) {
-    return &ehdr_;
-  }
-  return NULL;
-}
-
-const Elf_Shdr* Library::getSection(const string& section) {
-  // Looks up the section header stored under |section|. Returns NULL if
-  // the library is not valid or the name is not in the section table.
-  if (valid_) {
-    SectionTable::const_iterator entry = section_table_.find(section);
-    if (entry != section_table_.end()) {
-      return &entry->second.second;
-    }
-  }
-  return NULL;
-}
-
-int Library::getSectionIndex(const string& section) {
-  // Looks up the index stored under |section|. Returns -1 if the library
-  // is not valid or the name is not in the section table.
-  if (valid_) {
-    SectionTable::const_iterator entry = section_table_.find(section);
-    if (entry != section_table_.end()) {
-      return entry->second.first;
-    }
-  }
-  return -1;
-}
-
-void Library::makeWritable(bool state) const {
-  // Re-applies the recorded protection to every memory range, with
-  // PROT_WRITE added when |state| is true.
-  const int extra_prot = state ? PROT_WRITE : 0;
-  RangeMap::const_iterator it = memory_ranges_.begin();
-  while (it != memory_ranges_.end()) {
-    const Range& current = it->second;
-    const long bytes = reinterpret_cast<char *>(current.stop) -
-                       reinterpret_cast<char *>(current.start);
-    Sandbox::SysCalls sys;
-    sys.mprotect(current.start, bytes, current.prot | extra_prot);
-    ++it;
-  }
-}
-
-bool Library::isSafeInsn(unsigned short insn) {
-  // Check if the instruction has no unexpected side-effects. If so, it can
-  // be safely relocated from the function that we are patching into the
-  // out-of-line scratch space that we are setting up. This is often necessary
-  // to make room for the JMP into the scratch space.
-  //
-  // |insn| can hold a two-byte opcode (e.g. 0x0F05 for SYSCALL), so the
-  // first test compares the whole value against 0x40. Masking with 0xF0
-  // would only look at the low byte and accept opcodes such as 0x0F05.
-  return (insn < 0x40 && (insn & 0x7) < 0x6
-          /* ADD, OR, ADC, SBB, AND, SUB, XOR, CMP */) ||
-  #if defined(__x86_64__)
-         insn == 0x63 /* MOVSXD */ ||
-  #endif
-         (insn >= 0x80 && insn <= 0x8E /* ADD, OR, ADC,
-          SBB, AND, SUB, XOR, CMP, TEST, XCHG, MOV, LEA */) ||
-         (insn == 0x90) || /* NOP */
-         (insn >= 0xA0 && insn <= 0xA9) /* MOV, TEST */ ||
-         (insn >= 0xB0 && insn <= 0xBF /* MOV */) ||
-         (insn >= 0xC0 && insn <= 0xC1) || /* Bit Shift */
-         (insn >= 0xD0 && insn <= 0xD3) || /* Bit Shift */
-         (insn >= 0xC6 && insn <= 0xC7 /* MOV */) ||
-         (insn == 0xF7) /* TEST, NOT, NEG, MUL, IMUL, DIV, IDIV */;
-}
-
-char* Library::getScratchSpace(const Maps* maps, char* near, int needed,
-                               char** extraSpace, int* extraLength) {
-  // Carves |needed| bytes off the end of the current scratch page and
-  // returns their address. A new 4096 byte page is requested near |near|
-  // when the current one has too little room left or lies more than
-  // 1536MB away. Dies if no scratch page is available.
-  const bool too_small = needed > *extraLength;
-  if (too_small || labs(*extraSpace - near) > (1536 << 20)) {
-    if (*extraSpace) {
-      // Retire the previous scratch page by dropping write access to it.
-      Sandbox::SysCalls sys;
-      sys.mprotect(*extraSpace, 4096, PROT_READ|PROT_EXEC);
-    }
-    // The replacement page starts out both writable and executable.
-    *extraLength = 4096;
-    *extraSpace = maps->allocNearAddr(near, *extraLength,
-                                      PROT_READ|PROT_WRITE|PROT_EXEC);
-  }
-  if (*extraSpace) {
-    // Allocate from the top of the page downwards.
-    const int remaining = *extraLength - needed;
-    *extraLength = remaining;
-    return *extraSpace + remaining;
-  }
-  Sandbox::die("Insufficient space to intercept system call");
-}
-
-void Library::patchSystemCallsInFunction(const Maps* maps, char *start,
- char *end, char** extraSpace,
- int* extraLength) {
- std::set<char *, std::less<char *>, SystemAllocator<char *> > branch_targets;
- for (char *ptr = start; ptr < end; ) {
- unsigned short insn = next_inst((const char **)&ptr, __WORDSIZE == 64);
- char *target;
- if ((insn >= 0x70 && insn <= 0x7F) /* Jcc */ || insn == 0xEB /* JMP */) {
- target = ptr + (reinterpret_cast<signed char *>(ptr))[-1];
- } else if (insn == 0xE8 /* CALL */ || insn == 0xE9 /* JMP */ ||
- (insn >= 0x0F80 && insn <= 0x0F8F) /* Jcc */) {
- target = ptr + (reinterpret_cast<int *>(ptr))[-1];
- } else {
- continue;
- }
- branch_targets.insert(target);
- }
- struct Code {
- char* addr;
- int len;
- unsigned short insn;
- bool is_ip_relative;
- } code[5] = { { 0 } };
- int codeIdx = 0;
- char* ptr = start;
- while (ptr < end) {
- // Keep a ring-buffer of the last few instruction in order to find the
- // correct place to patch the code.
- char *mod_rm;
- code[codeIdx].addr = ptr;
- code[codeIdx].insn = next_inst((const char **)&ptr, __WORDSIZE == 64,
- 0, 0, &mod_rm, 0, 0);
- code[codeIdx].len = ptr - code[codeIdx].addr;
- code[codeIdx].is_ip_relative =
- #if defined(__x86_64__)
- mod_rm && (*mod_rm & 0xC7) == 0x5;
- #else
- false;
- #endif
-
- // Whenever we find a system call, we patch it with a jump to out-of-line
- // code that redirects to our system call wrapper.
- bool is_syscall = true;
- #if defined(__x86_64__)
- bool is_indirect_call = false;
- if (code[codeIdx].insn == 0x0F05 /* SYSCALL */ ||
- // In addition, on x86-64, we need to redirect all CALLs between the
- // VDSO and the VSyscalls page. We want these to jump to our own
- // modified copy of the VSyscalls. As we know that the VSyscalls are
- // always more than 2GB away from the VDSO, the compiler has to
- // generate some form of indirect jumps. We can find all indirect
- // CALLs and redirect them to a separate scratch area, where we can
- // inspect the destination address. If it indeed points to the
- // VSyscall area, we then adjust the destination address accordingly.
- (is_indirect_call =
- (isVDSO_ && vsys_offset_ && code[codeIdx].insn == 0xFF &&
- !code[codeIdx].is_ip_relative &&
- mod_rm && (*mod_rm & 0x38) == 0x10 /* CALL (indirect) */))) {
- is_syscall = !is_indirect_call;
- #elif defined(__i386__)
- bool is_gs_call = false;
- if (code[codeIdx].len == 7 &&
- code[codeIdx].insn == 0xFF &&
- code[codeIdx].addr[2] == '\x15' /* CALL (indirect) */ &&
- code[codeIdx].addr[0] == '\x65' /* %gs prefix */) {
- char* target;
- asm volatile("mov %%gs:(%1), %0\n"
- : "=a"(target)
- : "c"(*reinterpret_cast<int *>(code[codeIdx].addr+3)));
- if (target == __kernel_vsyscall) {
- is_gs_call = true;
- // TODO(markus): also handle the other vsyscalls
- }
- }
- if (is_gs_call ||
- (code[codeIdx].insn == 0xCD &&
- code[codeIdx].addr[1] == '\x80' /* INT $0x80 */)) {
- #else
- #error Unsupported target platform
- #endif
- // Found a system call. Search backwards to figure out how to redirect
- // the code. We will need to overwrite a couple of instructions and,
- // of course, move these instructions somewhere else.
- int startIdx = codeIdx;
- int endIdx = codeIdx;
- int length = code[codeIdx].len;
- for (int idx = codeIdx;
- (idx = (idx + (sizeof(code) / sizeof(struct Code)) - 1) %
- (sizeof(code) / sizeof(struct Code))) != codeIdx; ) {
- std::set<char *>::const_iterator iter =
- std::upper_bound(branch_targets.begin(), branch_targets.end(),
- code[idx].addr);
- if (iter != branch_targets.end() && *iter < ptr) {
- // Found a branch pointing to somewhere past our instruction. This
- // instruction cannot be moved safely. Leave it in place.
- break;
- }
- if (code[idx].addr && !code[idx].is_ip_relative &&
- isSafeInsn(code[idx].insn)) {
- // These are all benign instructions with no side-effects and no
- // dependency on the program counter. We should be able to safely
- // relocate them.
- startIdx = idx;
- length = ptr - code[startIdx].addr;
- } else {
- break;
- }
- }
- // Search forward past the system call, too. Sometimes, we can only
- // find relocatable instructions following the system call.
- #if defined(__i386__)
- findEndIdx:
- #endif
- char *next = ptr;
- for (int i = codeIdx;
- next < end &&
- (i = (i + 1) % (sizeof(code) / sizeof(struct Code))) != startIdx;
- ) {
- std::set<char *>::const_iterator iter =
- std::lower_bound(branch_targets.begin(), branch_targets.end(),
- next);
- if (iter != branch_targets.end() && *iter == next) {
- // Found branch target pointing to our instruction
- break;
- }
- char *tmp_rm;
- code[i].addr = next;
- code[i].insn = next_inst((const char **)&next, __WORDSIZE == 64,
- 0, 0, &tmp_rm, 0, 0);
- code[i].len = next - code[i].addr;
- code[i].is_ip_relative = tmp_rm && (*tmp_rm & 0xC7) == 0x5;
- if (!code[i].is_ip_relative && isSafeInsn(code[i].insn)) {
- endIdx = i;
- length = next - code[startIdx].addr;
- } else {
- break;
- }
- }
- // We now know, how many instructions neighboring the system call we
- // can safely overwrite. On x86-32 we need six bytes, and on x86-64
- // We need five bytes to insert a JMPQ and a 32bit address. We then
- // jump to a code fragment that safely forwards to our system call
- // wrapper.
- // On x86-64, this is complicated by the fact that the API allows up
- // to 128 bytes of red-zones below the current stack pointer. So, we
- // cannot write to the stack until we have adjusted the stack
- // pointer.
- // On both x86-32 and x86-64 we take care to leave the stack unchanged
- // while we are executing the preamble and postamble. This allows us
- // to treat instructions that reference %esp/%rsp as safe for
- // relocation.
- // In particular, this means that on x86-32 we cannot use CALL, but
- // have to use a PUSH/RET combination to change the instruction pointer.
- // On x86-64, we can instead use a 32bit JMPQ.
- //
- // .. .. .. .. ; any leading instructions copied from original code
- // 48 81 EC 80 00 00 00 SUB $0x80, %rsp
- // 50 PUSH %rax
- // 48 8D 05 .. .. .. .. LEA ...(%rip), %rax
- // 50 PUSH %rax
- // 48 B8 .. .. .. .. MOV $syscallWrapper, %rax
- // .. .. .. ..
- // 50 PUSH %rax
- // 48 8D 05 06 00 00 00 LEA 6(%rip), %rax
- // 48 87 44 24 10 XCHG %rax, 16(%rsp)
- // C3 RETQ
- // 48 81 C4 80 00 00 00 ADD $0x80, %rsp
- // .. .. .. .. ; any trailing instructions copied from original code
- // E9 .. .. .. .. JMPQ ...
- //
- // Total: 52 bytes + any bytes that were copied
- //
- // On x86-32, the stack is available and we can do:
- //
- // TODO(markus): Try to maintain frame pointers on x86-32
- //
- // .. .. .. .. ; any leading instructions copied from original code
- // 68 .. .. .. .. PUSH return_addr
- // 68 .. .. .. .. PUSH $syscallWrapper
- // C3 RET
- // .. .. .. .. ; any trailing instructions copied from original code
- // 68 .. .. .. .. PUSH return_addr
- // C3 RET
- //
- // Total: 17 bytes + any bytes that were copied
- //
- // For indirect jumps from the VDSO to the VSyscall page, we instead
- // replace the following code (this is only necessary on x86-64). This
- // time, we don't have to worry about red zones:
- //
- // .. .. .. .. ; any leading instructions copied from original code
- // E8 00 00 00 00 CALL .
- // 48 83 04 24 .. ADDQ $.., (%rsp)
- // FF .. .. .. .. .. PUSH .. ; from original CALL instruction
- // 48 81 3C 24 00 00 00 FF CMPQ $0xFFFFFFFFFF000000, 0(%rsp)
- // 72 10 JB . + 16
- // 81 2C 24 .. .. .. .. SUBL ..., 0(%rsp)
- // C7 44 24 04 00 00 00 00 MOVL $0, 4(%rsp)
- // C3 RETQ
- // 48 87 04 24 XCHG %rax,(%rsp)
- // 48 89 44 24 08 MOV %rax,0x8(%rsp)
- // 58 POP %rax
- // C3 RETQ
- // .. .. .. .. ; any trailing instructions copied from original code
- // E9 .. .. .. .. JMPQ ...
- //
- // Total: 52 bytes + any bytes that were copied
-
- if (length < (__WORDSIZE == 32 ? 6 : 5)) {
- // There are a very small number of instruction sequences that we
- // cannot easily intercept, and that have been observed in real world
- // examples. Handle them here:
- #if defined(__i386__)
- int diff;
- if (!memcmp(code[codeIdx].addr, "\xCD\x80\xEB", 3) &&
- (diff = *reinterpret_cast<signed char *>(
- code[codeIdx].addr + 3)) < 0 && diff >= -6) {
- // We have seen...
- // for (;;) {
- // _exit(0);
- // }
- // ..get compiled to:
- // B8 01 00 00 00 MOV $__NR_exit, %eax
- // 66 90 XCHG %ax, %ax
- // 31 DB 0:XOR %ebx, %ebx
- // CD 80 INT $0x80
- // EB FA JMP 0b
- // The JMP is really superfluous as the system call never returns.
- // And there are in fact no returning system calls that need to be
- // unconditionally repeated in an infinite loop.
- // If we replace the JMP with NOPs, the system call can successfully
- // be intercepted.
- *reinterpret_cast<unsigned short *>(code[codeIdx].addr + 2) = 0x9090;
- goto findEndIdx;
- }
- #elif defined(__x86_64__)
- std::set<char *>::const_iterator iter;
- #endif
- // If we cannot figure out any other way to intercept this system call,
- // we replace it with a call to INT0. This causes a SEGV which we then
- // handle in the signal handler. That's a lot slower than rewriting the
- // instruction with a jump, but it should only happen very rarely.
- if (is_syscall) {
- memcpy(code[codeIdx].addr, "\xCD", 2);
- if (code[codeIdx].len > 2) {
- memset(code[codeIdx].addr + 2, 0x90, code[codeIdx].len - 2);
- }
- goto replaced;
- }
- #if defined(__x86_64__)
- // On x86-64, we occasionally see code like this in the VDSO:
- // 48 8B 05 CF FE FF FF MOV -0x131(%rip),%rax
- // FF 50 20 CALLQ *0x20(%rax)
- // By default, we would not replace the MOV instruction, as it is
- // IP relative. But if the following instruction is also IP relative,
- // we are left with only three bytes which is not enough to insert a
- // jump.
- // We recognize this particular situation, and as long as the CALLQ
- // is not a branch target, we decide to still relocate the entire
- // sequence. We just have to make sure that we then patch up the
- // IP relative addressing.
- else if (is_indirect_call && startIdx == codeIdx &&
- code[startIdx = (startIdx + (sizeof(code) /
- sizeof(struct Code)) - 1) %
- (sizeof(code) / sizeof(struct Code))].addr &&
- ptr - code[startIdx].addr >= 5 &&
- code[startIdx].is_ip_relative &&
- isSafeInsn(code[startIdx].insn) &&
- ((iter = std::upper_bound(branch_targets.begin(),
- branch_targets.end(),
- code[startIdx].addr)) ==
- branch_targets.end() || *iter >= ptr)) {
- // We changed startIdx to include the IP relative instruction.
- // When copying this preamble, we make sure to patch up the
- // offset.
- }
- #endif
- else {
- Sandbox::die("Cannot intercept system call");
- }
- }
- int needed = (__WORDSIZE == 32 ? 6 : 5) - code[codeIdx].len;
- int first = codeIdx;
- while (needed > 0 && first != startIdx) {
- first = (first + (sizeof(code) / sizeof(struct Code)) - 1) %
- (sizeof(code) / sizeof(struct Code));
- needed -= code[first].len;
- }
- int second = codeIdx;
- while (needed > 0) {
- second = (second + 1) % (sizeof(code) / sizeof(struct Code));
- needed -= code[second].len;
- }
- int preamble = code[codeIdx].addr - code[first].addr;
- int postamble = code[second].addr + code[second].len -
- code[codeIdx].addr - code[codeIdx].len;
-
- // The following is all the code that construct the various bits of
- // assembly code.
- #if defined(__x86_64__)
- if (is_indirect_call) {
- needed = 52 + preamble + code[codeIdx].len + postamble;
- } else {
- needed = 52 + preamble + postamble;
- }
- #elif defined(__i386__)
- needed = 17 + preamble + postamble;
- #else
- #error Unsupported target platform
- #endif
-
- // Allocate scratch space and copy the preamble of code that was moved
- // from the function that we are patching.
- char* dest = getScratchSpace(maps, code[first].addr, needed,
- extraSpace, extraLength);
- memcpy(dest, code[first].addr, preamble);
-
- // For jumps from the VDSO to the VSyscalls we sometimes allow exactly
- // one IP relative instruction in the preamble.
- if (code[first].is_ip_relative) {
- *reinterpret_cast<int *>(dest + (code[codeIdx].addr -
- code[first].addr) - 4)
- -= dest - code[first].addr;
- }
-
- // For indirect calls, we need to copy the actual CALL instruction and
- // turn it into a PUSH instruction.
- #if defined(__x86_64__)
- if (is_indirect_call) {
- memcpy(dest + preamble, "\xE8\x00\x00\x00\x00\x48\x83\x04\x24", 9);
- dest[preamble + 9] = code[codeIdx].len + 42;
- memcpy(dest + preamble + 10, code[codeIdx].addr, code[codeIdx].len);
-
- // Convert CALL -> PUSH
- dest[preamble + 10 + (mod_rm - code[codeIdx].addr)] |= 0x20;
- preamble += 10 + code[codeIdx].len;
- }
- #endif
-
- // Copy the static body of the assembly code.
- memcpy(dest + preamble,
- #if defined(__x86_64__)
- is_indirect_call ?
- "\x48\x81\x3C\x24\x00\x00\x00\xFF\x72\x10\x81\x2C\x24\x00\x00\x00"
- "\x00\xC7\x44\x24\x04\x00\x00\x00\x00\xC3\x48\x87\x04\x24\x48\x89"
- "\x44\x24\x08\x58\xC3" :
- "\x48\x81\xEC\x80\x00\x00\x00\x50\x48\x8D\x05\x00\x00\x00\x00\x50"
- "\x48\xB8\x00\x00\x00\x00\x00\x00\x00\x00\x50\x48\x8D\x05\x06\x00"
- "\x00\x00\x48\x87\x44\x24\x10\xC3\x48\x81\xC4\x80\x00\x00",
- is_indirect_call ? 37 : 47
- #elif defined(__i386__)
- "\x68\x00\x00\x00\x00\x68\x00\x00\x00\x00\xC3", 11
- #else
- #error Unsupported target platform
- #endif
- );
-
- // Copy the postamble that was moved from the function that we are
- // patching.
- memcpy(dest + preamble +
- #if defined(__x86_64__)
- (is_indirect_call ? 37 : 47),
- #elif defined(__i386__)
- 11,
- #else
- #error Unsupported target platform
- #endif
- code[codeIdx].addr + code[codeIdx].len,
- postamble);
-
- // Patch up the various computed values
- #if defined(__x86_64__)
- int post = preamble + (is_indirect_call ? 37 : 47) + postamble;
- dest[post] = '\xE9';
- *reinterpret_cast<int *>(dest + post + 1) =
- (code[second].addr + code[second].len) - (dest + post + 5);
- if (is_indirect_call) {
- *reinterpret_cast<int *>(dest + preamble + 13) = vsys_offset_;
- } else {
- *reinterpret_cast<int *>(dest + preamble + 11) =
- (code[second].addr + code[second].len) - (dest + preamble + 15);
- *reinterpret_cast<void **>(dest + preamble + 18) =
- reinterpret_cast<void *>(&syscallWrapper);
- }
- #elif defined(__i386__)
- *(dest + preamble + 11 + postamble) = '\x68'; // PUSH
- *reinterpret_cast<char **>(dest + preamble + 12 + postamble) =
- code[second].addr + code[second].len;
- *(dest + preamble + 16 + postamble) = '\xC3'; // RET
- *reinterpret_cast<char **>(dest + preamble + 1) =
- dest + preamble + 11;
- *reinterpret_cast<void (**)()>(dest + preamble + 6) = syscallWrapper;
- #else
- #error Unsupported target platform
- #endif
-
- // Pad unused space in the original function with NOPs
- memset(code[first].addr, 0x90 /* NOP */,
- code[second].addr + code[second].len - code[first].addr);
-
- // Replace the system call with an unconditional jump to our new code.
- #if defined(__x86_64__)
- *code[first].addr = '\xE9'; // JMPQ
- *reinterpret_cast<int *>(code[first].addr + 1) =
- dest - (code[first].addr + 5);
- #elif defined(__i386__)
- code[first].addr[0] = '\x68'; // PUSH
- *reinterpret_cast<char **>(code[first].addr + 1) = dest;
- code[first].addr[5] = '\xC3'; // RET
- #else
- #error Unsupported target platform
- #endif
- }
- replaced:
- codeIdx = (codeIdx + 1) % (sizeof(code) / sizeof(struct Code));
- }
-}
-
-void Library::patchVDSO(char** extraSpace, int* extraLength){
-  // Redirects the well-known x86-32 VDSO entry points (__kernel_vsyscall,
-  // __kernel_sigreturn and __kernel_rt_sigreturn) to syscallWrapper. On any
-  // other architecture the body is compiled out and this is a no-op.
-  //
-  // extraSpace/extraLength are handed through to getScratchSpace() whenever
-  // a trampoline has to be allocated.
-  #if defined(__i386__)
-  Sandbox::SysCalls sys;
-  if (!__kernel_vsyscall) {
-    // Without a kernel entry point there is nothing we can patch.
-    return;
-  }
-  void* const entryPage = reinterpret_cast<void *>(
-      reinterpret_cast<long>(__kernel_vsyscall) & ~0xFFF);
-  if (sys.mprotect(entryPage, 4096, PROT_READ|PROT_WRITE|PROT_EXEC)) {
-    // The page holding the entry point could not be made writable.
-    return;
-  }
-
-  // The handful of functions in the x86-32 VDSO are not a good fit for the
-  // automatic rewriting code, so each of them gets an explicit replacement.
-  // All replacements funnel into syscallWrapper (i.e. INT $0x80 semantics),
-  // regardless of whether the hardware supports a faster entry sequence.
-  //
-  // TODO(markus): Investigate whether it is worthwhile to optimize this
-  //               code path and use the platform-specific entry code.
-  char* const wrapper = reinterpret_cast<char *>(&syscallWrapper);
-
-  // Kernel entry point becomes:
-  //
-  //   E9 .. .. .. ..    JMP syscallWrapper
-  __kernel_vsyscall[0] = '\xE9';
-  *reinterpret_cast<long *>(__kernel_vsyscall + 1) =
-      wrapper - (__kernel_vsyscall + 5);
-
-  if (__kernel_sigreturn) {
-    // sigreturn() becomes a jump to an 11 byte trampoline:
-    //
-    //   58                POP  %eax
-    //   B8 77 00 00 00    MOV  $0x77, %eax
-    //   E8 .. .. .. ..    CALL syscallWrapper
-    char* const stub = getScratchSpace(maps_, __kernel_sigreturn, 11,
-                                       extraSpace, extraLength);
-    memcpy(stub, "\x58\xB8\x77\x00\x00\x00\xE8", 7);
-    // The CALL displacement is relative to the end of the trampoline.
-    *reinterpret_cast<long *>(stub + 7) = wrapper - stub - 11;
-    __kernel_sigreturn[0] = '\xE9';
-    *reinterpret_cast<long *>(__kernel_sigreturn + 1) =
-        stub - __kernel_sigreturn - 5;
-  }
-
-  if (__kernel_rt_sigreturn) {
-    // rt_sigreturn() becomes a jump to a 10 byte trampoline:
-    //
-    //   B8 AD 00 00 00    MOV  $0xAD, %eax
-    //   E8 .. .. .. ..    CALL syscallWrapper
-    char* const stub = getScratchSpace(maps_, __kernel_rt_sigreturn, 10,
-                                       extraSpace, extraLength);
-    memcpy(stub, "\xB8\xAD\x00\x00\x00\xE8", 6);
-    *reinterpret_cast<long *>(stub + 6) = wrapper - stub - 10;
-    __kernel_rt_sigreturn[0] = '\xE9';
-    *reinterpret_cast<long *>(__kernel_rt_sigreturn + 1) =
-        stub - __kernel_rt_sigreturn - 5;
-  }
-  #endif
-}
-
-int Library::patchVSystemCalls() {
- // Creates a patched, executable copy of the x86-64 [vsyscall] page and
- // returns the distance from that copy to the original page
- // (maps_->vsyscall() - copy). Returns 0 if there is no vsyscall page or
- // when not compiled for x86-64.
- #if defined(__x86_64__)
- // VSyscalls live in a shared 4kB page at the top of the address space. This
- // page cannot be unmapped nor remapped. We have to create a copy within
- // 2GB of the page, and rewrite all IP-relative accesses to shared variables.
- // As the top of the address space is not accessible by mmap(), this means
- // that we need to wrap around addresses to the bottom 2GB of the address
- // space.
- // Only x86-64 has VSyscalls.
- if (maps_->vsyscall()) {
- // NOTE(review): the result of allocNearAddr() is used without a NULL
- // check -- confirm that it cannot fail (or dies internally).
- char* copy = maps_->allocNearAddr(maps_->vsyscall(), 0x1000,
- PROT_READ|PROT_WRITE|PROT_EXEC);
- // The copy itself is handed to patchSystemCallsInFunction() as the
- // scratch area (extraSpace/extraLength) further down.
- char* extraSpace = copy;
- int extraLength = 0x1000;
- memcpy(copy, maps_->vsyscall(), 0x1000);
- // Distance that IP-relative displacements in the copy must be shifted by
- // so that they keep referring to data in the original page.
- long adjust = (long)maps_->vsyscall() - (long)copy;
- // Walk the page in four slots of 0x400 bytes each.
- for (int vsys = 0; vsys < 0x1000; vsys += 0x400) {
- char* start = copy + vsys;
- char* end = start + 0x400;
-
- // There can only be up to four VSyscalls starting at an offset of
- // n*0x400, each. VSyscalls are invoked by functions in the VDSO
- // and provide fast implementations of a time source. We don't exactly
- // know where the code and where the data is in the VSyscalls page.
- // So, we disassemble the code for each function and find all branch
- // targets within the function in order to find the last address of
- // function.
- // "last" tracks the highest forward branch target seen so far, "vars"
- // the lowest address known to hold variables rather than code.
- for (char *last = start, *vars = end, *ptr = start; ptr < end; ) {
- new_function:
- char* mod_rm;
- unsigned short insn = next_inst((const char **)&ptr, true, 0, 0,
- &mod_rm, 0, 0);
- if (mod_rm && (*mod_rm & 0xC7) == 0x5) {
- // Instruction has IP relative addressing mode. Adjust to reference
- // the variables in the original VSyscall segment.
- long offset = *reinterpret_cast<int *>(mod_rm + 1);
- char* var = ptr + offset;
- if (var >= ptr && var < vars) {
- // Variables are stored somewhere past all the functions. Remember
- // the first variable in the VSyscall slot, so that we stop
- // scanning for instructions once we reach that address.
- vars = var;
- }
- offset += adjust;
- // The adjusted displacement is written back as a 32bit value; give up
- // if its upper 32 bits are neither all zeros nor all ones.
- if ((offset >> 32) && (offset >> 32) != -1) {
- Sandbox::die("Cannot patch [vsystemcall]");
- }
- *reinterpret_cast<int *>(mod_rm + 1) = offset;
- }
-
- // Check for jump targets to higher addresses (but within our own
- // VSyscall slot). They extend the possible end-address of this
- // function.
- char *target = 0;
- if ((insn >= 0x70 && insn <= 0x7F) /* Jcc */ ||
- insn == 0xEB /* JMP */) {
- // Short branch: signed 8bit displacement in the last instruction byte.
- target = ptr + (reinterpret_cast<signed char *>(ptr))[-1];
- } else if (insn == 0xE8 /* CALL */ || insn == 0xE9 /* JMP */ ||
- (insn >= 0x0F80 && insn <= 0x0F8F) /* Jcc */) {
- // Near branch: signed 32bit displacement in the last four bytes.
- target = ptr + (reinterpret_cast<int *>(ptr))[-1];
- }
-
- // The function end is found, once the loop reaches the last valid
- // address in the VSyscall slot, or once it finds a RET instruction
- // that is not followed by any jump targets. Unconditional jumps that
- // point backwards are treated the same as a RET instruction.
- if (insn == 0xC3 /* RET */ ||
- (target < ptr &&
- (insn == 0xEB /* JMP */ || insn == 0xE9 /* JMP */))) {
- if (last >= ptr) {
- // A known branch target lies at or beyond this point, so the
- // function continues; keep disassembling.
- continue;
- } else {
- // The function can optionally be followed by more functions in
- // the same VSyscall slot. Allow for alignment to a 16 byte
- // boundary. If we then find more non-zero bytes, and if this is
- // not the known start of the variables, assume a new function
- // started.
- for (; ptr < vars; ++ptr) {
- if ((long)ptr & 0xF) {
- if (*ptr && *ptr != '\x90' /* NOP */) {
- goto new_function;
- }
- *ptr = '\x90'; // NOP
- } else {
- if (*ptr && *ptr != '\x90' /* NOP */) {
- goto new_function;
- }
- break;
- }
- }
-
- // Translate all SYSCALLs to jumps into our system call handler.
- patchSystemCallsInFunction(NULL, start, ptr,
- &extraSpace, &extraLength);
- break;
- }
- }
-
- // Adjust assumed end address for this function, if a valid jump
- // target has been found that originates from the current instruction.
- // NOTE(review): the upper bound is start + 0x100 although the slot ends
- // at start + 0x400 ("end") -- confirm that this limit is intentional.
- if (target > last && target < start + 0x100) {
- last = target;
- }
- }
- }
-
- // We are done. Write-protect our code and make it executable.
- Sandbox::SysCalls sys;
- sys.mprotect(copy, 0x1000, PROT_READ|PROT_EXEC);
- // NOTE(review): the pointer difference is narrowed to the int return
- // type here -- confirm the copy is always close enough for this to fit.
- return maps_->vsyscall() - copy;
- }
- #endif
- return 0;
-}
-
-void Library::patchSystemCalls() {
-  // Scans this library's ".text" section for byte sequences that look like
-  // system calls and hands every function that appears to contain one to
-  // patchSystemCallsInFunction(). Does nothing if the library has not been
-  // parsed successfully (valid_ is false) or has no ".text" section.
-  if (!valid_) {
-    return;
-  }
-  int extraLength = 0;
-  char* extraSpace = NULL;
-  if (isVDSO_) {
-    // patchSystemCallsInFunction() reads vsys_offset_ when it rewrites
-    // indirect calls, so compute it before any other patching takes place.
-    vsys_offset_ = patchVSystemCalls();
-    #if defined(__i386__)
-    patchVDSO(&extraSpace, &extraLength);
-    // patchVDSO() may have allocated scratch space for its trampolines.
-    // Write-protect it here, as we return before reaching the common
-    // clean-up code at the end of this function.
-    if (extraSpace) {
-      Sandbox::SysCalls sys;
-      sys.mprotect(extraSpace, 4096, PROT_READ|PROT_EXEC);
-    }
-    return;
-    #endif
-  }
-  SectionTable::const_iterator iter;
-  if ((iter = section_table_.find(".text")) == section_table_.end()) {
-    return;
-  }
-  const Elf_Shdr& shdr = iter->second.second;
-  char* start = reinterpret_cast<char *>(shdr.sh_addr + asr_offset_);
-  char* stop = start + shdr.sh_size;
-  char* func = start;       // Start of the function currently being scanned
-  int nopcount = 0;         // Number of consecutive NOPs just before "ptr"
-  bool has_syscall = false; // Whether "func" contains a potential syscall
-  for (char *ptr = start; ptr < stop; ptr++) {
-    // Multi-byte patterns are only matched if they lie entirely within the
-    // section; never look at bytes at or beyond "stop".
-    #if defined(__x86_64__)
-    if ((*ptr == '\x0F' && ptr + 1 < stop &&
-         ptr[1] == '\x05' /* SYSCALL */) ||
-        (isVDSO_ && *ptr == '\xFF')) {
-    #elif defined(__i386__)
-    if ((*ptr == '\xCD' && ptr + 1 < stop &&
-         ptr[1] == '\x80' /* INT $0x80 */) ||
-        (*ptr == '\x65' && ptr + 2 < stop && ptr[1] == '\xFF' &&
-         ptr[2] == '\x15' /* CALL %gs:.. */)) {
-    #else
-    #error Unsupported target platform
-    #endif
-      ptr++;
-      has_syscall = true;
-      nopcount = 0;
-    } else if (*ptr == '\x90' /* NOP */) {
-      nopcount++;
-    } else if (!(reinterpret_cast<long>(ptr) & 0xF)) {
-      if (nopcount > 2) {
-        // This is very likely the beginning of a new function. Functions
-        // are aligned on 16 byte boundaries and the preceding function is
-        // padded out with NOPs.
-        //
-        // For performance reasons, we quickly scan the entire text segment
-        // for potential SYSCALLs, and then patch the code in increments of
-        // individual functions.
-        if (has_syscall) {
-          has_syscall = false;
-          // Our quick scan of the function found a potential system call.
-          // Do a more thorough scan, now.
-          patchSystemCallsInFunction(maps_, func, ptr, &extraSpace,
-                                     &extraLength);
-        }
-        func = ptr;
-      }
-      nopcount = 0;
-    } else {
-      nopcount = 0;
-    }
-  }
-  if (has_syscall) {
-    // Patch any remaining system calls that were in the last function before
-    // the loop terminated.
-    patchSystemCallsInFunction(maps_, func, stop, &extraSpace, &extraLength);
-  }
-
-  // Mark our scratch space as write-protected and executable.
-  if (extraSpace) {
-    Sandbox::SysCalls sys;
-    sys.mprotect(extraSpace, 4096, PROT_READ|PROT_EXEC);
-  }
-}
-
-bool Library::parseElf() {
-  // Reads the ELF header and section table of this mapping, fills in
-  // section_table_ and computes asr_offset_ from the location of the text
-  // section. For the VDSO, the symbol tables are parsed as well.
-  //
-  // Returns true on success. On failure, valid_ is cleared and false is
-  // returned; a failure of parseSymbols() is reported through the return
-  // value, too.
-
-  // The flag has to be raised before the first read, as the templated
-  // getOriginal() refuses to return data while valid_ is false.
-  valid_ = true;
-
-  // Sanity check the ELF header and fetch the header of the section that
-  // holds the section names. Not every memory mapping is an ELF file, so a
-  // failure here just means that this mapping gets skipped.
-  Elf_Shdr names_hdr;
-  const bool header_ok =
-      getOriginal(0, &ehdr_) &&
-      ehdr_.e_ehsize >= sizeof(Elf_Ehdr) &&
-      ehdr_.e_phentsize >= sizeof(Elf_Phdr) &&
-      ehdr_.e_shentsize >= sizeof(Elf_Shdr) &&
-      getOriginal(ehdr_.e_shoff + ehdr_.e_shstrndx * ehdr_.e_shentsize,
-                  &names_hdr);
-  if (!header_ok) {
-    valid_ = false;
-    return false;
-  }
-
-  // Record every section header that can be read, keyed by section name.
-  // Unreadable entries are silently skipped.
-  for (int idx = 0; idx < ehdr_.e_shnum; idx++) {
-    Elf_Shdr section;
-    if (getOriginal(ehdr_.e_shoff + idx*ehdr_.e_shentsize, &section)) {
-      const string section_name =
-          getOriginal(names_hdr.sh_offset + section.sh_name);
-      section_table_.insert(
-          std::make_pair(section_name, std::make_pair(idx, section)));
-    }
-  }
-
-  // Locate the text section. On x86-32, the VDSO is unusual in that it has
-  // one section per function instead of a single ".text" section. All of
-  // these section names start with ".text", and any one of them is good
-  // enough for finding the asr_offset_ -- which would typically be zero for
-  // the VDSO.
-  const Elf_Shdr* text = getSection(".text");
-  if (!text) {
-    SectionTable::const_iterator sec = section_table_.begin();
-    while (sec != section_table_.end()) {
-      if (strncmp(sec->first.c_str(), ".text", 5) == 0) {
-        text = &sec->second.second;
-        break;
-      }
-      ++sec;
-    }
-  }
-  if (!text) {
-    valid_ = false;
-    return false;
-  }
-
-  // Knowing where the text section lives in the file, look up the memory
-  // range that it was mapped into and derive the asr_offset_ from that.
-  RangeMap::const_iterator range = memory_ranges_.lower_bound(text->sh_offset);
-  if (range == memory_ranges_.end()) {
-    valid_ = false;
-    return false;
-  }
-  asr_offset_ = reinterpret_cast<char *>(range->second.start) -
-                (text->sh_addr - (text->sh_offset - range->first));
-
-  // Symbols are only needed for the VDSO.
-  return !isVDSO_ || parseSymbols();
-}
-
-bool Library::parseSymbols() {
-  // Fills plt_entries_ and symbols_ from the PLT relocation section and the
-  // ".dynsym" table, then records the addresses of the __kernel_* entry
-  // points if the library defines them.
-  //
-  // Returns false right away if the library is not valid. If a malformed
-  // table entry is encountered, a debug message is emitted, valid_ is
-  // cleared and false is returned. Returns true otherwise.
-  if (!valid_) {
-    return false;
-  }
-
-  // Header of the section name string table. The value is not used any
-  // further in this function.
-  Elf_Shdr str_shdr;
-  getOriginal(ehdr_.e_shoff + ehdr_.e_shstrndx * ehdr_.e_shentsize, &str_shdr);
-
-  // Look up the PLT relocations, the dynamic symbol table, and the string
-  // table that the symbol table links to.
-  const Elf_Shdr* const plt = getSection(ELF_REL_PLT);
-  const Elf_Shdr* const symtab = getSection(".dynsym");
-  Elf_Shdr strtab = { 0 };
-  if (symtab &&
-      (symtab->sh_link >= ehdr_.e_shnum ||
-       !getOriginal(ehdr_.e_shoff + symtab->sh_link * ehdr_.e_shentsize,
-                    &strtab))) {
-    Debug::message("Cannot find valid symbol table\n");
-    valid_ = false;
-    return false;
-  }
-
-  if (plt && symtab) {
-    // Walk the PLT relocations from the last entry to the first one and
-    // remember the offset of each named jump slot.
-    int idx = plt->sh_size/sizeof(Elf_Rel);
-    while (--idx >= 0) {
-      Elf_Rel rel;
-      const bool rel_ok =
-          getOriginal(plt->sh_offset + idx * sizeof(Elf_Rel), &rel) &&
-          ELF_R_SYM(rel.r_info)*sizeof(Elf_Sym) < symtab->sh_size;
-      if (!rel_ok) {
-        Debug::message("Encountered invalid plt entry\n");
-        valid_ = false;
-        return false;
-      }
-
-      if (ELF_R_TYPE(rel.r_info) == ELF_JUMP_SLOT) {
-        Elf_Sym sym;
-        const bool sym_ok =
-            getOriginal(symtab->sh_offset +
-                        ELF_R_SYM(rel.r_info)*sizeof(Elf_Sym), &sym) &&
-            sym.st_shndx < ehdr_.e_shnum;
-        if (!sym_ok) {
-          Debug::message("Encountered invalid symbol for plt entry\n");
-          valid_ = false;
-          return false;
-        }
-        const string name = getOriginal(strtab.sh_offset + sym.st_name);
-        if (!name.empty()) {
-          plt_entries_.insert(std::make_pair(name, rel.r_offset));
-        }
-      }
-    }
-  }
-
-  if (symtab) {
-    // Add every named entry of the symbol table. A section index is only
-    // acceptable if it refers to an existing section, or if it is one of
-    // the reserved indices (SHN_LORESERVE and above).
-    for (Elf_Addr pos = 0; pos < symtab->sh_size; pos += sizeof(Elf_Sym)) {
-      Elf_Sym sym;
-      const bool sym_ok =
-          getOriginal(symtab->sh_offset + pos, &sym) &&
-          (sym.st_shndx < ehdr_.e_shnum ||
-           sym.st_shndx >= SHN_LORESERVE);
-      if (!sym_ok) {
-        Debug::message("Encountered invalid symbol\n");
-        valid_ = false;
-        return false;
-      }
-      const string name = getOriginal(strtab.sh_offset + sym.st_name);
-      if (!name.empty()) {
-        symbols_.insert(std::make_pair(name, sym));
-      }
-    }
-  }
-
-  // Resolve the kernel entry points. A symbol that is missing or has a zero
-  // value leaves the corresponding pointer untouched.
-  const struct {
-    const char* name;
-    char**      slot;
-  } kernel_entries[] = {
-    { "__kernel_vsyscall",     &__kernel_vsyscall     },
-    { "__kernel_sigreturn",    &__kernel_sigreturn    },
-    { "__kernel_rt_sigreturn", &__kernel_rt_sigreturn },
-  };
-  for (size_t k = 0;
-       k < sizeof(kernel_entries)/sizeof(kernel_entries[0]); ++k) {
-    SymbolTable::const_iterator entry = symbols_.find(kernel_entries[k].name);
-    if (entry != symbols_.end() && entry->second.st_value) {
-      *kernel_entries[k].slot = asr_offset_ + entry->second.st_value;
-    }
-  }
-
-  return true;
-}
-
-} // namespace
diff --git a/sandbox/linux/seccomp/library.h b/sandbox/linux/seccomp/library.h
deleted file mode 100644
index e27bfde..0000000
--- a/sandbox/linux/seccomp/library.h
+++ /dev/null
@@ -1,199 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef LIBRARY_H__
-#define LIBRARY_H__
-
-#include <elf.h>
-#include <functional>
-#include <map>
-#include <set>
-#include <string>
-#include <string.h>
-#include <sys/mman.h>
-
-#include "maps.h"
-
-#if defined(__x86_64__)
-typedef Elf64_Ehdr Elf_Ehdr;
-typedef Elf64_Shdr Elf_Shdr;
-typedef Elf64_Sym Elf_Sym;
-typedef Elf64_Addr Elf_Addr;
-#elif defined(__i386__)
-typedef Elf32_Ehdr Elf_Ehdr;
-typedef Elf32_Shdr Elf_Shdr;
-typedef Elf32_Sym Elf_Sym;
-typedef Elf32_Addr Elf_Addr;
-#else
-#error Unsupported target platform
-#endif
-
-struct SyscallTable;
-namespace playground {
-
-// Describes one ELF object (a shared library, the executable, or the VDSO)
-// that is mapped into the address space, and knows how to parse its ELF
-// headers and patch the system calls in its code.
-class Library {
-  friend class Maps;
- public:
-  typedef Maps::string string;
-
-  Library() :
-      valid_(false),
-      isVDSO_(false),
-      asr_offset_(0),
-      vsys_offset_(0),
-      maps_(0),
-      image_(0),
-      image_size_(0) {
-  }
-
-  ~Library();
-
-  // Remembers the Maps object that this library belongs to. Only the first
-  // call has any effect; later calls are ignored.
-  void setLibraryInfo(Maps* maps) {
-    if (!maps_) {
-      maps_ = maps;
-    }
-  }
-
-  // Registers the memory range [start, stop) as the mapping of this library
-  // at file offset "offset" with protection flags "prot". "isVDSO" marks
-  // the library as the VDSO.
-  void addMemoryRange(void* start, void* stop, Elf_Addr offset,
-                      int prot, int isVDSO) {
-    isVDSO_ = isVDSO;
-    RangeMap::const_iterator iter = memory_ranges_.find(offset);
-    if (iter != memory_ranges_.end()) {
-      // It is possible to have overlapping mappings. This is particularly
-      // likely to happen with very small programs or libraries. If it does
-      // happen, we really only care about the text segment. Look for a
-      // mapping that is mapped executable.
-      if ((prot & PROT_EXEC) == 0 ||
-          (iter->second.prot & PROT_EXEC) != 0) {
-        // Either the new mapping is not executable, or we already recorded
-        // an executable one. Keep what we have.
-        return;
-      }
-      // insert() never replaces an existing entry, so the non-executable
-      // mapping has to be removed before the executable one can take its
-      // place.
-      memory_ranges_.erase(offset);
-    }
-    memory_ranges_.insert(std::make_pair(offset, Range(start, stop, prot)));
-  }
-
-  // Accessors for the contents of the library, addressed by offset. The
-  // overloads without a buffer return a string. They are implemented
-  // outside of this header.
-  char *get(Elf_Addr offset, char *buf, size_t len);
-  string get(Elf_Addr offset);
-  char *getOriginal(Elf_Addr offset, char *buf, size_t len);
-  string getOriginal(Elf_Addr offset);
-
-  // Typed wrapper around get(): reads sizeof(T) bytes at "offset" into *t.
-  // If the library is not valid, *t is zeroed and NULL is returned.
-  template<class T>T* get(Elf_Addr offset, T* t) {
-    if (!valid_) {
-      memset(t, 0, sizeof(T));
-      return NULL;
-    }
-    return reinterpret_cast<T *>(get(offset, reinterpret_cast<char *>(t),
-                                     sizeof(T)));
-  }
-
-  // Typed wrapper around getOriginal(): reads sizeof(T) bytes at "offset"
-  // into *t. If the library is not valid, *t is zeroed and NULL is returned.
-  template<class T>T* getOriginal(Elf_Addr offset, T* t) {
-    if (!valid_) {
-      memset(t, 0, sizeof(T));
-      return NULL;
-    }
-    return reinterpret_cast<T *>(getOriginal(offset,
-                                             reinterpret_cast<char *>(t),
-                                             sizeof(T)));
-  }
-
-  // Stores *value at the absolute address "addr". Returns false without
-  // writing anything if the library is not valid.
-  template<class T>bool set(void *addr, T* value) {
-    if (!valid_) {
-      return false;
-    }
-    *reinterpret_cast<T *>(addr) = *value;
-    return true;
-  }
-
-  // Stores *value at "offset", which is translated to an address by way of
-  // the recorded memory ranges. Returns false without writing anything if
-  // the library is not valid, if no memory range covers "offset", or if the
-  // value would not fit completely into that range.
-  template<class T>bool set(Elf_Addr offset, T* value) {
-    if (!valid_) {
-      return false;
-    }
-    RangeMap::const_iterator iter = memory_ranges_.lower_bound(offset);
-    if (iter == memory_ranges_.end()) {
-      return false;
-    }
-    offset -= iter->first;
-    const size_t range_size =
-        reinterpret_cast<char *>(iter->second.stop) -
-        reinterpret_cast<char *>(iter->second.start);
-    // Check the size of the range first. Otherwise, the unsigned subtraction
-    // below would wrap around for ranges that are smaller than T.
-    if (range_size < sizeof(T) || offset > range_size - sizeof(T)) {
-      return false;
-    }
-    *reinterpret_cast<T *>(
-        reinterpret_cast<char *>(iter->second.start) + offset) = *value;
-    return true;
-  }
-
-  bool parseElf();
-  const Elf_Ehdr* getEhdr();
-  const Elf_Shdr* getSection(const string& section);
-  int getSectionIndex(const string& section);
-  void makeWritable(bool state) const;
-  void patchSystemCalls();
-  bool isVDSO() const { return isVDSO_; }
-
- protected:
-  bool parseSymbols();
-
- private:
-  class GreaterThan : public std::binary_function<Elf_Addr, Elf_Addr, bool> {
-    // We create the RangeMap with a GreaterThan rather than the default
-    // comparator, as that allows us to use lower_bound() to find memory
-    // mappings.
-   public:
-    bool operator() (Elf_Addr s1, Elf_Addr s2) const {
-      return s1 > s2;
-    }
-  };
-
-  // A mapped memory range [start, stop) and its protection flags.
-  struct Range {
-    Range(void* start, void* stop, int prot) :
-        start(start), stop(stop), prot(prot) { }
-    void* start;
-    void* stop;
-    int   prot;
-  };
-
-  // Memory ranges keyed by offset, ordered from highest to lowest offset.
-  typedef std::map<Elf_Addr, Range, GreaterThan,
-                   SystemAllocator<std::pair<const Elf_Addr,
-                                             Range> > > RangeMap;
-  // Section name -> (section index, section header).
-  typedef std::map<string, std::pair<int, Elf_Shdr>, std::less<string>,
-                   SystemAllocator<std::pair<const string,
-                                             std::pair<int, Elf_Shdr> > > >
-                   SectionTable;
-  // Symbol name -> symbol table entry.
-  typedef std::map<string, Elf_Sym, std::less<string>,
-                   SystemAllocator<std::pair<const string,
-                                             Elf_Sym> > > SymbolTable;
-  // Symbol name -> r_offset of its PLT relocation.
-  typedef std::map<string, Elf_Addr, std::less<string>,
-                   SystemAllocator<std::pair<const string,
-                                             Elf_Addr> > > PltTable;
-
-  char* getBytes(char* dst, const char* src, ssize_t len);
-  static bool isSafeInsn(unsigned short insn);
-  static int isSimpleSystemCall(char *start, char *end);
-  static char* getScratchSpace(const Maps* maps, char* near, int needed,
-                               char** extraSpace, int* extraLength);
-  void patchSystemCallsInFunction(const Maps* maps, char *start, char *end,
-                                  char** extraSpace, int* extraLength);
-  int  patchVSystemCalls();
-  void patchVDSO(char** extraSpace, int* extraLength);
-
-  RangeMap       memory_ranges_;
-  bool           valid_;
-  bool           isVDSO_;
-  char*          asr_offset_;
-  int            vsys_offset_;
-  Maps*          maps_;
-  Elf_Ehdr       ehdr_;
-  SectionTable   section_table_;
-  SymbolTable    symbols_;
-  PltTable       plt_entries_;
-  char*          image_;
-  size_t         image_size_;
-  static char*   __kernel_vsyscall;
-  static char*   __kernel_sigreturn;
-  static char*   __kernel_rt_sigreturn;
-};
-
-} // namespace
-
-#endif // LIBRARY_H__
diff --git a/sandbox/linux/seccomp/linux_syscall_support.h b/sandbox/linux/seccomp/linux_syscall_support.h
deleted file mode 100644
index 2ee0426..0000000
--- a/sandbox/linux/seccomp/linux_syscall_support.h
+++ /dev/null
@@ -1,3208 +0,0 @@
-/* Copyright (c) 2005-2010, Google Inc.
- * Author: Markus Gutschke
- *
- * All rights reserved.
- * Use of this source code is governed by a BSD-style license that can be
- * found in the Chromium LICENSE file.
- */
-
-/* This file includes Linux-specific support functions common to the
- * coredumper and the thread lister; primarily, this is a collection
- * of direct system calls, and a couple of symbols missing from
- * standard header files.
- * There are a few options that the including file can set to control
- * the behavior of this file:
- *
- * SYS_CPLUSPLUS:
- * The entire header file will normally be wrapped in 'extern "C" { }',
- * making it suitable for compilation as both C and C++ source. If you
- * do not want to do this, you can set the SYS_CPLUSPLUS macro to inhibit
- * the wrapping. N.B. doing so will suppress inclusion of all prerequisite
- * system header files, too. It is the caller's responsibility to provide
- * the necessary definitions.
- *
- * SYS_ERRNO:
- * All system calls will update "errno" unless overridden by setting the
- * SYS_ERRNO macro prior to including this file. SYS_ERRNO should be
- * an l-value.
- *
- * SYS_INLINE:
- * New symbols will be defined "static inline", unless overridden by
- * the SYS_INLINE macro.
- *
- * SYS_LINUX_SYSCALL_SUPPORT_H
- * This macro is used to avoid multiple inclusions of this header file.
- * If you need to include this file more than once, make sure to
- * unset SYS_LINUX_SYSCALL_SUPPORT_H before each inclusion.
- *
- * SYS_PREFIX:
- * New system calls will have a prefix of "sys_" unless overridden by
- * the SYS_PREFIX macro. Valid values for this macro are [0..9] which
- * results in prefixes "sys[0..9]_". It is also possible to set this
- * macro to -1, which avoids all prefixes.
- *
- * This file defines a few internal symbols that all start with "LSS_".
- * Do not access these symbols from outside this file. They are not part
- * of the supported API.
- */
-#ifndef SYS_LINUX_SYSCALL_SUPPORT_H
-#define SYS_LINUX_SYSCALL_SUPPORT_H
-
-/* We currently only support x86-32, x86-64, ARM, MIPS, and PPC on Linux.
- * Porting to other related platforms should not be difficult.
- */
-#if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) || \
- defined(__mips__) || defined(__PPC__)) && defined(__linux)
-
-#ifndef SYS_CPLUSPLUS
-#ifdef __cplusplus
-/* Some system header files in older versions of gcc neglect to properly
- * handle being included from C++. As it appears to be harmless to have
- * multiple nested 'extern "C"' blocks, just add another one here.
- */
-extern "C" {
-#endif
-
-#include <errno.h>
-#include <signal.h>
-#include <stdarg.h>
-#include <stddef.h>
-#include <string.h>
-#include <sys/ptrace.h>
-#include <sys/resource.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <syscall.h>
-#include <unistd.h>
-#include <linux/unistd.h>
-#include <endian.h>
-
-#ifdef __mips__
-/* Include definitions of the ABI currently in use. */
-#include <sgidefs.h>
-#endif
-
-#endif
-
-/* As glibc often provides subtly incompatible data structures (and implicit
- * wrapper functions that convert them), we provide our own kernel data
- * structures for use by the system calls.
- * These structures have been developed by using Linux 2.6.23 headers for
- * reference. Note though, we do not care about exact API compatibility
- * with the kernel, and in fact the kernel often does not have a single
- * API that works across architectures. Instead, we try to mimic the glibc
- * API where reasonable, and only guarantee ABI compatibility with the
- * kernel headers.
- * Most notably, here are a few changes that were made to the structures
- * defined by kernel headers:
- *
- * - we only define structures, but not symbolic names for kernel data
- * types. For the latter, we directly use the native C datatype
- * (i.e. "unsigned" instead of "mode_t").
- * - in a few cases, it is possible to define identical structures for
- * both 32bit (e.g. i386) and 64bit (e.g. x86-64) platforms by
- * standardizing on the 64bit version of the data types. In particular,
- * this means that we use "unsigned" where the 32bit headers say
- * "unsigned long".
- * - overall, we try to minimize the number of cases where we need to
- * conditionally define different structures.
- * - the "struct kernel_sigaction" class of structures have been
- * modified to more closely mimic glibc's API by introducing an
- * anonymous union for the function pointer.
- * - a small number of field names had to have an underscore appended to
- * them, because glibc defines a global macro by the same name.
- */
-
-/* include/linux/dirent.h */
-struct kernel_dirent64 {
- unsigned long long d_ino;
- long long d_off;
- unsigned short d_reclen;
- unsigned char d_type;
- char d_name[256];
-};
-
-/* include/linux/dirent.h */
-struct kernel_dirent {
- long d_ino;
- long d_off;
- unsigned short d_reclen;
- char d_name[256];
-};
-
-/* include/linux/uio.h */
-struct kernel_iovec {
- void *iov_base;
- unsigned long iov_len;
-};
-
-/* include/linux/socket.h */
-struct kernel_msghdr {
- void *msg_name;
- int msg_namelen;
- struct kernel_iovec*msg_iov;
- unsigned long msg_iovlen;
- void *msg_control;
- unsigned long msg_controllen;
- unsigned msg_flags;
-};
-
-/* include/asm-generic/poll.h */
-struct kernel_pollfd {
- int fd;
- short events;
- short revents;
-};
-
-/* include/linux/resource.h */
-struct kernel_rlimit {
- unsigned long rlim_cur;
- unsigned long rlim_max;
-};
-
-/* include/linux/time.h */
-struct kernel_timespec {
- long tv_sec;
- long tv_nsec;
-};
-
-/* include/linux/time.h */
-struct kernel_timeval {
- long tv_sec;
- long tv_usec;
-};
-
-/* include/linux/resource.h */
-struct kernel_rusage {
- struct kernel_timeval ru_utime;
- struct kernel_timeval ru_stime;
- long ru_maxrss;
- long ru_ixrss;
- long ru_idrss;
- long ru_isrss;
- long ru_minflt;
- long ru_majflt;
- long ru_nswap;
- long ru_inblock;
- long ru_oublock;
- long ru_msgsnd;
- long ru_msgrcv;
- long ru_nsignals;
- long ru_nvcsw;
- long ru_nivcsw;
-};
-
-struct siginfo;
-#if defined(__i386__) || defined(__ARM_ARCH_3__) || defined(__PPC__)
-
-/* include/asm-{arm,i386,mips,ppc}/signal.h */
-struct kernel_old_sigaction {
- union {
- void (*sa_handler_)(int);
- void (*sa_sigaction_)(int, struct siginfo *, void *);
- };
- unsigned long sa_mask;
- unsigned long sa_flags;
- void (*sa_restorer)(void);
-} __attribute__((packed,aligned(4)));
-#elif (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32)
- #define kernel_old_sigaction kernel_sigaction
-#endif
-
-/* Some kernel functions (e.g. sigaction() in 2.6.23) require that the size
- * passed in exactly match the size of the signal set, even though the API was
- * intended to be extensible. We define our own KERNEL_NSIG to deal with
- * this.
- * Please note that glibc provides signals [1.._NSIG-1], whereas the
- * kernel (and this header) provides the range [1..KERNEL_NSIG]. The
- * actual number of signals is obviously the same, but the constants
- * differ by one.
- */
-#ifdef __mips__
-#define KERNEL_NSIG 128
-#else
-#define KERNEL_NSIG 64
-#endif
-
-/* include/asm-{arm,i386,mips,x86_64}/signal.h */
-struct kernel_sigset_t {
- unsigned long sig[(KERNEL_NSIG + 8*sizeof(unsigned long) - 1)/
- (8*sizeof(unsigned long))];
-};
-
-/* include/asm-{arm,i386,mips,x86_64,ppc}/signal.h */
-struct kernel_sigaction {
-#ifdef __mips__
- unsigned long sa_flags;
- union {
- void (*sa_handler_)(int);
- void (*sa_sigaction_)(int, struct siginfo *, void *);
- };
- struct kernel_sigset_t sa_mask;
-#else
- union {
- void (*sa_handler_)(int);
- void (*sa_sigaction_)(int, struct siginfo *, void *);
- };
- unsigned long sa_flags;
- void (*sa_restorer)(void);
- struct kernel_sigset_t sa_mask;
-#endif
-};
-
-/* include/linux/socket.h */
-struct kernel_sockaddr {
- unsigned short sa_family;
- char sa_data[14];
-};
-
-/* include/asm-{arm,i386,mips,ppc}/stat.h */
-#ifdef __mips__
-#if _MIPS_SIM == _MIPS_SIM_ABI64
-struct kernel_stat {
-#else
-struct kernel_stat64 {
-#endif
- unsigned st_dev;
- unsigned __pad0[3];
- unsigned long long st_ino;
- unsigned st_mode;
- unsigned st_nlink;
- unsigned st_uid;
- unsigned st_gid;
- unsigned st_rdev;
- unsigned __pad1[3];
- long long st_size;
- unsigned st_atime_;
- unsigned st_atime_nsec_;
- unsigned st_mtime_;
- unsigned st_mtime_nsec_;
- unsigned st_ctime_;
- unsigned st_ctime_nsec_;
- unsigned st_blksize;
- unsigned __pad2;
- unsigned long long st_blocks;
-};
-#elif defined __PPC__
-struct kernel_stat64 {
- unsigned long long st_dev;
- unsigned long long st_ino;
- unsigned st_mode;
- unsigned st_nlink;
- unsigned st_uid;
- unsigned st_gid;
- unsigned long long st_rdev;
- unsigned short int __pad2;
- long long st_size;
- long st_blksize;
- long long st_blocks;
- long st_atime_;
- unsigned long st_atime_nsec_;
- long st_mtime_;
- unsigned long st_mtime_nsec_;
- long st_ctime_;
- unsigned long st_ctime_nsec_;
- unsigned long __unused4;
- unsigned long __unused5;
-};
-#else
-struct kernel_stat64 {
- unsigned long long st_dev;
- unsigned char __pad0[4];
- unsigned __st_ino;
- unsigned st_mode;
- unsigned st_nlink;
- unsigned st_uid;
- unsigned st_gid;
- unsigned long long st_rdev;
- unsigned char __pad3[4];
- long long st_size;
- unsigned st_blksize;
- unsigned long long st_blocks;
- unsigned st_atime_;
- unsigned st_atime_nsec_;
- unsigned st_mtime_;
- unsigned st_mtime_nsec_;
- unsigned st_ctime_;
- unsigned st_ctime_nsec_;
- unsigned long long st_ino;
-};
-#endif
-
-/* include/asm-{arm,i386,mips,x86_64,ppc}/stat.h */
-#if defined(__i386__) || defined(__ARM_ARCH_3__)
-struct kernel_stat {
- /* The kernel headers suggest that st_dev and st_rdev should be 32bit
- * quantities encoding 12bit major and 20bit minor numbers in an interleaved
- * format. In reality, we do not see useful data in the top bits. So,
- * we'll leave the padding in here, until we find a better solution.
- */
- unsigned short st_dev;
- short pad1;
- unsigned st_ino;
- unsigned short st_mode;
- unsigned short st_nlink;
- unsigned short st_uid;
- unsigned short st_gid;
- unsigned short st_rdev;
- short pad2;
- unsigned st_size;
- unsigned st_blksize;
- unsigned st_blocks;
- unsigned st_atime_;
- unsigned st_atime_nsec_;
- unsigned st_mtime_;
- unsigned st_mtime_nsec_;
- unsigned st_ctime_;
- unsigned st_ctime_nsec_;
- unsigned __unused4;
- unsigned __unused5;
-};
-#elif defined(__x86_64__)
-struct kernel_stat {
- unsigned long st_dev;
- unsigned long st_ino;
- unsigned long st_nlink;
- unsigned st_mode;
- unsigned st_uid;
- unsigned st_gid;
- unsigned __pad0;
- unsigned long st_rdev;
- long st_size;
- long st_blksize;
- long st_blocks;
- unsigned long st_atime_;
- unsigned long st_atime_nsec_;
- unsigned long st_mtime_;
- unsigned long st_mtime_nsec_;
- unsigned long st_ctime_;
- unsigned long st_ctime_nsec_;
- long __unused[3];
-};
-#elif defined(__PPC__)
-struct kernel_stat {
- unsigned st_dev;
- unsigned long st_ino; // ino_t
- unsigned long st_mode; // mode_t
- unsigned short st_nlink; // nlink_t
- unsigned st_uid; // uid_t
- unsigned st_gid; // gid_t
- unsigned st_rdev;
- long st_size; // off_t
- unsigned long st_blksize;
- unsigned long st_blocks;
- unsigned long st_atime_;
- unsigned long st_atime_nsec_;
- unsigned long st_mtime_;
- unsigned long st_mtime_nsec_;
- unsigned long st_ctime_;
- unsigned long st_ctime_nsec_;
- unsigned long __unused4;
- unsigned long __unused5;
-};
-#elif (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64)
-struct kernel_stat {
- unsigned st_dev;
- int st_pad1[3];
- unsigned st_ino;
- unsigned st_mode;
- unsigned st_nlink;
- unsigned st_uid;
- unsigned st_gid;
- unsigned st_rdev;
- int st_pad2[2];
- long st_size;
- int st_pad3;
- long st_atime_;
- long st_atime_nsec_;
- long st_mtime_;
- long st_mtime_nsec_;
- long st_ctime_;
- long st_ctime_nsec_;
- int st_blksize;
- int st_blocks;
- int st_pad4[14];
-};
-#endif
-
-/* include/asm-{arm,i386,mips,x86_64,ppc}/statfs.h */
-#ifdef __mips__
-#if _MIPS_SIM != _MIPS_SIM_ABI64
-struct kernel_statfs64 {
- unsigned long f_type;
- unsigned long f_bsize;
- unsigned long f_frsize;
- unsigned long __pad;
- unsigned long long f_blocks;
- unsigned long long f_bfree;
- unsigned long long f_files;
- unsigned long long f_ffree;
- unsigned long long f_bavail;
- struct { int val[2]; } f_fsid;
- unsigned long f_namelen;
- unsigned long f_spare[6];
-};
-#endif
-#elif !defined(__x86_64__)
-struct kernel_statfs64 {
- unsigned long f_type;
- unsigned long f_bsize;
- unsigned long long f_blocks;
- unsigned long long f_bfree;
- unsigned long long f_bavail;
- unsigned long long f_files;
- unsigned long long f_ffree;
- struct { int val[2]; } f_fsid;
- unsigned long f_namelen;
- unsigned long f_frsize;
- unsigned long f_spare[5];
-};
-#endif
-
-/* include/asm-{arm,i386,mips,x86_64,ppc,generic}/statfs.h */
-#ifdef __mips__
-struct kernel_statfs {
- long f_type;
- long f_bsize;
- long f_frsize;
- long f_blocks;
- long f_bfree;
- long f_files;
- long f_ffree;
- long f_bavail;
- struct { int val[2]; } f_fsid;
- long f_namelen;
- long f_spare[6];
-};
-#else
-struct kernel_statfs {
- /* x86_64 actually defines all these fields as signed, whereas all other */
- /* platforms define them as unsigned. Leaving them at unsigned should not */
- /* cause any problems. */
- unsigned long f_type;
- unsigned long f_bsize;
- unsigned long f_blocks;
- unsigned long f_bfree;
- unsigned long f_bavail;
- unsigned long f_files;
- unsigned long f_ffree;
- struct { int val[2]; } f_fsid;
- unsigned long f_namelen;
- unsigned long f_frsize;
- unsigned long f_spare[5];
-};
-#endif
-
-
-/* Definitions missing from the standard header files */
-#ifndef O_DIRECTORY
-#if defined(__ARM_ARCH_3__)
-#define O_DIRECTORY 0040000
-#else
-#define O_DIRECTORY 0200000
-#endif
-#endif
-#ifndef NT_PRXFPREG
-#define NT_PRXFPREG 0x46e62b7f
-#endif
-#ifndef PTRACE_GETFPXREGS
-#define PTRACE_GETFPXREGS ((enum __ptrace_request)18)
-#endif
-#ifndef PR_GET_DUMPABLE
-#define PR_GET_DUMPABLE 3
-#endif
-#ifndef PR_SET_DUMPABLE
-#define PR_SET_DUMPABLE 4
-#endif
-#ifndef PR_GET_SECCOMP
-#define PR_GET_SECCOMP 21
-#endif
-#ifndef PR_SET_SECCOMP
-#define PR_SET_SECCOMP 22
-#endif
-#ifndef AT_FDCWD
-#define AT_FDCWD (-100)
-#endif
-#ifndef AT_SYMLINK_NOFOLLOW
-#define AT_SYMLINK_NOFOLLOW 0x100
-#endif
-#ifndef AT_REMOVEDIR
-#define AT_REMOVEDIR 0x200
-#endif
-#ifndef MREMAP_FIXED
-#define MREMAP_FIXED 2
-#endif
-#ifndef SA_RESTORER
-#define SA_RESTORER 0x04000000
-#endif
-#ifndef CPUCLOCK_PROF
-#define CPUCLOCK_PROF 0
-#endif
-#ifndef CPUCLOCK_VIRT
-#define CPUCLOCK_VIRT 1
-#endif
-#ifndef CPUCLOCK_SCHED
-#define CPUCLOCK_SCHED 2
-#endif
-#ifndef CPUCLOCK_PERTHREAD_MASK
-#define CPUCLOCK_PERTHREAD_MASK 4
-#endif
-#ifndef MAKE_PROCESS_CPUCLOCK
-#define MAKE_PROCESS_CPUCLOCK(pid, clock) \
- ((~(int)(pid) << 3) | (int)(clock))
-#endif
-#ifndef MAKE_THREAD_CPUCLOCK
-#define MAKE_THREAD_CPUCLOCK(tid, clock) \
- ((~(int)(tid) << 3) | (int)((clock) | CPUCLOCK_PERTHREAD_MASK))
-#endif
-
-#ifndef FUTEX_WAIT
-#define FUTEX_WAIT 0
-#endif
-#ifndef FUTEX_WAKE
-#define FUTEX_WAKE 1
-#endif
-#ifndef FUTEX_FD
-#define FUTEX_FD 2
-#endif
-#ifndef FUTEX_REQUEUE
-#define FUTEX_REQUEUE 3
-#endif
-#ifndef FUTEX_CMP_REQUEUE
-#define FUTEX_CMP_REQUEUE 4
-#endif
-#ifndef FUTEX_WAKE_OP
-#define FUTEX_WAKE_OP 5
-#endif
-#ifndef FUTEX_LOCK_PI
-#define FUTEX_LOCK_PI 6
-#endif
-#ifndef FUTEX_UNLOCK_PI
-#define FUTEX_UNLOCK_PI 7
-#endif
-#ifndef FUTEX_TRYLOCK_PI
-#define FUTEX_TRYLOCK_PI 8
-#endif
-#ifndef FUTEX_PRIVATE_FLAG
-#define FUTEX_PRIVATE_FLAG 128
-#endif
-#ifndef FUTEX_CMD_MASK
-#define FUTEX_CMD_MASK ~FUTEX_PRIVATE_FLAG
-#endif
-#ifndef FUTEX_WAIT_PRIVATE
-#define FUTEX_WAIT_PRIVATE (FUTEX_WAIT | FUTEX_PRIVATE_FLAG)
-#endif
-#ifndef FUTEX_WAKE_PRIVATE
-#define FUTEX_WAKE_PRIVATE (FUTEX_WAKE | FUTEX_PRIVATE_FLAG)
-#endif
-#ifndef FUTEX_REQUEUE_PRIVATE
-#define FUTEX_REQUEUE_PRIVATE (FUTEX_REQUEUE | FUTEX_PRIVATE_FLAG)
-#endif
-#ifndef FUTEX_CMP_REQUEUE_PRIVATE
-#define FUTEX_CMP_REQUEUE_PRIVATE (FUTEX_CMP_REQUEUE | FUTEX_PRIVATE_FLAG)
-#endif
-#ifndef FUTEX_WAKE_OP_PRIVATE
-#define FUTEX_WAKE_OP_PRIVATE (FUTEX_WAKE_OP | FUTEX_PRIVATE_FLAG)
-#endif
-#ifndef FUTEX_LOCK_PI_PRIVATE
-#define FUTEX_LOCK_PI_PRIVATE (FUTEX_LOCK_PI | FUTEX_PRIVATE_FLAG)
-#endif
-#ifndef FUTEX_UNLOCK_PI_PRIVATE
-#define FUTEX_UNLOCK_PI_PRIVATE (FUTEX_UNLOCK_PI | FUTEX_PRIVATE_FLAG)
-#endif
-#ifndef FUTEX_TRYLOCK_PI_PRIVATE
-#define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG)
-#endif
-
-
-#if defined(__x86_64__)
-#ifndef ARCH_SET_GS
-#define ARCH_SET_GS 0x1001
-#endif
-#ifndef ARCH_GET_GS
-#define ARCH_GET_GS 0x1004
-#endif
-#endif
-
-#if defined(__i386__)
-#ifndef __NR_quotactl
-#define __NR_quotactl 131
-#endif
-#ifndef __NR_setresuid
-#define __NR_setresuid 164
-#define __NR_getresuid 165
-#define __NR_setresgid 170
-#define __NR_getresgid 171
-#endif
-#ifndef __NR_rt_sigaction
-#define __NR_rt_sigreturn 173
-#define __NR_rt_sigaction 174
-#define __NR_rt_sigprocmask 175
-#define __NR_rt_sigpending 176
-#define __NR_rt_sigsuspend 179
-#endif
-#ifndef __NR_pread64
-#define __NR_pread64 180
-#endif
-#ifndef __NR_pwrite64
-#define __NR_pwrite64 181
-#endif
-#ifndef __NR_ugetrlimit
-#define __NR_ugetrlimit 191
-#endif
-#ifndef __NR_stat64
-#define __NR_stat64 195
-#endif
-#ifndef __NR_fstat64
-#define __NR_fstat64 197
-#endif
-#ifndef __NR_setresuid32
-#define __NR_setresuid32 208
-#define __NR_getresuid32 209
-#define __NR_setresgid32 210
-#define __NR_getresgid32 211
-#endif
-#ifndef __NR_setfsuid32
-#define __NR_setfsuid32 215
-#define __NR_setfsgid32 216
-#endif
-#ifndef __NR_getdents64
-#define __NR_getdents64 220
-#endif
-#ifndef __NR_gettid
-#define __NR_gettid 224
-#endif
-#ifndef __NR_readahead
-#define __NR_readahead 225
-#endif
-#ifndef __NR_setxattr
-#define __NR_setxattr 226
-#endif
-#ifndef __NR_lsetxattr
-#define __NR_lsetxattr 227
-#endif
-#ifndef __NR_getxattr
-#define __NR_getxattr 229
-#endif
-#ifndef __NR_lgetxattr
-#define __NR_lgetxattr 230
-#endif
-#ifndef __NR_listxattr
-#define __NR_listxattr 232
-#endif
-#ifndef __NR_llistxattr
-#define __NR_llistxattr 233
-#endif
-#ifndef __NR_tkill
-#define __NR_tkill 238
-#endif
-#ifndef __NR_futex
-#define __NR_futex 240
-#endif
-#ifndef __NR_sched_setaffinity
-#define __NR_sched_setaffinity 241
-#define __NR_sched_getaffinity 242
-#endif
-#ifndef __NR_set_tid_address
-#define __NR_set_tid_address 258
-#endif
-#ifndef __NR_clock_gettime
-#define __NR_clock_gettime 265
-#endif
-#ifndef __NR_clock_getres
-#define __NR_clock_getres 266
-#endif
-#ifndef __NR_statfs64
-#define __NR_statfs64 268
-#endif
-#ifndef __NR_fstatfs64
-#define __NR_fstatfs64 269
-#endif
-#ifndef __NR_fadvise64_64
-#define __NR_fadvise64_64 272
-#endif
-#ifndef __NR_ioprio_set
-#define __NR_ioprio_set 289
-#endif
-#ifndef __NR_ioprio_get
-#define __NR_ioprio_get 290
-#endif
-#ifndef __NR_openat
-#define __NR_openat 295
-#endif
-#ifndef __NR_fstatat64
-#define __NR_fstatat64 300
-#endif
-#ifndef __NR_unlinkat
-#define __NR_unlinkat 301
-#endif
-#ifndef __NR_move_pages
-#define __NR_move_pages 317
-#endif
-#ifndef __NR_getcpu
-#define __NR_getcpu 318
-#endif
-#ifndef __NR_fallocate
-#define __NR_fallocate 324
-#endif
-/* End of i386 definitions */
-#elif defined(__ARM_ARCH_3__)
-#ifndef __NR_setresuid
-#define __NR_setresuid (__NR_SYSCALL_BASE + 164)
-#define __NR_getresuid (__NR_SYSCALL_BASE + 165)
-#define __NR_setresgid (__NR_SYSCALL_BASE + 170)
-#define __NR_getresgid (__NR_SYSCALL_BASE + 171)
-#endif
-#ifndef __NR_rt_sigaction
-#define __NR_rt_sigreturn (__NR_SYSCALL_BASE + 173)
-#define __NR_rt_sigaction (__NR_SYSCALL_BASE + 174)
-#define __NR_rt_sigprocmask (__NR_SYSCALL_BASE + 175)
-#define __NR_rt_sigpending (__NR_SYSCALL_BASE + 176)
-#define __NR_rt_sigsuspend (__NR_SYSCALL_BASE + 179)
-#endif
-#ifndef __NR_pread64
-#define __NR_pread64 (__NR_SYSCALL_BASE + 180)
-#endif
-#ifndef __NR_pwrite64
-#define __NR_pwrite64 (__NR_SYSCALL_BASE + 181)
-#endif
-#ifndef __NR_ugetrlimit
-#define __NR_ugetrlimit (__NR_SYSCALL_BASE + 191)
-#endif
-#ifndef __NR_stat64
-#define __NR_stat64 (__NR_SYSCALL_BASE + 195)
-#endif
-#ifndef __NR_fstat64
-#define __NR_fstat64 (__NR_SYSCALL_BASE + 197)
-#endif
-#ifndef __NR_setresuid32
-#define __NR_setresuid32 (__NR_SYSCALL_BASE + 208)
-#define __NR_getresuid32 (__NR_SYSCALL_BASE + 209)
-#define __NR_setresgid32 (__NR_SYSCALL_BASE + 210)
-#define __NR_getresgid32 (__NR_SYSCALL_BASE + 211)
-#endif
-#ifndef __NR_setfsuid32
-#define __NR_setfsuid32 (__NR_SYSCALL_BASE + 215)
-#define __NR_setfsgid32 (__NR_SYSCALL_BASE + 216)
-#endif
-#ifndef __NR_getdents64
-#define __NR_getdents64 (__NR_SYSCALL_BASE + 217)
-#endif
-#ifndef __NR_gettid
-#define __NR_gettid (__NR_SYSCALL_BASE + 224)
-#endif
-#ifndef __NR_readahead
-#define __NR_readahead (__NR_SYSCALL_BASE + 225)
-#endif
-#ifndef __NR_setxattr
-#define __NR_setxattr (__NR_SYSCALL_BASE + 226)
-#endif
-#ifndef __NR_lsetxattr
-#define __NR_lsetxattr (__NR_SYSCALL_BASE + 227)
-#endif
-#ifndef __NR_getxattr
-#define __NR_getxattr (__NR_SYSCALL_BASE + 229)
-#endif
-#ifndef __NR_lgetxattr
-#define __NR_lgetxattr (__NR_SYSCALL_BASE + 230)
-#endif
-#ifndef __NR_listxattr
-#define __NR_listxattr (__NR_SYSCALL_BASE + 232)
-#endif
-#ifndef __NR_llistxattr
-#define __NR_llistxattr (__NR_SYSCALL_BASE + 233)
-#endif
-#ifndef __NR_tkill
-#define __NR_tkill (__NR_SYSCALL_BASE + 238)
-#endif
-#ifndef __NR_futex
-#define __NR_futex (__NR_SYSCALL_BASE + 240)
-#endif
-#ifndef __NR_sched_setaffinity
-#define __NR_sched_setaffinity (__NR_SYSCALL_BASE + 241)
-#define __NR_sched_getaffinity (__NR_SYSCALL_BASE + 242)
-#endif
-#ifndef __NR_set_tid_address
-#define __NR_set_tid_address (__NR_SYSCALL_BASE + 256)
-#endif
-#ifndef __NR_clock_gettime
-#define __NR_clock_gettime (__NR_SYSCALL_BASE + 263)
-#endif
-#ifndef __NR_clock_getres
-#define __NR_clock_getres (__NR_SYSCALL_BASE + 264)
-#endif
-#ifndef __NR_statfs64
-#define __NR_statfs64 (__NR_SYSCALL_BASE + 266)
-#endif
-#ifndef __NR_fstatfs64
-#define __NR_fstatfs64 (__NR_SYSCALL_BASE + 267)
-#endif
-#ifndef __NR_ioprio_set
-#define __NR_ioprio_set (__NR_SYSCALL_BASE + 314)
-#endif
-#ifndef __NR_ioprio_get
-#define __NR_ioprio_get (__NR_SYSCALL_BASE + 315)
-#endif
-#ifndef __NR_move_pages
-#define __NR_move_pages (__NR_SYSCALL_BASE + 344)
-#endif
-#ifndef __NR_getcpu
-#define __NR_getcpu (__NR_SYSCALL_BASE + 345)
-#endif
-/* End of ARM 3 definitions */
-#elif defined(__x86_64__)
-#ifndef __NR_pread64
-#define __NR_pread64 17
-#endif
-#ifndef __NR_pwrite64
-#define __NR_pwrite64 18
-#endif
-#ifndef __NR_setresuid
-#define __NR_setresuid 117
-#define __NR_getresuid 118
-#define __NR_setresgid 119
-#define __NR_getresgid 120
-#endif
-#ifndef __NR_quotactl
-#define __NR_quotactl 179
-#endif
-#ifndef __NR_gettid
-#define __NR_gettid 186
-#endif
-#ifndef __NR_readahead
-#define __NR_readahead 187
-#endif
-#ifndef __NR_setxattr
-#define __NR_setxattr 188
-#endif
-#ifndef __NR_lsetxattr
-#define __NR_lsetxattr 189
-#endif
-#ifndef __NR_getxattr
-#define __NR_getxattr 191
-#endif
-#ifndef __NR_lgetxattr
-#define __NR_lgetxattr 192
-#endif
-#ifndef __NR_listxattr
-#define __NR_listxattr 194
-#endif
-#ifndef __NR_llistxattr
-#define __NR_llistxattr 195
-#endif
-#ifndef __NR_tkill
-#define __NR_tkill 200
-#endif
-#ifndef __NR_futex
-#define __NR_futex 202
-#endif
-#ifndef __NR_sched_setaffinity
-#define __NR_sched_setaffinity 203
-#define __NR_sched_getaffinity 204
-#endif
-#ifndef __NR_getdents64
-#define __NR_getdents64 217
-#endif
-#ifndef __NR_set_tid_address
-#define __NR_set_tid_address 218
-#endif
-#ifndef __NR_fadvise64
-#define __NR_fadvise64 221
-#endif
-#ifndef __NR_clock_gettime
-#define __NR_clock_gettime 228
-#endif
-#ifndef __NR_clock_getres
-#define __NR_clock_getres 229
-#endif
-#ifndef __NR_ioprio_set
-#define __NR_ioprio_set 251
-#endif
-#ifndef __NR_ioprio_get
-#define __NR_ioprio_get 252
-#endif
-#ifndef __NR_openat
-#define __NR_openat 257
-#endif
-#ifndef __NR_newfstatat
-#define __NR_newfstatat 262
-#endif
-#ifndef __NR_unlinkat
-#define __NR_unlinkat 263
-#endif
-#ifndef __NR_move_pages
-#define __NR_move_pages 279
-#endif
-#ifndef __NR_fallocate
-#define __NR_fallocate 285
-#endif
-/* End of x86-64 definitions */
-#elif defined(__mips__)
-#if _MIPS_SIM == _MIPS_SIM_ABI32
-#ifndef __NR_setresuid
-#define __NR_setresuid (__NR_Linux + 185)
-#define __NR_getresuid (__NR_Linux + 186)
-#define __NR_setresgid (__NR_Linux + 190)
-#define __NR_getresgid (__NR_Linux + 191)
-#endif
-#ifndef __NR_rt_sigaction
-#define __NR_rt_sigreturn (__NR_Linux + 193)
-#define __NR_rt_sigaction (__NR_Linux + 194)
-#define __NR_rt_sigprocmask (__NR_Linux + 195)
-#define __NR_rt_sigpending (__NR_Linux + 196)
-#define __NR_rt_sigsuspend (__NR_Linux + 199)
-#endif
-#ifndef __NR_pread64
-#define __NR_pread64 (__NR_Linux + 200)
-#endif
-#ifndef __NR_pwrite64
-#define __NR_pwrite64 (__NR_Linux + 201)
-#endif
-#ifndef __NR_stat64
-#define __NR_stat64 (__NR_Linux + 213)
-#endif
-#ifndef __NR_fstat64
-#define __NR_fstat64 (__NR_Linux + 215)
-#endif
-#ifndef __NR_getdents64
-#define __NR_getdents64 (__NR_Linux + 219)
-#endif
-#ifndef __NR_gettid
-#define __NR_gettid (__NR_Linux + 222)
-#endif
-#ifndef __NR_readahead
-#define __NR_readahead (__NR_Linux + 223)
-#endif
-#ifndef __NR_setxattr
-#define __NR_setxattr (__NR_Linux + 224)
-#endif
-#ifndef __NR_lsetxattr
-#define __NR_lsetxattr (__NR_Linux + 225)
-#endif
-#ifndef __NR_getxattr
-#define __NR_getxattr (__NR_Linux + 227)
-#endif
-#ifndef __NR_lgetxattr
-#define __NR_lgetxattr (__NR_Linux + 228)
-#endif
-#ifndef __NR_listxattr
-#define __NR_listxattr (__NR_Linux + 230)
-#endif
-#ifndef __NR_llistxattr
-#define __NR_llistxattr (__NR_Linux + 231)
-#endif
-#ifndef __NR_tkill
-#define __NR_tkill (__NR_Linux + 236)
-#endif
-#ifndef __NR_futex
-#define __NR_futex (__NR_Linux + 238)
-#endif
-#ifndef __NR_sched_setaffinity
-#define __NR_sched_setaffinity (__NR_Linux + 239)
-#define __NR_sched_getaffinity (__NR_Linux + 240)
-#endif
-#ifndef __NR_set_tid_address
-#define __NR_set_tid_address (__NR_Linux + 252)
-#endif
-#ifndef __NR_statfs64
-#define __NR_statfs64 (__NR_Linux + 255)
-#endif
-#ifndef __NR_fstatfs64
-#define __NR_fstatfs64 (__NR_Linux + 256)
-#endif
-#ifndef __NR_clock_gettime
-#define __NR_clock_gettime (__NR_Linux + 263)
-#endif
-#ifndef __NR_clock_getres
-#define __NR_clock_getres (__NR_Linux + 264)
-#endif
-#ifndef __NR_openat
-#define __NR_openat (__NR_Linux + 288)
-#endif
-#ifndef __NR_fstatat
-#define __NR_fstatat (__NR_Linux + 293)
-#endif
-#ifndef __NR_unlinkat
-#define __NR_unlinkat (__NR_Linux + 294)
-#endif
-#ifndef __NR_move_pages
-#define __NR_move_pages (__NR_Linux + 308)
-#endif
-#ifndef __NR_getcpu
-#define __NR_getcpu (__NR_Linux + 312)
-#endif
-#ifndef __NR_ioprio_set
-#define __NR_ioprio_set (__NR_Linux + 314)
-#endif
-#ifndef __NR_ioprio_get
-#define __NR_ioprio_get (__NR_Linux + 315)
-#endif
-/* End of MIPS (old 32bit API) definitions */
-#elif _MIPS_SIM == _MIPS_SIM_ABI64
-#ifndef __NR_pread64
-#define __NR_pread64 (__NR_Linux + 16)
-#endif
-#ifndef __NR_pwrite64
-#define __NR_pwrite64 (__NR_Linux + 17)
-#endif
-#ifndef __NR_setresuid
-#define __NR_setresuid (__NR_Linux + 115)
-#define __NR_getresuid (__NR_Linux + 116)
-#define __NR_setresgid (__NR_Linux + 117)
-#define __NR_getresgid (__NR_Linux + 118)
-#endif
-#ifndef __NR_gettid
-#define __NR_gettid (__NR_Linux + 178)
-#endif
-#ifndef __NR_readahead
-#define __NR_readahead (__NR_Linux + 179)
-#endif
-#ifndef __NR_setxattr
-#define __NR_setxattr (__NR_Linux + 180)
-#endif
-#ifndef __NR_lsetxattr
-#define __NR_lsetxattr (__NR_Linux + 181)
-#endif
-#ifndef __NR_getxattr
-#define __NR_getxattr (__NR_Linux + 183)
-#endif
-#ifndef __NR_lgetxattr
-#define __NR_lgetxattr (__NR_Linux + 184)
-#endif
-#ifndef __NR_listxattr
-#define __NR_listxattr (__NR_Linux + 186)
-#endif
-#ifndef __NR_llistxattr
-#define __NR_llistxattr (__NR_Linux + 187)
-#endif
-#ifndef __NR_tkill
-#define __NR_tkill (__NR_Linux + 192)
-#endif
-#ifndef __NR_futex
-#define __NR_futex (__NR_Linux + 194)
-#endif
-#ifndef __NR_sched_setaffinity
-#define __NR_sched_setaffinity (__NR_Linux + 195)
-#define __NR_sched_getaffinity (__NR_Linux + 196)
-#endif
-#ifndef __NR_set_tid_address
-#define __NR_set_tid_address (__NR_Linux + 212)
-#endif
-#ifndef __NR_clock_gettime
-#define __NR_clock_gettime (__NR_Linux + 222)
-#endif
-#ifndef __NR_clock_getres
-#define __NR_clock_getres (__NR_Linux + 223)
-#endif
-#ifndef __NR_openat
-#define __NR_openat (__NR_Linux + 247)
-#endif
-#ifndef __NR_fstatat
-#define __NR_fstatat (__NR_Linux + 252)
-#endif
-#ifndef __NR_unlinkat
-#define __NR_unlinkat (__NR_Linux + 253)
-#endif
-#ifndef __NR_move_pages
-#define __NR_move_pages (__NR_Linux + 267)
-#endif
-#ifndef __NR_getcpu
-#define __NR_getcpu (__NR_Linux + 271)
-#endif
-#ifndef __NR_ioprio_set
-#define __NR_ioprio_set (__NR_Linux + 273)
-#endif
-#ifndef __NR_ioprio_get
-#define __NR_ioprio_get (__NR_Linux + 274)
-#endif
-/* End of MIPS (64bit API) definitions */
-#else
-#ifndef __NR_setresuid
-#define __NR_setresuid (__NR_Linux + 115)
-#define __NR_getresuid (__NR_Linux + 116)
-#define __NR_setresgid (__NR_Linux + 117)
-#define __NR_getresgid (__NR_Linux + 118)
-#endif
-#ifndef __NR_gettid
-#define __NR_gettid (__NR_Linux + 178)
-#endif
-#ifndef __NR_readahead
-#define __NR_readahead (__NR_Linux + 179)
-#endif
-#ifndef __NR_setxattr
-#define __NR_setxattr (__NR_Linux + 180)
-#endif
-#ifndef __NR_lsetxattr
-#define __NR_lsetxattr (__NR_Linux + 181)
-#endif
-#ifndef __NR_getxattr
-#define __NR_getxattr (__NR_Linux + 183)
-#endif
-#ifndef __NR_lgetxattr
-#define __NR_lgetxattr (__NR_Linux + 184)
-#endif
-#ifndef __NR_listxattr
-#define __NR_listxattr (__NR_Linux + 186)
-#endif
-#ifndef __NR_llistxattr
-#define __NR_llistxattr (__NR_Linux + 187)
-#endif
-#ifndef __NR_tkill
-#define __NR_tkill (__NR_Linux + 192)
-#endif
-#ifndef __NR_futex
-#define __NR_futex (__NR_Linux + 194)
-#endif
-#ifndef __NR_sched_setaffinity
-#define __NR_sched_setaffinity (__NR_Linux + 195)
-#define __NR_sched_getaffinity (__NR_Linux + 196)
-#endif
-#ifndef __NR_set_tid_address
-#define __NR_set_tid_address (__NR_Linux + 213)
-#endif
-#ifndef __NR_statfs64
-#define __NR_statfs64 (__NR_Linux + 217)
-#endif
-#ifndef __NR_fstatfs64
-#define __NR_fstatfs64 (__NR_Linux + 218)
-#endif
-#ifndef __NR_clock_gettime
-#define __NR_clock_gettime (__NR_Linux + 226)
-#endif
-#ifndef __NR_clock_getres
-#define __NR_clock_getres (__NR_Linux + 227)
-#endif
-#ifndef __NR_openat
-#define __NR_openat (__NR_Linux + 251)
-#endif
-#ifndef __NR_fstatat
-#define __NR_fstatat (__NR_Linux + 256)
-#endif
-#ifndef __NR_unlinkat
-#define __NR_unlinkat (__NR_Linux + 257)
-#endif
-#ifndef __NR_move_pages
-#define __NR_move_pages (__NR_Linux + 271)
-#endif
-#ifndef __NR_getcpu
-#define __NR_getcpu (__NR_Linux + 275)
-#endif
-#ifndef __NR_ioprio_set
-#define __NR_ioprio_set (__NR_Linux + 277)
-#endif
-#ifndef __NR_ioprio_get
-#define __NR_ioprio_get (__NR_Linux + 278)
-#endif
-/* End of MIPS (new 32bit API) definitions */
-#endif
-/* End of MIPS definitions */
-#elif defined(__PPC__)
-#ifndef __NR_setfsuid
-#define __NR_setfsuid 138
-#define __NR_setfsgid 139
-#endif
-#ifndef __NR_setresuid
-#define __NR_setresuid 164
-#define __NR_getresuid 165
-#define __NR_setresgid 169
-#define __NR_getresgid 170
-#endif
-#ifndef __NR_rt_sigaction
-#define __NR_rt_sigreturn 172
-#define __NR_rt_sigaction 173
-#define __NR_rt_sigprocmask 174
-#define __NR_rt_sigpending 175
-#define __NR_rt_sigsuspend 178
-#endif
-#ifndef __NR_pread64
-#define __NR_pread64 179
-#endif
-#ifndef __NR_pwrite64
-#define __NR_pwrite64 180
-#endif
-#ifndef __NR_ugetrlimit
-#define __NR_ugetrlimit 190
-#endif
-#ifndef __NR_readahead
-#define __NR_readahead 191
-#endif
-#ifndef __NR_stat64
-#define __NR_stat64 195
-#endif
-#ifndef __NR_fstat64
-#define __NR_fstat64 197
-#endif
-#ifndef __NR_getdents64
-#define __NR_getdents64 202
-#endif
-#ifndef __NR_gettid
-#define __NR_gettid 207
-#endif
-#ifndef __NR_tkill
-#define __NR_tkill 208
-#endif
-#ifndef __NR_setxattr
-#define __NR_setxattr 209
-#endif
-#ifndef __NR_lsetxattr
-#define __NR_lsetxattr 210
-#endif
-#ifndef __NR_getxattr
-#define __NR_getxattr 212
-#endif
-#ifndef __NR_lgetxattr
-#define __NR_lgetxattr 213
-#endif
-#ifndef __NR_listxattr
-#define __NR_listxattr 215
-#endif
-#ifndef __NR_llistxattr
-#define __NR_llistxattr 216
-#endif
-#ifndef __NR_futex
-#define __NR_futex 221
-#endif
-#ifndef __NR_sched_setaffinity
-#define __NR_sched_setaffinity 222
-#define __NR_sched_getaffinity 223
-#endif
-#ifndef __NR_set_tid_address
-#define __NR_set_tid_address 232
-#endif
-#ifndef __NR_clock_gettime
-#define __NR_clock_gettime 246
-#endif
-#ifndef __NR_clock_getres
-#define __NR_clock_getres 247
-#endif
-#ifndef __NR_statfs64
-#define __NR_statfs64 252
-#endif
-#ifndef __NR_fstatfs64
-#define __NR_fstatfs64 253
-#endif
-#ifndef __NR_fadvise64_64
-#define __NR_fadvise64_64 254
-#endif
-#ifndef __NR_ioprio_set
-#define __NR_ioprio_set 273
-#endif
-#ifndef __NR_ioprio_get
-#define __NR_ioprio_get 274
-#endif
-#ifndef __NR_openat
-#define __NR_openat 286
-#endif
-#ifndef __NR_fstatat64
-#define __NR_fstatat64 291
-#endif
-#ifndef __NR_unlinkat
-#define __NR_unlinkat 292
-#endif
-#ifndef __NR_move_pages
-#define __NR_move_pages 301
-#endif
-#ifndef __NR_getcpu
-#define __NR_getcpu 302
-#endif
-/* End of powerpc definitions */
-#endif
-
-
-/* After forking, we must make sure to only call system calls. */
-#if __BOUNDED_POINTERS__
- #error "Need to port invocations of syscalls for bounded ptrs"
-#else
- /* The core dumper and the thread lister get executed after threads
- * have been suspended. As a consequence, we cannot call any functions
- * that acquire locks. Unfortunately, libc wraps most system calls
- * (e.g. in order to implement pthread_atfork, and to make calls
- * cancellable), which means we cannot call these functions. Instead,
- * we have to call syscall() directly.
- */
- #undef LSS_ERRNO
- #ifdef SYS_ERRNO
- /* Allow the including file to override the location of errno. This can
- * be useful when using clone() with the CLONE_VM option.
- */
- #define LSS_ERRNO SYS_ERRNO
- #else
- #define LSS_ERRNO errno
- #endif
-
- #undef LSS_INLINE
- #ifdef SYS_INLINE
- #define LSS_INLINE SYS_INLINE
- #else
- #define LSS_INLINE static inline
- #endif
-
- /* Allow the including file to override the prefix used for all new
- * system calls. By default, it will be set to "sys_".
- * Defining SYS_PREFIX as a negative value removes the prefix altogether,
- * and the values 0 through 9 select the prefixes "sys0_" through "sys9_".
- * There is no fallback for any other value: LSS_NAME is left undefined
- * if SYS_PREFIX is greater than 9.
- */
- #undef LSS_NAME
- #ifndef SYS_PREFIX
- #define LSS_NAME(name) sys_##name
- #elif SYS_PREFIX < 0
- #define LSS_NAME(name) name
- #elif SYS_PREFIX == 0
- #define LSS_NAME(name) sys0_##name
- #elif SYS_PREFIX == 1
- #define LSS_NAME(name) sys1_##name
- #elif SYS_PREFIX == 2
- #define LSS_NAME(name) sys2_##name
- #elif SYS_PREFIX == 3
- #define LSS_NAME(name) sys3_##name
- #elif SYS_PREFIX == 4
- #define LSS_NAME(name) sys4_##name
- #elif SYS_PREFIX == 5
- #define LSS_NAME(name) sys5_##name
- #elif SYS_PREFIX == 6
- #define LSS_NAME(name) sys6_##name
- #elif SYS_PREFIX == 7
- #define LSS_NAME(name) sys7_##name
- #elif SYS_PREFIX == 8
- #define LSS_NAME(name) sys8_##name
- #elif SYS_PREFIX == 9
- #define LSS_NAME(name) sys9_##name
- #endif
-
- #undef LSS_RETURN
- #if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__))
- /* Failing system calls return a negative result in the range of
- * -1..-4095. These are "errno" values with the sign inverted.
- * For such a result, LSS_RETURN stores the positive error code in
- * LSS_ERRNO and makes the enclosing function return -1. Any other
- * result is returned unchanged, after a cast to "type".
- */
- #define LSS_RETURN(type, res) \
- do { \
- if ((unsigned long)(res) >= (unsigned long)(-4095)) { \
- LSS_ERRNO = -(res); \
- res = -1; \
- } \
- return (type) (res); \
- } while (0)
- #elif defined(__mips__)
- /* On MIPS, a failing system call is signalled through a separate CPU
- * register, whose value the caller passes in as "err". If "err" is
- * non-zero, "res" holds the error code: it is stored in LSS_ERRNO and
- * the enclosing function returns -1. Otherwise "res" is returned.
- */
- #define LSS_RETURN(type, res, err) \
- do { \
- if (err) { \
- LSS_ERRNO = (res); \
- res = -1; \
- } \
- return (type) (res); \
- } while (0)
- #elif defined(__PPC__)
- /* On PPC, a failing system call is signalled through a separate CPU
- * register. See linux/unistd.h. The caller passes the contents of that
- * register in as "err". If bit 0x10000000 is set in it, "res" holds the
- * error code: it is stored in LSS_ERRNO and the enclosing function
- * returns -1. Otherwise "res" is returned.
- */
- #define LSS_RETURN(type, res, err) \
- do { \
- if (err & 0x10000000 ) { \
- LSS_ERRNO = (res); \
- res = -1; \
- } \
- return (type) (res); \
- } while (0)
- #endif
- #if defined(__i386__)
- /* In PIC mode (e.g. when building shared libraries), gcc for i386
- * reserves ebx. Unfortunately, most distributions ship with implementations
- * of _syscallX() which clobber ebx.
- * Also, most definitions of _syscallX() neglect to mark "memory" as being
- * clobbered. This causes problems with compilers that do a better job
- * at optimizing across __asm__ calls.
- * So, we just have to redefine all of the _syscallX() macros.
- */
- #undef LSS_BODY
- #define LSS_BODY(type,args...) \
- long __res; \
- __asm__ __volatile__("push %%ebx\n" \
- "movl %2,%%ebx\n" \
- "int $0x80\n" \
- "pop %%ebx" \
- args \
- : "esp", "memory"); \
- LSS_RETURN(type,__res)
- #undef _syscall0
- #define _syscall0(type,name) \
- type LSS_NAME(name)(void) { \
- long __res; \
- __asm__ volatile("int $0x80" \
- : "=a" (__res) \
- : "0" (__NR_##name) \
- : "memory"); \
- LSS_RETURN(type,__res); \
- }
- #undef _syscall1
- #define _syscall1(type,name,type1,arg1) \
- type LSS_NAME(name)(type1 arg1) { \
- LSS_BODY(type, \
- : "=a" (__res) \
- : "0" (__NR_##name), "ri" ((long)(arg1))); \
- }
- #undef _syscall2
- #define _syscall2(type,name,type1,arg1,type2,arg2) \
- type LSS_NAME(name)(type1 arg1,type2 arg2) { \
- LSS_BODY(type, \
- : "=a" (__res) \
- : "0" (__NR_##name),"ri" ((long)(arg1)), "c" ((long)(arg2))); \
- }
- #undef _syscall3
- #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \
- type LSS_NAME(name)(type1 arg1,type2 arg2,type3 arg3) { \
- LSS_BODY(type, \
- : "=a" (__res) \
- : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)), \
- "d" ((long)(arg3))); \
- }
- #undef _syscall4
- #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \
- LSS_BODY(type, \
- : "=a" (__res) \
- : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)), \
- "d" ((long)(arg3)),"S" ((long)(arg4))); \
- }
- #undef _syscall5
- #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
- type5,arg5) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
- type5 arg5) { \
- long __res; \
- __asm__ __volatile__("push %%ebx\n" \
- "movl %2,%%ebx\n" \
- "movl %1,%%eax\n" \
- "int $0x80\n" \
- "pop %%ebx" \
- : "=a" (__res) \
- : "i" (__NR_##name), "ri" ((long)(arg1)), \
- "c" ((long)(arg2)), "d" ((long)(arg3)), \
- "S" ((long)(arg4)), "D" ((long)(arg5)) \
- : "esp", "memory"); \
- LSS_RETURN(type,__res); \
- }
- #undef _syscall6
- #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
- type5,arg5,type6,arg6) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
- type5 arg5, type6 arg6) { \
- long __res; \
- struct { long __a1; long __a6; } __s = { (long)arg1, (long) arg6 }; \
- __asm__ __volatile__("push %%ebp\n" \
- "push %%ebx\n" \
- "movl 4(%2),%%ebp\n" \
- "movl 0(%2), %%ebx\n" \
- "movl %1,%%eax\n" \
- "int $0x80\n" \
- "pop %%ebx\n" \
- "pop %%ebp" \
- : "=a" (__res) \
- : "i" (__NR_##name), "0" ((long)(&__s)), \
- "c" ((long)(arg2)), "d" ((long)(arg3)), \
- "S" ((long)(arg4)), "D" ((long)(arg5)) \
- : "esp", "memory"); \
- LSS_RETURN(type,__res); \
- }
- LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
- int flags, void *arg, int *parent_tidptr,
- void *newtls, int *child_tidptr) {
- long __res;
- __asm__ __volatile__(/* if (fn == NULL)
- * return -EINVAL;
- */
- "movl %3,%%ecx\n"
- "jecxz 1f\n"
-
- /* if (child_stack == NULL)
- * return -EINVAL;
- */
- "movl %4,%%ecx\n"
- "jecxz 1f\n"
-
- /* Set up alignment of the child stack:
- * child_stack = (child_stack & ~0xF) - 20;
- */
- "andl $-16,%%ecx\n"
- "subl $20,%%ecx\n"
-
- /* Push "arg" and "fn" onto the stack that will be
- * used by the child.
- */
- "movl %6,%%eax\n"
- "movl %%eax,4(%%ecx)\n"
- "movl %3,%%eax\n"
- "movl %%eax,(%%ecx)\n"
-
- /* %eax = syscall(%eax = __NR_clone,
- * %ebx = flags,
- * %ecx = child_stack,
- * %edx = parent_tidptr,
- * %esi = newtls,
- * %edi = child_tidptr)
- * Also, make sure that %ebx gets preserved as it is
- * used in PIC mode.
- */
- "movl %8,%%esi\n"
- "movl %7,%%edx\n"
- "movl %5,%%eax\n"
- "movl %9,%%edi\n"
- "pushl %%ebx\n"
- "movl %%eax,%%ebx\n"
- "movl %2,%%eax\n"
- "int $0x80\n"
-
- /* In the parent: restore %ebx
- * In the child: move "fn" into %ebx
- */
- "popl %%ebx\n"
-
- /* if (%eax != 0)
- * return %eax;
- */
- "test %%eax,%%eax\n"
- "jnz 1f\n"
-
- /* In the child, now. Terminate frame pointer chain.
- */
- "movl $0,%%ebp\n"
-
- /* Call "fn". "arg" is already on the stack.
- */
- "call *%%ebx\n"
-
- /* Call _exit(%ebx). Unfortunately older versions
- * of gcc restrict the number of arguments that can
- * be passed to asm(). So, we need to hard-code the
- * system call number.
- */
- "movl %%eax,%%ebx\n"
- "movl $1,%%eax\n"
- "int $0x80\n"
-
- /* Return to parent.
- */
- "1:\n"
- : "=a" (__res)
- : "0"(-EINVAL), "i"(__NR_clone),
- "m"(fn), "m"(child_stack), "m"(flags), "m"(arg),
- "m"(parent_tidptr), "m"(newtls), "m"(child_tidptr)
- : "esp", "memory", "ecx", "edx", "esi", "edi");
- LSS_RETURN(int, __res);
- }
-
- #define __NR__fadvise64_64 __NR_fadvise64_64
- LSS_INLINE _syscall6(int, _fadvise64_64, int, fd,
- unsigned, offset_lo, unsigned, offset_hi,
- unsigned, len_lo, unsigned, len_hi,
- int, advice)
-
- LSS_INLINE int LSS_NAME(fadvise64)(int fd, loff_t offset,
- loff_t len, int advice) {
- return LSS_NAME(_fadvise64_64)(fd,
- (unsigned)offset, (unsigned)(offset >>32),
- (unsigned)len, (unsigned)(len >> 32),
- advice);
- }
-
- #define __NR__fallocate __NR_fallocate
- LSS_INLINE _syscall6(int, _fallocate, int, fd,
- int, mode,
- unsigned, offset_lo, unsigned, offset_hi,
- unsigned, len_lo, unsigned, len_hi)
-
- LSS_INLINE int LSS_NAME(fallocate)(int fd, int mode,
- loff_t offset, loff_t len) {
- union { loff_t off; unsigned w[2]; } o = { offset }, l = { len };
- return LSS_NAME(_fallocate)(fd, mode, o.w[0], o.w[1], l.w[0], l.w[1]);
- }
-
- LSS_INLINE _syscall1(int, set_thread_area, void *, u)
- LSS_INLINE _syscall1(int, get_thread_area, void *, u)
-
- LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) {
- /* On i386, the kernel does not know how to return from a signal
- * handler. Instead, it relies on user space to provide a
- * restorer function that calls the {rt_,}sigreturn() system call.
- * Unfortunately, we cannot just reference the glibc version of this
- * function, as glibc goes out of its way to make it inaccessible.
- *
- * The restorer code (label 1) is embedded in the asm statement below.
- * It is not executed here: "call 2f" jumps over it, pushing the address
- * of label 0 in the process. That address is popped again at label 2,
- * and adding the distance between labels 0 and 1 (the padding inserted
- * by ".align") yields the address of the restorer code, which is the
- * value returned by this function.
- */
- void (*res)(void);
- __asm__ __volatile__("call 2f\n"
- "0:.align 16\n"
- "1:movl %1,%%eax\n"
- "int $0x80\n"
- "2:popl %0\n"
- "addl $(1b-0b),%0\n"
- : "=a" (res)
- : "i" (__NR_rt_sigreturn));
- return res;
- }
- LSS_INLINE void (*LSS_NAME(restore)(void))(void) {
- /* On i386, the kernel does not know how to return from a signal
- * handler. Instead, it relies on user space to provide a
- * restorer function that calls the {rt_,}sigreturn() system call.
- * Unfortunately, we cannot just reference the glibc version of this
- * function, as glibc goes out of its way to make it inaccessible.
- *
- * The restorer code (label 1) is embedded in the asm statement below.
- * It is not executed here: "call 2f" jumps over it, pushing the address
- * of label 0 in the process. That address is popped again at label 2,
- * and adding the distance between labels 0 and 1 (the padding inserted
- * by ".align") yields the address of the restorer code, which is the
- * value returned by this function.
- * Unlike the rt_sigreturn() variant, this restorer pops one word off
- * the stack before it invokes sigreturn().
- */
- void (*res)(void);
- __asm__ __volatile__("call 2f\n"
- "0:.align 16\n"
- "1:pop %%eax\n"
- "movl %1,%%eax\n"
- "int $0x80\n"
- "2:popl %0\n"
- "addl $(1b-0b),%0\n"
- : "=a" (res)
- : "i" (__NR_sigreturn));
- return res;
- }
- #elif defined(__x86_64__)
- /* There are no known problems with any of the _syscallX() macros
- * currently shipping for x86_64, but we still need to be able to define
- * our own version so that we can override the location of the errno
- * location (e.g. when using the clone() system call with the CLONE_VM
- * option).
- */
- #undef LSS_BODY
- #define LSS_BODY(type,name, ...) \
- long __res; \
- __asm__ __volatile__("syscall" : "=a" (__res) : "0" (__NR_##name), \
- ##__VA_ARGS__ : "r11", "rcx", "memory"); \
- LSS_RETURN(type, __res)
- #undef _syscall0
- #define _syscall0(type,name) \
- type LSS_NAME(name)() { \
- LSS_BODY(type, name); \
- }
- #undef _syscall1
- #define _syscall1(type,name,type1,arg1) \
- type LSS_NAME(name)(type1 arg1) { \
- LSS_BODY(type, name, "D" ((long)(arg1))); \
- }
- #undef _syscall2
- #define _syscall2(type,name,type1,arg1,type2,arg2) \
- type LSS_NAME(name)(type1 arg1, type2 arg2) { \
- LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2))); \
- }
- #undef _syscall3
- #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \
- LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2)), \
- "d" ((long)(arg3))); \
- }
- #undef _syscall4
- #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \
- long __res; \
- __asm__ __volatile__("movq %5,%%r10; syscall" : \
- "=a" (__res) : "0" (__NR_##name), \
- "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \
- "r" ((long)(arg4)) : "r10", "r11", "rcx", "memory"); \
- LSS_RETURN(type, __res); \
- }
- #undef _syscall5
- #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
- type5,arg5) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
- type5 arg5) { \
- long __res; \
- __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; syscall" : \
- "=a" (__res) : "0" (__NR_##name), \
- "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \
- "r" ((long)(arg4)), "r" ((long)(arg5)) : \
- "r8", "r10", "r11", "rcx", "memory"); \
- LSS_RETURN(type, __res); \
- }
- #undef _syscall6
- #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
- type5,arg5,type6,arg6) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
- type5 arg5, type6 arg6) { \
- long __res; \
- __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; movq %7,%%r9;" \
- "syscall" : \
- "=a" (__res) : "0" (__NR_##name), \
- "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \
- "r" ((long)(arg4)), "r" ((long)(arg5)), "r" ((long)(arg6)) : \
- "r8", "r9", "r10", "r11", "rcx", "memory"); \
- LSS_RETURN(type, __res); \
- }
- LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
- int flags, void *arg, int *parent_tidptr,
- void *newtls, int *child_tidptr) { /* x86-64 wrapper for the clone()
- * system call. In the child, "fn(arg)" is called on "child_stack" and
- * its return value is passed to the exit system call. In the parent,
- * the result of the system call is returned; failures are reported as
- * -1 with the error code in LSS_ERRNO. This includes EINVAL when "fn"
- * or "child_stack" is NULL, in which case no system call is made.
- */
- long __res;
- {
- register void *__tls __asm__("r8") = newtls;
- register int *__ctid __asm__("r10") = child_tidptr;
- __asm__ __volatile__(/* if (fn == NULL)
- * return -EINVAL;
- * (%rax is preloaded with -EINVAL)
- */
- "testq %4,%4\n"
- "jz 1f\n"
-
- /* if (child_stack == NULL)
- * return -EINVAL;
- */
- "testq %5,%5\n"
- "jz 1f\n"
-
- /* childstack -= 2*sizeof(void *);
- */
- "subq $16,%5\n"
-
- /* Push "arg" and "fn" onto the stack that will be
- * used by the child.
- */
- "movq %7,8(%5)\n"
- "movq %4,0(%5)\n"
-
- /* %rax = syscall(%rax = __NR_clone,
- * %rdi = flags,
- * %rsi = child_stack,
- * %rdx = parent_tidptr,
- * %r8 = new_tls,
- * %r10 = child_tidptr)
- */
- "movq %2,%%rax\n"
- "syscall\n"
-
- /* if (%rax != 0)
- * return %rax;
- */
- "testq %%rax,%%rax\n"
- "jnz 1f\n"
-
- /* In the child. Terminate frame pointer chain.
- */
- "xorq %%rbp,%%rbp\n"
-
- /* Call "fn(arg)". "fn" is popped into %rax and
- * "arg" into %rdi.
- */
- "popq %%rax\n"
- "popq %%rdi\n"
- "call *%%rax\n"
-
- /* Call _exit(%rdi), where %rdi is set to the value
- * that "fn" returned in %rax.
- */
- "movq %%rax,%%rdi\n"
- "movq %3,%%rax\n"
- "syscall\n"
-
- /* Return to parent.
- */
- "1:\n"
- : "=a" (__res)
- : "0"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit),
- "r"(fn), "S"(child_stack), "D"(flags), "r"(arg),
- "d"(parent_tidptr), "r"(__tls), "r"(__ctid)
- : "rsp", "memory", "r11", "rcx");
- }
- LSS_RETURN(int, __res);
- }
- LSS_INLINE _syscall2(int, arch_prctl, int, c, void *, a)
- LSS_INLINE _syscall4(int, fadvise64, int, fd, loff_t, offset, loff_t, len,
- int, advice)
-
- LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) {
- /* On x86-64, the kernel does not know how to return from
- * a signal handler. Instead, it relies on user space to provide a
- * restorer function that calls the rt_sigreturn() system call.
- * Unfortunately, we cannot just reference the glibc version of this
- * function, as glibc goes out of its way to make it inaccessible.
- *
- * The restorer code (label 1) is embedded in the asm statement below.
- * It is not executed here: "call 2f" jumps over it, pushing the address
- * of label 0 in the process. That address is popped again at label 2,
- * and adding the distance between labels 0 and 1 (the padding inserted
- * by ".align") yields the address of the restorer code, which is the
- * value returned by this function.
- */
- void (*res)(void);
- __asm__ __volatile__("call 2f\n"
- "0:.align 16\n"
- "1:movq %1,%%rax\n"
- "syscall\n"
- "2:popq %0\n"
- "addq $(1b-0b),%0\n"
- : "=a" (res)
- : "i" (__NR_rt_sigreturn));
- return res;
- }
- #elif defined(__ARM_ARCH_3__)
- /* Most definitions of _syscallX() neglect to mark "memory" as being
- * clobbered. This causes problems with compilers that do a better job
- * at optimizing across __asm__ calls.
- * So, we just have to redefine all of the _syscallX() macros.
- */
- #undef LSS_REG
- #define LSS_REG(r,a) register long __r##r __asm__("r"#r) = (long)a
- #undef LSS_BODY
- #define LSS_BODY(type,name,args...) \
- register long __res_r0 __asm__("r0"); \
- long __res; \
- __asm__ __volatile__ (__syscall(name) \
- : "=r"(__res_r0) : args : "lr", "memory"); \
- __res = __res_r0; \
- LSS_RETURN(type, __res)
- #undef _syscall0
- #define _syscall0(type, name) \
- type LSS_NAME(name)() { \
- LSS_BODY(type, name); \
- }
- #undef _syscall1
- #define _syscall1(type, name, type1, arg1) \
- type LSS_NAME(name)(type1 arg1) { \
- LSS_REG(0, arg1); LSS_BODY(type, name, "r"(__r0)); \
- }
- #undef _syscall2
- #define _syscall2(type, name, type1, arg1, type2, arg2) \
- type LSS_NAME(name)(type1 arg1, type2 arg2) { \
- LSS_REG(0, arg1); LSS_REG(1, arg2); \
- LSS_BODY(type, name, "r"(__r0), "r"(__r1)); \
- }
- #undef _syscall3
- #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \
- LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \
- LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2)); \
- }
- #undef _syscall4
- #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \
- LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \
- LSS_REG(3, arg4); \
- LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3)); \
- }
- #undef _syscall5
- #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
- type5,arg5) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
- type5 arg5) { \
- LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \
- LSS_REG(3, arg4); LSS_REG(4, arg5); \
- LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \
- "r"(__r4)); \
- }
- #undef _syscall6
- #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
- type5,arg5,type6,arg6) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
- type5 arg5, type6 arg6) { \
- LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \
- LSS_REG(3, arg4); LSS_REG(4, arg5); LSS_REG(5, arg6); \
- LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \
- "r"(__r4), "r"(__r5)); \
- }
- LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
- int flags, void *arg, int *parent_tidptr,
- void *newtls, int *child_tidptr) {
- long __res;
- {
- register int __flags __asm__("r0") = flags;
- register void *__stack __asm__("r1") = child_stack;
- register void *__ptid __asm__("r2") = parent_tidptr;
- register void *__tls __asm__("r3") = newtls;
- register int *__ctid __asm__("r4") = child_tidptr;
- __asm__ __volatile__(/* if (fn == NULL || child_stack == NULL)
- * return -EINVAL;
- */
- "cmp %2,#0\n"
- "cmpne %3,#0\n"
- "moveq %0,%1\n"
- "beq 1f\n"
-
- /* Push "arg" and "fn" onto the stack that will be
- * used by the child.
- */
- "str %5,[%3,#-4]!\n"
- "str %2,[%3,#-4]!\n"
-
- /* %r0 = syscall(%r0 = flags,
- * %r1 = child_stack,
- * %r2 = parent_tidptr,
- * %r3 = newtls,
- * %r4 = child_tidptr)
- */
- __syscall(clone)"\n"
-
- /* if (%r0 != 0)
- * return %r0;
- */
- "movs %0,r0\n"
- "bne 1f\n"
-
- /* In the child, now. Call "fn(arg)".
- */
- "ldr r0,[sp, #4]\n"
- "mov lr,pc\n"
- "ldr pc,[sp]\n"
-
- /* Call _exit(%r0).
- */
- __syscall(exit)"\n"
- "1:\n"
- : "=r" (__res)
- : "i"(-EINVAL),
- "r"(fn), "r"(__stack), "r"(__flags), "r"(arg),
- "r"(__ptid), "r"(__tls), "r"(__ctid)
- : "lr", "memory");
- }
- LSS_RETURN(int, __res);
- }
- #elif defined(__mips__)
- #undef LSS_REG
- #define LSS_REG(r,a) register unsigned long __r##r __asm__("$"#r) = \
- (unsigned long)(a)
- #undef LSS_BODY
- #define LSS_BODY(type,name,r7,...) \
- register unsigned long __v0 __asm__("$2") = __NR_##name; \
- __asm__ __volatile__ ("syscall\n" \
- : "=&r"(__v0), r7 (__r7) \
- : "0"(__v0), ##__VA_ARGS__ \
- : "$8", "$9", "$10", "$11", "$12", \
- "$13", "$14", "$15", "$24", "memory"); \
- LSS_RETURN(type, __v0, __r7)
- #undef _syscall0
- #define _syscall0(type, name) \
- type LSS_NAME(name)() { \
- register unsigned long __r7 __asm__("$7"); \
- LSS_BODY(type, name, "=r"); \
- }
- #undef _syscall1
- #define _syscall1(type, name, type1, arg1) \
- type LSS_NAME(name)(type1 arg1) { \
- register unsigned long __r7 __asm__("$7"); \
- LSS_REG(4, arg1); LSS_BODY(type, name, "=r", "r"(__r4)); \
- }
- #undef _syscall2
- #define _syscall2(type, name, type1, arg1, type2, arg2) \
- type LSS_NAME(name)(type1 arg1, type2 arg2) { \
- register unsigned long __r7 __asm__("$7"); \
- LSS_REG(4, arg1); LSS_REG(5, arg2); \
- LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5)); \
- }
- #undef _syscall3
- #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \
- register unsigned long __r7 __asm__("$7"); \
- LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \
- LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5), "r"(__r6)); \
- }
- #undef _syscall4
- #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \
- LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \
- LSS_REG(7, arg4); \
- LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6)); \
- }
- #undef _syscall5
- #if _MIPS_SIM == _MIPS_SIM_ABI32
- /* The old 32bit MIPS system call API passes the fifth and sixth argument
- * on the stack, whereas the new APIs use registers "r8" and "r9".
- */
- #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
- type5,arg5) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
- type5 arg5) { \
- LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \
- LSS_REG(7, arg4); \
- register unsigned long __v0 __asm__("$2"); \
- __asm__ __volatile__ (".set noreorder\n" \
- "lw $2, %6\n" \
- "subu $29, 32\n" \
- "sw $2, 16($29)\n" \
- "li $2, %2\n" \
- "syscall\n" \
- "addiu $29, 32\n" \
- ".set reorder\n" \
- : "=&r"(__v0), "+r" (__r7) \
- : "i" (__NR_##name), "r"(__r4), "r"(__r5), \
- "r"(__r6), "m" ((unsigned long)arg5) \
- : "$8", "$9", "$10", "$11", "$12", \
- "$13", "$14", "$15", "$24", "memory"); \
- LSS_RETURN(type, __v0, __r7); \
- }
- #else
- #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
- type5,arg5) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
- type5 arg5) { \
- LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \
- LSS_REG(7, arg4); LSS_REG(8, arg5); \
- LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6), \
- "r"(__r8)); \
- }
- #endif
- #undef _syscall6
- #if _MIPS_SIM == _MIPS_SIM_ABI32
- /* The old 32bit MIPS system call API passes the fifth and sixth argument
- * on the stack, whereas the new APIs use registers "r8" and "r9".
- * The stack variant below stores the two arguments at offsets 16 and 20
- * of a temporary 32 byte stack frame that exists only around the
- * "syscall" instruction.
- * NOTE(review): operands %6 and %7 are declared with an "r" constraint
- * but are read with "lw", whereas the five-argument variant passes its
- * stack argument with an "m" constraint -- confirm that this assembles
- * as intended.
- */
- #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
- type5,arg5,type6,arg6) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
- type5 arg5, type6 arg6) { \
- LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \
- LSS_REG(7, arg4); \
- register unsigned long __v0 __asm__("$2"); \
- __asm__ __volatile__ (".set noreorder\n" \
- "lw $2, %6\n" \
- "lw $8, %7\n" \
- "subu $29, 32\n" \
- "sw $2, 16($29)\n" \
- "sw $8, 20($29)\n" \
- "li $2, %2\n" \
- "syscall\n" \
- "addiu $29, 32\n" \
- ".set reorder\n" \
- : "=&r"(__v0), "+r" (__r7) \
- : "i" (__NR_##name), "r"(__r4), "r"(__r5), \
- "r"(__r6), "r" ((unsigned long)arg5), \
- "r" ((unsigned long)arg6) \
- : "$8", "$9", "$10", "$11", "$12", \
- "$13", "$14", "$15", "$24", "memory"); \
- LSS_RETURN(type, __v0, __r7); \
- }
- #else
- #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
- type5,arg5,type6,arg6) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
- type5 arg5,type6 arg6) { \
- LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \
- LSS_REG(7, arg4); LSS_REG(8, arg5); LSS_REG(9, arg6); \
- LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6), \
- "r"(__r8), "r"(__r9)); \
- }
- #endif
- LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
- int flags, void *arg, int *parent_tidptr,
- void *newtls, int *child_tidptr) {
- register unsigned long __v0 __asm__("$2");
- register unsigned long __r7 __asm__("$7") = (unsigned long)newtls;
- {
- register int __flags __asm__("$4") = flags;
- register void *__stack __asm__("$5") = child_stack;
- register void *__ptid __asm__("$6") = parent_tidptr;
- register int *__ctid __asm__("$8") = child_tidptr;
- __asm__ __volatile__(
- #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32
- "subu $29,24\n"
- #elif _MIPS_SIM == _MIPS_SIM_NABI32
- "sub $29,16\n"
- #else
- "dsubu $29,16\n"
- #endif
-
- /* if (fn == NULL || child_stack == NULL)
- * return -EINVAL;
- */
- "li %0,%2\n"
- "beqz %5,1f\n"
- "beqz %6,1f\n"
-
- /* Push "arg" and "fn" onto the stack that will be
- * used by the child.
- */
- #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32
- "subu %6,32\n"
- "sw %5,0(%6)\n"
- "sw %8,4(%6)\n"
- #elif _MIPS_SIM == _MIPS_SIM_NABI32
- "sub %6,32\n"
- "sw %5,0(%6)\n"
- "sw %8,8(%6)\n"
- #else
- "dsubu %6,32\n"
- "sd %5,0(%6)\n"
- "sd %8,8(%6)\n"
- #endif
-
- /* $7 = syscall($4 = flags,
- * $5 = child_stack,
- * $6 = parent_tidptr,
- * $7 = newtls,
- * $8 = child_tidptr)
- */
- "li $2,%3\n"
- "syscall\n"
-
- /* if ($7 != 0)
- * return $2;
- */
- "bnez $7,1f\n"
- "bnez $2,1f\n"
-
- /* In the child, now. Call "fn(arg)".
- */
- #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32
- "lw $25,0($29)\n"
- "lw $4,4($29)\n"
- #elif _MIPS_SIM == _MIPS_SIM_NABI32
- "lw $25,0($29)\n"
- "lw $4,8($29)\n"
- #else
- "ld $25,0($29)\n"
- "ld $4,8($29)\n"
- #endif
- "jalr $25\n"
-
- /* Call _exit($2)
- */
- "move $4,$2\n"
- "li $2,%4\n"
- "syscall\n"
-
- "1:\n"
- #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32
- "addu $29, 24\n"
- #elif _MIPS_SIM == _MIPS_SIM_NABI32
- "add $29, 16\n"
- #else
- "daddu $29,16\n"
- #endif
- : "=&r" (__v0), "=r" (__r7)
- : "i"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit),
- "r"(fn), "r"(__stack), "r"(__flags), "r"(arg),
- "r"(__ptid), "r"(__r7), "r"(__ctid)
- : "$9", "$10", "$11", "$12", "$13", "$14", "$15",
- "$24", "memory");
- }
- LSS_RETURN(int, __v0, __r7);
- }
- #elif defined (__PPC__)
- #undef LSS_LOADARGS_0
- #define LSS_LOADARGS_0(name, dummy...) \
- __sc_0 = __NR_##name
- #undef LSS_LOADARGS_1
- #define LSS_LOADARGS_1(name, arg1) \
- LSS_LOADARGS_0(name); \
- __sc_3 = (unsigned long) (arg1)
- #undef LSS_LOADARGS_2
- #define LSS_LOADARGS_2(name, arg1, arg2) \
- LSS_LOADARGS_1(name, arg1); \
- __sc_4 = (unsigned long) (arg2)
- #undef LSS_LOADARGS_3
- #define LSS_LOADARGS_3(name, arg1, arg2, arg3) \
- LSS_LOADARGS_2(name, arg1, arg2); \
- __sc_5 = (unsigned long) (arg3)
- #undef LSS_LOADARGS_4
- #define LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4) \
- LSS_LOADARGS_3(name, arg1, arg2, arg3); \
- __sc_6 = (unsigned long) (arg4)
- #undef LSS_LOADARGS_5
- #define LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5) \
- LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4); \
- __sc_7 = (unsigned long) (arg5)
- #undef LSS_LOADARGS_6
- #define LSS_LOADARGS_6(name, arg1, arg2, arg3, arg4, arg5, arg6) \
- LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5); \
- __sc_8 = (unsigned long) (arg6)
- #undef LSS_ASMINPUT_0
- #define LSS_ASMINPUT_0 "0" (__sc_0)
- #undef LSS_ASMINPUT_1
- #define LSS_ASMINPUT_1 LSS_ASMINPUT_0, "1" (__sc_3)
- #undef LSS_ASMINPUT_2
- #define LSS_ASMINPUT_2 LSS_ASMINPUT_1, "2" (__sc_4)
- #undef LSS_ASMINPUT_3
- #define LSS_ASMINPUT_3 LSS_ASMINPUT_2, "3" (__sc_5)
- #undef LSS_ASMINPUT_4
- #define LSS_ASMINPUT_4 LSS_ASMINPUT_3, "4" (__sc_6)
- #undef LSS_ASMINPUT_5
- #define LSS_ASMINPUT_5 LSS_ASMINPUT_4, "5" (__sc_7)
- #undef LSS_ASMINPUT_6
- #define LSS_ASMINPUT_6 LSS_ASMINPUT_5, "6" (__sc_8)
- #undef LSS_BODY
- #define LSS_BODY(nr, type, name, args...) \
- long __sc_ret, __sc_err; \
- { \
- register unsigned long __sc_0 __asm__ ("r0"); \
- register unsigned long __sc_3 __asm__ ("r3"); \
- register unsigned long __sc_4 __asm__ ("r4"); \
- register unsigned long __sc_5 __asm__ ("r5"); \
- register unsigned long __sc_6 __asm__ ("r6"); \
- register unsigned long __sc_7 __asm__ ("r7"); \
- register unsigned long __sc_8 __asm__ ("r8"); \
- \
- LSS_LOADARGS_##nr(name, args); \
- __asm__ __volatile__ \
- ("sc\n\t" \
- "mfcr %0" \
- : "=&r" (__sc_0), \
- "=&r" (__sc_3), "=&r" (__sc_4), \
- "=&r" (__sc_5), "=&r" (__sc_6), \
- "=&r" (__sc_7), "=&r" (__sc_8) \
- : LSS_ASMINPUT_##nr \
- : "cr0", "ctr", "memory", \
- "r9", "r10", "r11", "r12"); \
- __sc_ret = __sc_3; \
- __sc_err = __sc_0; \
- } \
- LSS_RETURN(type, __sc_ret, __sc_err)
- #undef _syscall0
- #define _syscall0(type, name) \
- type LSS_NAME(name)(void) { \
- LSS_BODY(0, type, name); \
- }
- #undef _syscall1
- #define _syscall1(type, name, type1, arg1) \
- type LSS_NAME(name)(type1 arg1) { \
- LSS_BODY(1, type, name, arg1); \
- }
- #undef _syscall2
- #define _syscall2(type, name, type1, arg1, type2, arg2) \
- type LSS_NAME(name)(type1 arg1, type2 arg2) { \
- LSS_BODY(2, type, name, arg1, arg2); \
- }
- #undef _syscall3
- #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \
- LSS_BODY(3, type, name, arg1, arg2, arg3); \
- }
- #undef _syscall4
- #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \
- type4, arg4) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \
- LSS_BODY(4, type, name, arg1, arg2, arg3, arg4); \
- }
- #undef _syscall5
- #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \
- type4, arg4, type5, arg5) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
- type5 arg5) { \
- LSS_BODY(5, type, name, arg1, arg2, arg3, arg4, arg5); \
- }
- #undef _syscall6
- #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, \
- type4, arg4, type5, arg5, type6, arg6) \
- type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
- type5 arg5, type6 arg6) { \
- LSS_BODY(6, type, name, arg1, arg2, arg3, arg4, arg5, arg6); \
- }
- /* clone function adapted from glibc 2.3.6 clone.S.
- * In the child, "fn(arg)" is called on "child_stack" and its return value
- * is passed to the exit system call. In the parent, the values left in
- * r3 and in the condition register after the system call are handed to
- * LSS_RETURN, which turns them into the return value and LSS_ERRNO.
- */
- /* TODO(csilvers): consider wrapping some args up in a struct, like we
- * do for i386's _syscall6, so we can compile successfully on gcc 2.95
- */
- LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
- int flags, void *arg, int *parent_tidptr,
- void *newtls, int *child_tidptr) {
- long __ret, __err;
- {
- register int (*__fn)(void *) __asm__ ("r8") = fn;
- register void *__cstack __asm__ ("r4") = child_stack;
- register int __flags __asm__ ("r3") = flags;
- register void * __arg __asm__ ("r9") = arg;
- register int * __ptidptr __asm__ ("r5") = parent_tidptr;
- register void * __newtls __asm__ ("r6") = newtls;
- register int * __ctidptr __asm__ ("r7") = child_tidptr;
- __asm__ __volatile__(
- /* check for fn == NULL
- * and child_stack == NULL
- */
- "cmpwi cr0, %6, 0\n\t"
- "cmpwi cr1, %7, 0\n\t"
- "cror cr0*4+eq, cr1*4+eq, cr0*4+eq\n\t"
- "beq- cr0, 1f\n\t"
-
- /* set up stack frame for child */
- "clrrwi %7, %7, 4\n\t"
- "li 0, 0\n\t"
- "stwu 0, -16(%7)\n\t"
-
- /* fn, child_stack and arg are saved across the syscall
- * in r28, r29 and r27, respectively
- */
- "mr 28, %6\n\t"
- "mr 29, %7\n\t"
- "mr 27, %9\n\t"
-
- /* syscall */
- "li 0, %4\n\t"
- /* flags already in r3
- * child_stack already in r4
- * ptidptr already in r5
- * newtls already in r6
- * ctidptr already in r7
- */
- "sc\n\t"
-
- /* Test if syscall was successful */
- "cmpwi cr1, 3, 0\n\t"
- "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t"
- "bne- cr1, 1f\n\t"
-
- /* Do the function call */
- "mtctr 28\n\t"
- "mr 3, 27\n\t"
- "bctrl\n\t"
-
- /* Call _exit(r3) */
- "li 0, %5\n\t"
- "sc\n\t"
-
- /* Return to parent.
- * NOTE(review): the NULL-argument check above also branches
- * here, so the preset values of %0 (-1) and %1 (EINVAL)
- * appear to be overwritten from r3 and the condition
- * register on that path as well -- confirm that EINVAL is
- * really reported in that case.
- */
- "1:\n"
- "mfcr %1\n\t"
- "mr %0, 3\n\t"
- : "=r" (__ret), "=r" (__err)
- : "0" (-1), "1" (EINVAL),
- "i" (__NR_clone), "i" (__NR_exit),
- "r" (__fn), "r" (__cstack), "r" (__flags),
- "r" (__arg), "r" (__ptidptr), "r" (__newtls),
- "r" (__ctidptr)
- : "cr0", "cr1", "memory", "ctr",
- "r0", "r29", "r27", "r28");
- }
- LSS_RETURN(int, __ret, __err);
- }
- #endif
- #define __NR__exit __NR_exit
- #define __NR__gettid __NR_gettid
- #define __NR__mremap __NR_mremap
- LSS_INLINE _syscall1(int, brk, void *, e)
- LSS_INLINE _syscall1(int, chdir, const char *,p)
- LSS_INLINE _syscall1(int, close, int, f)
- LSS_INLINE _syscall2(int, clock_getres, int, c,
- struct kernel_timespec*, t)
- LSS_INLINE _syscall2(int, clock_gettime, int, c,
- struct kernel_timespec*, t)
- LSS_INLINE _syscall1(int, dup, int, f)
- LSS_INLINE _syscall2(int, dup2, int, s,
- int, d)
- LSS_INLINE _syscall3(int, execve, const char*, f,
- const char*const*,a,const char*const*, e)
- LSS_INLINE _syscall1(int, _exit, int, e)
- LSS_INLINE _syscall1(int, exit_group, int, e)
- LSS_INLINE _syscall3(int, fcntl, int, f,
- int, c, long, a)
- LSS_INLINE _syscall0(pid_t, fork)
- LSS_INLINE _syscall2(int, fstat, int, f,
- struct kernel_stat*, b)
- LSS_INLINE _syscall2(int, fstatfs, int, f,
- struct kernel_statfs*, b)
- LSS_INLINE _syscall2(int, ftruncate, int, f,
- off_t, l)
- LSS_INLINE _syscall4(int, futex, int*, a,
- int, o, int, v,
- struct kernel_timespec*, t)
- LSS_INLINE _syscall3(int, getdents, int, f,
- struct kernel_dirent*, d, int, c)
- LSS_INLINE _syscall3(int, getdents64, int, f,
- struct kernel_dirent64*, d, int, c)
- LSS_INLINE _syscall0(gid_t, getegid)
- LSS_INLINE _syscall0(uid_t, geteuid)
- LSS_INLINE _syscall0(pid_t, getpgrp)
- LSS_INLINE _syscall0(pid_t, getpid)
- LSS_INLINE _syscall0(pid_t, getppid)
- LSS_INLINE _syscall2(int, getpriority, int, a,
- int, b)
- LSS_INLINE _syscall3(int, getresgid, gid_t *, r,
- gid_t *, e, gid_t *, s)
- LSS_INLINE _syscall3(int, getresuid, uid_t *, r,
- uid_t *, e, uid_t *, s)
- LSS_INLINE _syscall2(int, getrlimit, int, r,
- struct kernel_rlimit*, l)
- LSS_INLINE _syscall1(pid_t, getsid, pid_t, p)
- LSS_INLINE _syscall0(pid_t, _gettid)
- LSS_INLINE _syscall2(int, gettimeofday, struct timeval *, v,
- struct timezone *, z)
- LSS_INLINE _syscall5(int, setxattr, const char *,p,
- const char *, n, const void *,v,
- size_t, s, int, f)
- LSS_INLINE _syscall5(int, lsetxattr, const char *,p,
- const char *, n, const void *,v,
- size_t, s, int, f)
- LSS_INLINE _syscall4(ssize_t, getxattr, const char *,p,
- const char *, n, void *, v, size_t, s)
- LSS_INLINE _syscall4(ssize_t, lgetxattr, const char *,p,
- const char *, n, void *, v, size_t, s)
- LSS_INLINE _syscall3(ssize_t, listxattr, const char *,p,
- char *, l, size_t, s)
- LSS_INLINE _syscall3(ssize_t, llistxattr, const char *,p,
- char *, l, size_t, s)
- LSS_INLINE _syscall3(int, ioctl, int, d,
- int, r, void *, a)
- LSS_INLINE _syscall2(int, ioprio_get, int, which,
- int, who)
- LSS_INLINE _syscall3(int, ioprio_set, int, which,
- int, who, int, ioprio)
- LSS_INLINE _syscall2(int, kill, pid_t, p,
- int, s)
- LSS_INLINE _syscall3(off_t, lseek, int, f,
- off_t, o, int, w)
- LSS_INLINE _syscall2(int, munmap, void*, s,
- size_t, l)
- LSS_INLINE _syscall6(long, move_pages, pid_t, p,
- unsigned long, n, void **,g, int *, d,
- int *, s, int, f)
- LSS_INLINE _syscall3(int, mprotect, const void *,a,
- size_t, l, int, p)
- LSS_INLINE _syscall5(void*, _mremap, void*, o,
- size_t, os, size_t, ns,
- unsigned long, f, void *, a)
- LSS_INLINE _syscall3(int, open, const char*, p,
- int, f, int, m)
- LSS_INLINE _syscall3(int, poll, struct kernel_pollfd*, u,
- unsigned int, n, int, t)
- LSS_INLINE _syscall2(int, prctl, int, o,
- long, a)
- LSS_INLINE _syscall4(long, ptrace, int, r,
- pid_t, p, void *, a, void *, d)
- #if defined(__NR_quotactl)
- // Defined on x86_64 / i386 only
- LSS_INLINE _syscall4(int, quotactl, int, cmd, const char *, special,
- int, id, caddr_t, addr)
- #endif
- LSS_INLINE _syscall3(ssize_t, read, int, f,
- void *, b, size_t, c)
- LSS_INLINE _syscall3(int, readlink, const char*, p,
- char*, b, size_t, s)
- LSS_INLINE _syscall4(int, rt_sigaction, int, s,
- const struct kernel_sigaction*, a,
- struct kernel_sigaction*, o, size_t, c)
- LSS_INLINE _syscall2(int, rt_sigpending, struct kernel_sigset_t *, s,
- size_t, c)
- LSS_INLINE _syscall4(int, rt_sigprocmask, int, h,
- const struct kernel_sigset_t*, s,
- struct kernel_sigset_t*, o, size_t, c);
- LSS_INLINE _syscall1(int, rt_sigreturn, unsigned long, u);
- LSS_INLINE _syscall2(int, rt_sigsuspend,
- const struct kernel_sigset_t*, s, size_t, c);
- LSS_INLINE _syscall3(int, sched_getaffinity,pid_t, p,
- unsigned int, l, unsigned long *, m)
- LSS_INLINE _syscall3(int, sched_setaffinity,pid_t, p,
- unsigned int, l, unsigned long *, m)
- LSS_INLINE _syscall0(int, sched_yield)
- LSS_INLINE _syscall1(long, set_tid_address, int *, t)
- LSS_INLINE _syscall1(int, setfsgid, gid_t, g)
- LSS_INLINE _syscall1(int, setfsuid, uid_t, u)
- LSS_INLINE _syscall1(int, setuid, uid_t, u)
- LSS_INLINE _syscall1(int, setgid, gid_t, g)
- LSS_INLINE _syscall2(int, setpgid, pid_t, p,
- pid_t, g)
- LSS_INLINE _syscall3(int, setpriority, int, a,
- int, b, int, p)
- LSS_INLINE _syscall3(int, setresgid, gid_t, r,
- gid_t, e, gid_t, s)
- LSS_INLINE _syscall3(int, setresuid, uid_t, r,
- uid_t, e, uid_t, s)
- LSS_INLINE _syscall2(int, setrlimit, int, r,
- const struct kernel_rlimit*, l)
- LSS_INLINE _syscall0(pid_t, setsid)
- LSS_INLINE _syscall2(int, sigaltstack, const stack_t*, s,
- const stack_t*, o)
- #if defined(__NR_sigreturn)
- LSS_INLINE _syscall1(int, sigreturn, unsigned long, u);
- #endif
- LSS_INLINE _syscall2(int, stat, const char*, f,
- struct kernel_stat*, b)
- LSS_INLINE _syscall2(int, statfs, const char*, f,
- struct kernel_statfs*, b)
- LSS_INLINE _syscall3(int, tgkill, pid_t, p,
- pid_t, t, int, s)
- LSS_INLINE _syscall2(int, tkill, pid_t, p,
- int, s)
- LSS_INLINE _syscall3(ssize_t, write, int, f,
- const void *, b, size_t, c)
- LSS_INLINE _syscall3(ssize_t, writev, int, f,
- const struct kernel_iovec*, v, size_t, c)
- LSS_INLINE _syscall1(int, unlink, const char*, f)
- #if defined(__NR_getcpu)
- LSS_INLINE _syscall3(long, getcpu, unsigned *, cpu,
- unsigned *, node, void *, unused);
- #endif
- #if defined(__x86_64__) || \
- (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32)
- LSS_INLINE _syscall3(int, recvmsg, int, s,
- struct kernel_msghdr*, m, int, f)
- LSS_INLINE _syscall3(int, sendmsg, int, s,
- const struct kernel_msghdr*, m, int, f)
- LSS_INLINE _syscall6(int, sendto, int, s,
- const void*, m, size_t, l,
- int, f,
- const struct kernel_sockaddr*, a, int, t)
- LSS_INLINE _syscall2(int, shutdown, int, s,
- int, h)
- LSS_INLINE _syscall3(int, socket, int, d,
- int, t, int, p)
- LSS_INLINE _syscall4(int, socketpair, int, d,
- int, t, int, p, int*, s)
- #endif
- #if defined(__x86_64__)
- LSS_INLINE _syscall4(int, fallocate, int, fd, int, mode,
- loff_t, offset, loff_t, len)
-
- LSS_INLINE int LSS_NAME(getresgid32)(gid_t *rgid,
- gid_t *egid,
- gid_t *sgid) {
- return LSS_NAME(getresgid)(rgid, egid, sgid);
- }
-
- LSS_INLINE int LSS_NAME(getresuid32)(uid_t *ruid,
- uid_t *euid,
- uid_t *suid) {
- return LSS_NAME(getresuid)(ruid, euid, suid);
- }
-
- LSS_INLINE _syscall6(void*, mmap, void*, s,
- size_t, l, int, p,
- int, f, int, d,
- __off64_t, o)
-
- LSS_INLINE _syscall4(int, newfstatat, int, d,
- const char *, p,
- struct kernel_stat*, b, int, f)
-
- LSS_INLINE int LSS_NAME(setfsgid32)(gid_t gid) {
- return LSS_NAME(setfsgid)(gid);
- }
-
- LSS_INLINE int LSS_NAME(setfsuid32)(uid_t uid) {
- return LSS_NAME(setfsuid)(uid);
- }
-
- LSS_INLINE int LSS_NAME(setresgid32)(gid_t rgid, gid_t egid, gid_t sgid) {
- return LSS_NAME(setresgid)(rgid, egid, sgid);
- }
-
- LSS_INLINE int LSS_NAME(setresuid32)(uid_t ruid, uid_t euid, uid_t suid) {
- return LSS_NAME(setresuid)(ruid, euid, suid);
- }
-
- LSS_INLINE int LSS_NAME(sigaction)(int signum,
- const struct kernel_sigaction *act,
- struct kernel_sigaction *oldact) {
- /* On x86_64, the kernel requires us to always set our own
- * SA_RESTORER in order to be able to return from a signal handler.
- * This function must have a "magic" signature that the "gdb"
- * (and maybe the kernel?) can recognize.
- */
- if (act != NULL && !(act->sa_flags & SA_RESTORER)) {
- struct kernel_sigaction a = *act;
- a.sa_flags |= SA_RESTORER;
- a.sa_restorer = LSS_NAME(restore_rt)();
- return LSS_NAME(rt_sigaction)(signum, &a, oldact,
- (KERNEL_NSIG+7)/8);
- } else {
- return LSS_NAME(rt_sigaction)(signum, act, oldact,
- (KERNEL_NSIG+7)/8);
- }
- }
-
- LSS_INLINE int LSS_NAME(sigpending)(struct kernel_sigset_t *set) {
- return LSS_NAME(rt_sigpending)(set, (KERNEL_NSIG+7)/8);
- }
-
- LSS_INLINE int LSS_NAME(sigprocmask)(int how,
- const struct kernel_sigset_t *set,
- struct kernel_sigset_t *oldset) {
- return LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8);
- }
-
- LSS_INLINE int LSS_NAME(sigsuspend)(const struct kernel_sigset_t *set) {
- return LSS_NAME(rt_sigsuspend)(set, (KERNEL_NSIG+7)/8);
- }
- #endif
- #if defined(__x86_64__) || defined(__ARM_ARCH_3__) || \
- (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32)
- LSS_INLINE _syscall4(pid_t, wait4, pid_t, p,
- int*, s, int, o,
- struct kernel_rusage*, r)
-
- LSS_INLINE pid_t LSS_NAME(waitpid)(pid_t pid, int *status, int options){
- return LSS_NAME(wait4)(pid, status, options, 0);
- }
- #endif
- #if defined(__i386__) || defined(__x86_64__)
- LSS_INLINE _syscall4(int, openat, int, d, const char *, p, int, f, int, m)
- LSS_INLINE _syscall3(int, unlinkat, int, d, const char *, p, int, f)
- #endif
- #if defined(__i386__) || defined(__ARM_ARCH_3__)
- #define __NR__getresgid32 __NR_getresgid32
- #define __NR__getresuid32 __NR_getresuid32
- #define __NR__setfsgid32 __NR_setfsgid32
- #define __NR__setfsuid32 __NR_setfsuid32
- #define __NR__setresgid32 __NR_setresgid32
- #define __NR__setresuid32 __NR_setresuid32
- LSS_INLINE _syscall2(int, ugetrlimit, int, r,
- struct kernel_rlimit*, l)
- LSS_INLINE _syscall3(int, _getresgid32, gid_t *, r,
- gid_t *, e, gid_t *, s)
- LSS_INLINE _syscall3(int, _getresuid32, uid_t *, r,
- uid_t *, e, uid_t *, s)
- LSS_INLINE _syscall1(int, _setfsgid32, gid_t, f)
- LSS_INLINE _syscall1(int, _setfsuid32, uid_t, f)
- LSS_INLINE _syscall3(int, _setresgid32, gid_t, r,
- gid_t, e, gid_t, s)
- LSS_INLINE _syscall3(int, _setresuid32, uid_t, r,
- uid_t, e, uid_t, s)
-
- LSS_INLINE int LSS_NAME(getresgid32)(gid_t *rgid,
- gid_t *egid,
- gid_t *sgid) {
- int rc;
- if ((rc = LSS_NAME(_getresgid32)(rgid, egid, sgid)) < 0 &&
- LSS_ERRNO == ENOSYS) {
- if ((rgid == NULL) || (egid == NULL) || (sgid == NULL)) {
- return EFAULT;
- }
- // Clear the high bits first, since getresgid only sets 16 bits
- *rgid = *egid = *sgid = 0;
- rc = LSS_NAME(getresgid)(rgid, egid, sgid);
- }
- return rc;
- }
-
- LSS_INLINE int LSS_NAME(getresuid32)(uid_t *ruid,
- uid_t *euid,
- uid_t *suid) {
- int rc;
- if ((rc = LSS_NAME(_getresuid32)(ruid, euid, suid)) < 0 &&
- LSS_ERRNO == ENOSYS) {
- if ((ruid == NULL) || (euid == NULL) || (suid == NULL)) {
- return EFAULT;
- }
- // Clear the high bits first, since getresuid only sets 16 bits
- *ruid = *euid = *suid = 0;
- rc = LSS_NAME(getresuid)(ruid, euid, suid);
- }
- return rc;
- }
-
- LSS_INLINE int LSS_NAME(setfsgid32)(gid_t gid) {
- int rc;
- if ((rc = LSS_NAME(_setfsgid32)(gid)) < 0 &&
- LSS_ERRNO == ENOSYS) {
- if ((unsigned int)gid & ~0xFFFFu) {
- rc = EINVAL;
- } else {
- rc = LSS_NAME(setfsgid)(gid);
- }
- }
- return rc;
- }
-
- LSS_INLINE int LSS_NAME(setfsuid32)(uid_t uid) {
- int rc;
- if ((rc = LSS_NAME(_setfsuid32)(uid)) < 0 &&
- LSS_ERRNO == ENOSYS) {
- if ((unsigned int)uid & ~0xFFFFu) {
- rc = EINVAL;
- } else {
- rc = LSS_NAME(setfsuid)(uid);
- }
- }
- return rc;
- }
-
- LSS_INLINE int LSS_NAME(setresgid32)(gid_t rgid, gid_t egid, gid_t sgid) {
- int rc;
- if ((rc = LSS_NAME(_setresgid32)(rgid, egid, sgid)) < 0 &&
- LSS_ERRNO == ENOSYS) {
- if ((unsigned int)rgid & ~0xFFFFu ||
- (unsigned int)egid & ~0xFFFFu ||
- (unsigned int)sgid & ~0xFFFFu) {
- rc = EINVAL;
- } else {
- rc = LSS_NAME(setresgid)(rgid, egid, sgid);
- }
- }
- return rc;
- }
-
- LSS_INLINE int LSS_NAME(setresuid32)(uid_t ruid, uid_t euid, uid_t suid) {
- int rc;
- if ((rc = LSS_NAME(_setresuid32)(ruid, euid, suid)) < 0 &&
- LSS_ERRNO == ENOSYS) {
- if ((unsigned int)ruid & ~0xFFFFu ||
- (unsigned int)euid & ~0xFFFFu ||
- (unsigned int)suid & ~0xFFFFu) {
- rc = EINVAL;
- } else {
- rc = LSS_NAME(setresuid)(ruid, euid, suid);
- }
- }
- return rc;
- }
- #endif
- LSS_INLINE int LSS_NAME(sigemptyset)(struct kernel_sigset_t *set) {
- memset(&set->sig, 0, sizeof(set->sig));
- return 0;
- }
-
- LSS_INLINE int LSS_NAME(sigfillset)(struct kernel_sigset_t *set) {
- memset(&set->sig, -1, sizeof(set->sig));
- return 0;
- }
-
- LSS_INLINE int LSS_NAME(sigaddset)(struct kernel_sigset_t *set,
- int signum) {
- if (signum < 1 || signum > (int)(8*sizeof(set->sig))) {
- LSS_ERRNO = EINVAL;
- return -1;
- } else {
- set->sig[(signum - 1)/(8*sizeof(set->sig[0]))]
- |= 1UL << ((signum - 1) % (8*sizeof(set->sig[0])));
- return 0;
- }
- }
-
- LSS_INLINE int LSS_NAME(sigdelset)(struct kernel_sigset_t *set,
- int signum) {
- if (signum < 1 || signum > (int)(8*sizeof(set->sig))) {
- LSS_ERRNO = EINVAL;
- return -1;
- } else {
- set->sig[(signum - 1)/(8*sizeof(set->sig[0]))]
- &= ~(1UL << ((signum - 1) % (8*sizeof(set->sig[0]))));
- return 0;
- }
- }
-
- LSS_INLINE int LSS_NAME(sigismember)(struct kernel_sigset_t *set,
- int signum) {
- if (signum < 1 || signum > (int)(8*sizeof(set->sig))) {
- LSS_ERRNO = EINVAL;
- return -1;
- } else {
- return !!(set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] &
- (1UL << ((signum - 1) % (8*sizeof(set->sig[0])))));
- }
- }
- #if defined(__i386__) || defined(__ARM_ARCH_3__) || \
- (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || defined(__PPC__)
- #define __NR__sigaction __NR_sigaction
- #define __NR__sigpending __NR_sigpending
- #define __NR__sigprocmask __NR_sigprocmask
- #define __NR__sigsuspend __NR_sigsuspend
- #define __NR__socketcall __NR_socketcall
- LSS_INLINE _syscall2(int, fstat64, int, f,
- struct kernel_stat64 *, b)
- LSS_INLINE _syscall5(int, _llseek, uint, fd, ulong, hi, ulong, lo,
- loff_t *, res, uint, wh)
- LSS_INLINE _syscall1(void*, mmap, void*, a)
- LSS_INLINE _syscall6(void*, mmap2, void*, s,
- size_t, l, int, p,
- int, f, int, d,
- __off64_t, o)
- LSS_INLINE _syscall3(int, _sigaction, int, s,
- const struct kernel_old_sigaction*, a,
- struct kernel_old_sigaction*, o)
- LSS_INLINE _syscall1(int, _sigpending, unsigned long*, s)
- LSS_INLINE _syscall3(int, _sigprocmask, int, h,
- const unsigned long*, s,
- unsigned long*, o)
- #ifdef __PPC__
- LSS_INLINE _syscall1(int, _sigsuspend, unsigned long, s)
- #else
- LSS_INLINE _syscall3(int, _sigsuspend, const void*, a,
- int, b,
- unsigned long, s)
- #endif
- LSS_INLINE _syscall2(int, stat64, const char *, p,
- struct kernel_stat64 *, b)
-
- LSS_INLINE int LSS_NAME(sigaction)(int signum,
- const struct kernel_sigaction *act,
- struct kernel_sigaction *oldact) {
- int old_errno = LSS_ERRNO;
- int rc;
- struct kernel_sigaction a;
- if (act != NULL) {
- a = *act;
- #ifdef __i386__
- /* On i386, the kernel requires us to always set our own
- * SA_RESTORER when using realtime signals. Otherwise, it does not
- * know how to return from a signal handler. This function must have
- * a "magic" signature that the "gdb" (and maybe the kernel?) can
- * recognize.
- * Apparently, a SA_RESTORER is implicitly set by the kernel, when
- * using non-realtime signals.
- *
- * TODO: Test whether ARM needs a restorer
- */
- if (!(a.sa_flags & SA_RESTORER)) {
- a.sa_flags |= SA_RESTORER;
- a.sa_restorer = (a.sa_flags & SA_SIGINFO)
- ? LSS_NAME(restore_rt)() : LSS_NAME(restore)();
- }
- #endif
- }
- rc = LSS_NAME(rt_sigaction)(signum, act ? &a : act, oldact,
- (KERNEL_NSIG+7)/8);
- if (rc < 0 && LSS_ERRNO == ENOSYS) {
- struct kernel_old_sigaction oa, ooa, *ptr_a = &oa, *ptr_oa = &ooa;
- if (!act) {
- ptr_a = NULL;
- } else {
- oa.sa_handler_ = act->sa_handler_;
- memcpy(&oa.sa_mask, &act->sa_mask, sizeof(oa.sa_mask));
- #ifndef __mips__
- oa.sa_restorer = act->sa_restorer;
- #endif
- oa.sa_flags = act->sa_flags;
- }
- if (!oldact) {
- ptr_oa = NULL;
- }
- LSS_ERRNO = old_errno;
- rc = LSS_NAME(_sigaction)(signum, ptr_a, ptr_oa);
- if (rc == 0 && oldact) {
- if (act) {
- memcpy(oldact, act, sizeof(*act));
- } else {
- memset(oldact, 0, sizeof(*oldact));
- }
- oldact->sa_handler_ = ptr_oa->sa_handler_;
- oldact->sa_flags = ptr_oa->sa_flags;
- memcpy(&oldact->sa_mask, &ptr_oa->sa_mask, sizeof(ptr_oa->sa_mask));
- #ifndef __mips__
- oldact->sa_restorer = ptr_oa->sa_restorer;
- #endif
- }
- }
- return rc;
- }
-
- LSS_INLINE int LSS_NAME(sigpending)(struct kernel_sigset_t *set) {
- int old_errno = LSS_ERRNO;
- int rc = LSS_NAME(rt_sigpending)(set, (KERNEL_NSIG+7)/8);
- if (rc < 0 && LSS_ERRNO == ENOSYS) {
- LSS_ERRNO = old_errno;
- LSS_NAME(sigemptyset)(set);
- rc = LSS_NAME(_sigpending)(&set->sig[0]);
- }
- return rc;
- }
-
- LSS_INLINE int LSS_NAME(sigprocmask)(int how,
- const struct kernel_sigset_t *set,
- struct kernel_sigset_t *oldset) {
- int olderrno = LSS_ERRNO;
- int rc = LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8);
- if (rc < 0 && LSS_ERRNO == ENOSYS) {
- LSS_ERRNO = olderrno;
- if (oldset) {
- LSS_NAME(sigemptyset)(oldset);
- }
- rc = LSS_NAME(_sigprocmask)(how,
- set ? &set->sig[0] : NULL,
- oldset ? &oldset->sig[0] : NULL);
- }
- return rc;
- }
-
- LSS_INLINE int LSS_NAME(sigsuspend)(const struct kernel_sigset_t *set) {
- int olderrno = LSS_ERRNO;
- int rc = LSS_NAME(rt_sigsuspend)(set, (KERNEL_NSIG+7)/8);
- if (rc < 0 && LSS_ERRNO == ENOSYS) {
- LSS_ERRNO = olderrno;
- rc = LSS_NAME(_sigsuspend)(
- #ifndef __PPC__
- set, 0,
- #endif
- set->sig[0]);
- }
- return rc;
- }
- #endif
- #if defined(__PPC__)
- #undef LSS_SC_LOADARGS_0
- #define LSS_SC_LOADARGS_0(dummy...)
- #undef LSS_SC_LOADARGS_1
- #define LSS_SC_LOADARGS_1(arg1) \
- __sc_4 = (unsigned long) (arg1)
- #undef LSS_SC_LOADARGS_2
- #define LSS_SC_LOADARGS_2(arg1, arg2) \
- LSS_SC_LOADARGS_1(arg1); \
- __sc_5 = (unsigned long) (arg2)
- #undef LSS_SC_LOADARGS_3
- #define LSS_SC_LOADARGS_3(arg1, arg2, arg3) \
- LSS_SC_LOADARGS_2(arg1, arg2); \
- __sc_6 = (unsigned long) (arg3)
- #undef LSS_SC_LOADARGS_4
- #define LSS_SC_LOADARGS_4(arg1, arg2, arg3, arg4) \
- LSS_SC_LOADARGS_3(arg1, arg2, arg3); \
- __sc_7 = (unsigned long) (arg4)
- #undef LSS_SC_LOADARGS_5
- #define LSS_SC_LOADARGS_5(arg1, arg2, arg3, arg4, arg5) \
- LSS_SC_LOADARGS_4(arg1, arg2, arg3, arg4); \
- __sc_8 = (unsigned long) (arg5)
- #undef LSS_SC_BODY
- #define LSS_SC_BODY(nr, type, opt, args...) \
- long __sc_ret, __sc_err; \
- { \
- register unsigned long __sc_0 __asm__ ("r0") = __NR_socketcall; \
- register unsigned long __sc_3 __asm__ ("r3") = opt; \
- register unsigned long __sc_4 __asm__ ("r4"); \
- register unsigned long __sc_5 __asm__ ("r5"); \
- register unsigned long __sc_6 __asm__ ("r6"); \
- register unsigned long __sc_7 __asm__ ("r7"); \
- register unsigned long __sc_8 __asm__ ("r8"); \
- LSS_SC_LOADARGS_##nr(args); \
- __asm__ __volatile__ \
- ("stwu 1, -48(1)\n\t" \
- "stw 4, 20(1)\n\t" \
- "stw 5, 24(1)\n\t" \
- "stw 6, 28(1)\n\t" \
- "stw 7, 32(1)\n\t" \
- "stw 8, 36(1)\n\t" \
- "addi 4, 1, 20\n\t" \
- "sc\n\t" \
- "mfcr %0" \
- : "=&r" (__sc_0), \
- "=&r" (__sc_3), "=&r" (__sc_4), \
- "=&r" (__sc_5), "=&r" (__sc_6), \
- "=&r" (__sc_7), "=&r" (__sc_8) \
- : LSS_ASMINPUT_##nr \
- : "cr0", "ctr", "memory"); \
- __sc_ret = __sc_3; \
- __sc_err = __sc_0; \
- } \
- LSS_RETURN(type, __sc_ret, __sc_err)
-
- LSS_INLINE ssize_t LSS_NAME(recvmsg)(int s,struct kernel_msghdr *msg,
- int flags){
- LSS_SC_BODY(3, ssize_t, 17, s, msg, flags);
- }
-
- LSS_INLINE ssize_t LSS_NAME(sendmsg)(int s,
- const struct kernel_msghdr *msg,
- int flags) {
- LSS_SC_BODY(3, ssize_t, 16, s, msg, flags);
- }
-
- // TODO(csilvers): why is this ifdef'ed out?
-#if 0
- LSS_INLINE ssize_t LSS_NAME(sendto)(int s, const void *buf, size_t len,
- int flags,
- const struct kernel_sockaddr *to,
- unsigned int tolen) {
- LSS_BODY(6, ssize_t, 11, s, buf, len, flags, to, tolen);
- }
-#endif
-
- LSS_INLINE int LSS_NAME(shutdown)(int s, int how) {
- LSS_SC_BODY(2, int, 13, s, how);
- }
-
- LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) {
- LSS_SC_BODY(3, int, 1, domain, type, protocol);
- }
-
- LSS_INLINE int LSS_NAME(socketpair)(int d, int type, int protocol,
- int sv[2]) {
- LSS_SC_BODY(4, int, 8, d, type, protocol, sv);
- }
- #endif
- #if defined(__i386__) || defined(__ARM_ARCH_3__) || \
- (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32)
- #define __NR__socketcall __NR_socketcall
- LSS_INLINE _syscall2(int, _socketcall, int, c,
- va_list, a)
-
- LSS_INLINE int LSS_NAME(socketcall)(int op, ...) {
- int rc;
- va_list ap;
- va_start(ap, op);
- rc = LSS_NAME(_socketcall)(op, ap);
- va_end(ap);
- return rc;
- }
-
- LSS_INLINE ssize_t LSS_NAME(recvmsg)(int s,struct kernel_msghdr *msg,
- int flags){
- return (ssize_t)LSS_NAME(socketcall)(17, s, msg, flags);
- }
-
- LSS_INLINE ssize_t LSS_NAME(sendmsg)(int s,
- const struct kernel_msghdr *msg,
- int flags) {
- return (ssize_t)LSS_NAME(socketcall)(16, s, msg, flags);
- }
-
- LSS_INLINE ssize_t LSS_NAME(sendto)(int s, const void *buf, size_t len,
- int flags,
- const struct kernel_sockaddr *to,
- unsigned int tolen) {
- return (ssize_t)LSS_NAME(socketcall)(11, s, buf, len, flags, to, tolen);
- }
-
- LSS_INLINE int LSS_NAME(shutdown)(int s, int how) {
- return LSS_NAME(socketcall)(13, s, how);
- }
-
- LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) {
- return LSS_NAME(socketcall)(1, domain, type, protocol);
- }
-
- LSS_INLINE int LSS_NAME(socketpair)(int d, int type, int protocol,
- int sv[2]) {
- return LSS_NAME(socketcall)(8, d, type, protocol, sv);
- }
- #endif
- #if defined(__i386__) || defined(__PPC__)
- LSS_INLINE _syscall4(int, fstatat64, int, d,
- const char *, p,
- struct kernel_stat64 *, b, int, f)
- #endif
- #if defined(__i386__) || defined(__PPC__) || \
- (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32)
- LSS_INLINE _syscall3(pid_t, waitpid, pid_t, p,
- int*, s, int, o)
- #endif
- #if defined(__mips__)
- /* sys_pipe() on MIPS has non-standard calling conventions, as it returns
- * both file handles through CPU registers.
- */
- LSS_INLINE int LSS_NAME(pipe)(int *p) {
- register unsigned long __v0 __asm__("$2") = __NR_pipe;
- register unsigned long __v1 __asm__("$3");
- register unsigned long __r7 __asm__("$7");
- __asm__ __volatile__ ("syscall\n"
- : "=&r"(__v0), "=&r"(__v1), "+r" (__r7)
- : "0"(__v0)
- : "$8", "$9", "$10", "$11", "$12",
- "$13", "$14", "$15", "$24", "memory");
- if (__r7) {
- LSS_ERRNO = __v0;
- return -1;
- } else {
- p[0] = __v0;
- p[1] = __v1;
- return 0;
- }
- }
- #else
- LSS_INLINE _syscall1(int, pipe, int *, p)
- #endif
- /* TODO(csilvers): see if ppc can/should support this as well */
- #if defined(__i386__) || defined(__ARM_ARCH_3__) || \
- (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64)
- #define __NR__statfs64 __NR_statfs64
- #define __NR__fstatfs64 __NR_fstatfs64
- LSS_INLINE _syscall3(int, _statfs64, const char*, p,
- size_t, s,struct kernel_statfs64*, b)
- LSS_INLINE _syscall3(int, _fstatfs64, int, f,
- size_t, s,struct kernel_statfs64*, b)
- LSS_INLINE int LSS_NAME(statfs64)(const char *p,
- struct kernel_statfs64 *b) {
- return LSS_NAME(_statfs64)(p, sizeof(*b), b);
- }
- LSS_INLINE int LSS_NAME(fstatfs64)(int f,struct kernel_statfs64 *b) {
- return LSS_NAME(_fstatfs64)(f, sizeof(*b), b);
- }
- #endif
-
- LSS_INLINE int LSS_NAME(execv)(const char *path, const char *const argv[]) {
- extern char **environ;
- return LSS_NAME(execve)(path, argv, (const char *const *)environ);
- }
-
- LSS_INLINE pid_t LSS_NAME(gettid)() {
- pid_t tid = LSS_NAME(_gettid)();
- if (tid != -1) {
- return tid;
- }
- return LSS_NAME(getpid)();
- }
-
- LSS_INLINE void *LSS_NAME(mremap)(void *old_address, size_t old_size,
- size_t new_size, int flags, ...) {
- va_list ap;
- void *new_address, *rc;
- va_start(ap, flags);
- new_address = va_arg(ap, void *);
- rc = LSS_NAME(_mremap)(old_address, old_size, new_size,
- flags, new_address);
- va_end(ap);
- return rc;
- }
-
- LSS_INLINE int LSS_NAME(ptrace_detach)(pid_t pid) {
- /* PTRACE_DETACH can sometimes forget to wake up the tracee and it
- * then sends job control signals to the real parent, rather than to
- * the tracer. We reduce the risk of this happening by starting a
- * whole new time slice, and then quickly sending a SIGCONT signal
- * right after detaching from the tracee.
- *
- * We use tkill to ensure that we only issue a wakeup for the thread being
- * detached. Large multi threaded apps can take a long time in the kernel
- * processing SIGCONT.
- */
- int rc, err;
- LSS_NAME(sched_yield)();
- rc = LSS_NAME(ptrace)(PTRACE_DETACH, pid, (void *)0, (void *)0);
- err = LSS_ERRNO;
- LSS_NAME(tkill)(pid, SIGCONT);
- /* Old systems don't have tkill */
- if (LSS_ERRNO == ENOSYS)
- LSS_NAME(kill)(pid, SIGCONT);
- LSS_ERRNO = err;
- return rc;
- }
-
- LSS_INLINE int LSS_NAME(raise)(int sig) {
- return LSS_NAME(kill)(LSS_NAME(getpid)(), sig);
- }
-
- LSS_INLINE int LSS_NAME(setpgrp)() {
- return LSS_NAME(setpgid)(0, 0);
- }
-
- LSS_INLINE int LSS_NAME(sysconf)(int name) {
- extern int __getpagesize(void);
- switch (name) {
- case _SC_OPEN_MAX: {
- struct kernel_rlimit limit;
- return LSS_NAME(getrlimit)(RLIMIT_NOFILE, &limit) < 0
- ? 8192 : limit.rlim_cur;
- }
- case _SC_PAGESIZE:
- return __getpagesize();
- default:
- LSS_ERRNO = ENOSYS;
- return -1;
- }
- }
- #if defined(__x86_64__) || \
- (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI64)
- LSS_INLINE _syscall4(ssize_t, pread64, int, f,
- void *, b, size_t, c,
- loff_t, o)
- LSS_INLINE _syscall4(ssize_t, pwrite64, int, f,
- const void *, b, size_t, c,
- loff_t, o)
- LSS_INLINE _syscall3(int, readahead, int, f,
- loff_t, o, unsigned, c)
- #else
- #define __NR__pread64 __NR_pread64
- #define __NR__pwrite64 __NR_pwrite64
- #define __NR__readahead __NR_readahead
- LSS_INLINE _syscall5(ssize_t, _pread64, int, f,
- void *, b, size_t, c, unsigned, o1,
- unsigned, o2)
- LSS_INLINE _syscall5(ssize_t, _pwrite64, int, f,
- const void *, b, size_t, c, unsigned, o1,
- long, o2)
- LSS_INLINE _syscall4(int, _readahead, int, f,
- unsigned, o1, unsigned, o2, size_t, c);
- /* We force 64bit-wide parameters onto the stack, then access each
- * 32-bit component individually. This guarantees that we build the
- * correct parameters independent of the native byte-order of the
- * underlying architecture.
- */
- LSS_INLINE ssize_t LSS_NAME(pread64)(int fd, void *buf, size_t count,
- loff_t off) {
- union { loff_t off; unsigned arg[2]; } o = { off };
- return LSS_NAME(_pread64)(fd, buf, count, o.arg[0], o.arg[1]);
- }
- LSS_INLINE ssize_t LSS_NAME(pwrite64)(int fd, const void *buf,
- size_t count, loff_t off) {
- union { loff_t off; unsigned arg[2]; } o = { off };
- return LSS_NAME(_pwrite64)(fd, buf, count, o.arg[0], o.arg[1]);
- }
- LSS_INLINE int LSS_NAME(readahead)(int fd, loff_t off, int len) {
- union { loff_t off; unsigned arg[2]; } o = { off };
- return LSS_NAME(_readahead)(fd, o.arg[0], o.arg[1], len);
- }
- #endif
-#endif
-
-#if defined(__cplusplus) && !defined(SYS_CPLUSPLUS)
-}
-#endif
-
-#endif
-#endif
diff --git a/sandbox/linux/seccomp/madvise.cc b/sandbox/linux/seccomp/madvise.cc
deleted file mode 100644
index 70c594f..0000000
--- a/sandbox/linux/seccomp/madvise.cc
+++ /dev/null
@@ -1,81 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "debug.h"
-#include "sandbox_impl.h"
-
-namespace playground {
-
-long Sandbox::sandbox_madvise(void* start, size_t length, int advice) {
- long long tm;
- Debug::syscall(&tm, __NR_madvise, "Executing handler");
- struct {
- int sysnum;
- long long cookie;
- MAdvise madvise_req;
- } __attribute__((packed)) request;
- request.sysnum = __NR_madvise;
- request.cookie = cookie();
- request.madvise_req.start = start;
- request.madvise_req.len = length;
- request.madvise_req.advice = advice;
-
- long rc;
- SysCalls sys;
- if (write(sys, processFdPub(), &request, sizeof(request)) !=
- sizeof(request) ||
- read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
- die("Failed to forward madvise() request [sandbox]");
- }
- Debug::elapsed(tm, __NR_madvise);
- return rc;
-}
-
-bool Sandbox::process_madvise(int parentMapsFd, int sandboxFd, int threadFdPub,
- int threadFd, SecureMem::Args* mem) {
- // Read request
- MAdvise madvise_req;
- SysCalls sys;
- if (read(sys, sandboxFd, &madvise_req, sizeof(madvise_req)) !=
- sizeof(madvise_req)) {
- die("Failed to read parameters for madvise() [process]");
- }
- int rc = -EINVAL;
- switch (madvise_req.advice) {
- case MADV_NORMAL:
- case MADV_RANDOM:
- case MADV_SEQUENTIAL:
- case MADV_WILLNEED:
- ok:
- SecureMem::sendSystemCall(threadFdPub, false, -1, mem, __NR_madvise,
- madvise_req.start, madvise_req.len,
- madvise_req.advice);
- return true;
- default:
- // All other flags to madvise() are potential dangerous (as opposed to
- // merely affecting overall performance). Do not allow them on memory
- // ranges that were part of the original mappings.
- void *stop = reinterpret_cast<void *>(
- (char *)madvise_req.start + madvise_req.len);
- ProtectedMap::const_iterator iter = protectedMap_.lower_bound(
- (void *)madvise_req.start);
- if (iter != protectedMap_.begin()) {
- --iter;
- }
- for (; iter != protectedMap_.end() && iter->first < stop; ++iter) {
- if (madvise_req.start < reinterpret_cast<void *>(
- reinterpret_cast<char *>(iter->first) + iter->second) &&
- stop > iter->first) {
- SecureMem::abandonSystemCall(threadFd, rc);
- return false;
- }
- }
-
- // Changing attributes on memory regions that were newly mapped inside of
- // the sandbox is OK.
- goto ok;
- }
-}
-
-} // namespace
diff --git a/sandbox/linux/seccomp/maps.cc b/sandbox/linux/seccomp/maps.cc
deleted file mode 100644
index 8ae218d..0000000
--- a/sandbox/linux/seccomp/maps.cc
+++ /dev/null
@@ -1,267 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include <errno.h>
-#include <fcntl.h>
-#include <linux/unistd.h>
-#include <signal.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#include <sys/ptrace.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-
-#include "library.h"
-#include "maps.h"
-#include "sandbox_impl.h"
-
-namespace playground {
-
-Maps::Maps(int proc_self_maps) :
- proc_self_maps_(proc_self_maps),
- begin_iter_(this, true, false),
- end_iter_(this, false, true),
- vsyscall_(0) {
- Sandbox::SysCalls sys;
- if (proc_self_maps_ >= 0 &&
- !sys.lseek(proc_self_maps_, 0, SEEK_SET)) {
- char buf[256] = { 0 };
- int len = 0, rc = 1;
- bool long_line = false;
- do {
- if (rc > 0) {
- rc = Sandbox::read(sys, proc_self_maps_, buf + len,
- sizeof(buf) - len - 1);
- if (rc > 0) {
- len += rc;
- }
- }
- char *ptr = buf;
- if (!long_line) {
- long_line = true;
- unsigned long start = strtoul(ptr, &ptr, 16);
- unsigned long stop = strtoul(ptr + 1, &ptr, 16);
- while (*ptr == ' ' || *ptr == '\t') ++ptr;
- char *perm_ptr = ptr;
- while (*ptr && *ptr != ' ' && *ptr != '\t') ++ptr;
- string perm(perm_ptr, ptr - perm_ptr);
- unsigned long offset = strtoul(ptr, &ptr, 16);
- while (*ptr == ' ' || *ptr == '\t') ++ptr;
- char *id_ptr = ptr;
- while (*ptr && *ptr != ' ' && *ptr != '\t') ++ptr;
- while (*ptr == ' ' || *ptr == '\t') ++ptr;
- while (*ptr && *ptr != ' ' && *ptr != '\t') ++ptr;
- string id(id_ptr, ptr - id_ptr);
- while (*ptr == ' ' || *ptr == '\t') ++ptr;
- char *library_ptr = ptr;
- while (*ptr && *ptr != ' ' && *ptr != '\t' && *ptr != '\n') ++ptr;
- string library(library_ptr, ptr - library_ptr);
- bool isVDSO = false;
- if (library == "[vdso]") {
- // /proc/self/maps has a misleading file offset in the [vdso] entry.
- // Override it with a sane value.
- offset = 0;
- isVDSO = true;
- } else if (library == "[vsyscall]") {
- vsyscall_ = reinterpret_cast<char *>(start);
- } else if (library.empty() || library[0] == '[') {
- goto skip_entry;
- }
- int prot = 0;
- if (perm.find('r') != string::npos) {
- prot |= PROT_READ;
- }
- if (perm.find('w') != string::npos) {
- prot |= PROT_WRITE;
- }
- if (perm.find('x') != string::npos) {
- prot |= PROT_EXEC;
- }
- if ((prot & (PROT_EXEC | PROT_READ)) == 0) {
- goto skip_entry;
- }
- Library* lib = &libs_[id + ' ' + library];
- lib->setLibraryInfo(this);
- lib->addMemoryRange(reinterpret_cast<void *>(start),
- reinterpret_cast<void *>(stop),
- Elf_Addr(offset),
- prot, isVDSO);
- }
- skip_entry:
- for (;;) {
- if (!*ptr || *ptr++ == '\n') {
- long_line = false;
- memmove(buf, ptr, len - (ptr - buf));
- memset(buf + len - (ptr - buf), 0, ptr - buf);
- len -= (ptr - buf);
- break;
- }
- }
- } while (len || long_line);
- }
-}
-
-Maps::Iterator::Iterator(Maps* maps, bool at_beginning, bool at_end)
- : maps_(maps),
- at_beginning_(at_beginning),
- at_end_(at_end) {
-}
-
-Maps::LibraryMap::iterator& Maps::Iterator::getIterator() const {
- if (at_beginning_) {
- iter_ = maps_->libs_.begin();
- } else if (at_end_) {
- iter_ = maps_->libs_.end();
- }
- return iter_;
-}
-
-Maps::Iterator Maps::Iterator::begin() {
- return maps_->begin_iter_;
-}
-
-Maps::Iterator Maps::Iterator::end() {
- return maps_->end_iter_;
-}
-
-Maps::Iterator& Maps::Iterator::operator++() {
- getIterator().operator++();
- at_beginning_ = false;
- return *this;
-}
-
-Maps::Iterator Maps::Iterator::operator++(int i) {
- getIterator().operator++(i);
- at_beginning_ = false;
- return *this;
-}
-
-Library* Maps::Iterator::operator*() const {
- return &getIterator().operator*().second;
-}
-
-bool Maps::Iterator::operator==(const Maps::Iterator& iter) const {
- return getIterator().operator==(iter.getIterator());
-}
-
-bool Maps::Iterator::operator!=(const Maps::Iterator& iter) const {
- return !operator==(iter);
-}
-
-Maps::string Maps::Iterator::name() const {
- return getIterator()->first;
-}
-
-// Test whether a line ends with "[stack]"; used for identifying the
-// stack entry of /proc/self/maps.
-static bool isStackLine(char* buf, char* end) {
- char* ptr = buf;
- for ( ; *ptr != '\n' && ptr < end; ++ptr)
- ;
- if (ptr < end && ptr - 7 > buf) {
- return (memcmp(ptr - 7, "[stack]", 7) == 0);
- }
- return false;
-}
-
-char* Maps::allocNearAddr(char* addr_target, size_t size, int prot) const {
- // We try to allocate memory within 1.5GB of a target address. This means,
- // we will be able to perform relative 32bit jumps from the target address.
- const unsigned long kMaxDistance = 1536 << 20;
- // In most of the code below, we just care about the numeric value of
- // the address.
- const long addr = reinterpret_cast<long>(addr_target);
- size = (size + 4095) & ~4095;
- Sandbox::SysCalls sys;
- if (sys.lseek(proc_self_maps_, 0, SEEK_SET)) {
- return NULL;
- }
-
- // Iterate through lines of /proc/self/maps to consider each mapped
- // region one at a time, looking for a gap between regions to allocate.
- char buf[256] = { 0 };
- int len = 0, rc = 1;
- bool long_line = false;
- unsigned long gap_start = 0x10000;
- void* new_addr;
- do {
- if (rc > 0) {
- do {
- rc = Sandbox::read(sys, proc_self_maps_, buf + len,
- sizeof(buf) - len - 1);
- if (rc > 0) {
- len += rc;
- }
- } while (rc > 0 && len < (int)sizeof(buf) - 1);
- }
- char *ptr = buf;
- if (!long_line) {
- long_line = true;
- // Maps lines have the form "<start address>-<end address> ... <name>".
- unsigned long gap_end = strtoul(ptr, &ptr, 16);
- unsigned long map_end = strtoul(ptr + 1, &ptr, 16);
-
- // gap_start to gap_end now covers the region of empty space before
- // the current line. Now we try to see if there's a place within the
- // gap we can use.
-
- if (gap_end - gap_start >= size) {
- // Is the gap before our target address?
- if (addr - static_cast<long>(gap_end) >= 0) {
- if (addr - (gap_end - size) < kMaxDistance) {
- unsigned long position;
- if (isStackLine(ptr, buf + len)) {
- // If we're adjacent to the stack, try to stay away from
- // the GROWS_DOWN region. Pick the farthest away region that
- // is still within the gap.
-
- if (static_cast<unsigned long>(addr) < kMaxDistance || // Underflow protection.
- static_cast<unsigned long>(addr) - kMaxDistance < gap_start) {
- position = gap_start;
- } else {
- position = (addr - kMaxDistance) & ~4095;
- if (position < gap_start) {
- position = gap_start;
- }
- }
- } else {
- // Otherwise, take the end of the region.
- position = gap_end - size;
- }
- new_addr = reinterpret_cast<char *>(sys.MMAP
- (reinterpret_cast<void *>(position), size, prot,
- MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0));
- if (new_addr != MAP_FAILED) {
- goto done;
- }
- }
- } else if (gap_start + size - addr < kMaxDistance) {
- // Gap is after the address. Above checks that we can wrap around
- // through 0 to a space we'd use.
- new_addr = reinterpret_cast<char *>(sys.MMAP
- (reinterpret_cast<void *>(gap_start), size, prot,
- MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1 ,0));
- if (new_addr != MAP_FAILED) {
- goto done;
- }
- }
- }
- gap_start = map_end;
- }
- for (;;) {
- if (!*ptr || *ptr++ == '\n') {
- long_line = false;
- memmove(buf, ptr, len - (ptr - buf));
- memset(buf + len - (ptr - buf), 0, ptr - buf);
- len -= (ptr - buf);
- break;
- }
- }
- } while (len || long_line);
- new_addr = NULL;
-done:
- return reinterpret_cast<char*>(new_addr);
-}
-
-} // namespace
diff --git a/sandbox/linux/seccomp/maps.h b/sandbox/linux/seccomp/maps.h
deleted file mode 100644
index fbcc7672..0000000
--- a/sandbox/linux/seccomp/maps.h
+++ /dev/null
@@ -1,94 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef MAPS_H__
-#define MAPS_H__
-
-#include <elf.h>
-#include <functional>
-#include <map>
-#include <string>
-
-#include "allocator.h"
-
-#if defined(__x86_64__)
-typedef Elf64_Addr Elf_Addr;
-#elif defined(__i386__)
-typedef Elf32_Addr Elf_Addr;
-#else
-#error Undefined target platform
-#endif
-
-namespace playground {
-
-class Library;
-class Maps {
- friend class Library;
- public:
- typedef std::basic_string<char, std::char_traits<char>,
- SystemAllocator<char> > string;
-
- Maps(int proc_self_maps);
- ~Maps() { }
-
- protected:
- // A map with all the libraries currently loaded into the application.
- // The key is a unique combination of device number, inode number, and
- // file name. It should be treated as opaque.
- typedef std::map<string, Library, std::less<string>,
- SystemAllocator<std::pair<const string,
- Library> > > LibraryMap;
- friend class Iterator;
- class Iterator {
- friend class Maps;
-
- protected:
- explicit Iterator(Maps* maps);
- Iterator(Maps* maps, bool at_beginning, bool at_end);
- Maps::LibraryMap::iterator& getIterator() const;
-
- public:
- Iterator begin();
- Iterator end();
- Iterator& operator++();
- Iterator operator++(int i);
- Library* operator*() const;
- bool operator==(const Iterator& iter) const;
- bool operator!=(const Iterator& iter) const;
- string name() const;
-
- protected:
- mutable LibraryMap::iterator iter_;
- Maps *maps_;
- bool at_beginning_;
- bool at_end_;
- };
-
- public:
- typedef class Iterator const_iterator;
-
- const_iterator begin() {
- return begin_iter_;
- }
-
- const_iterator end() {
- return end_iter_;
- }
-
- char* allocNearAddr(char *addr, size_t size, int prot) const;
-
- char* vsyscall() const { return vsyscall_; }
-
- protected:
- const int proc_self_maps_;
- const Iterator begin_iter_;
- const Iterator end_iter_;
-
- LibraryMap libs_;
- char* vsyscall_;
-};
-
-} // namespace
-
-#endif // MAPS_H__
diff --git a/sandbox/linux/seccomp/mmap.cc b/sandbox/linux/seccomp/mmap.cc
deleted file mode 100644
index 700da91..0000000
--- a/sandbox/linux/seccomp/mmap.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "debug.h"
-#include "sandbox_impl.h"
-
-namespace playground {
-
-void* Sandbox::sandbox_mmap(void *start, size_t length, int prot, int flags,
- int fd, off_t offset) {
- long long tm;
- Debug::syscall(&tm, __NR_mmap, "Executing handler");
- struct {
- int sysnum;
- long long cookie;
- MMap mmap_req;
- } __attribute__((packed)) request;
- request.sysnum = __NR_MMAP;
- request.cookie = cookie();
- request.mmap_req.start = start;
- request.mmap_req.length = length;
- request.mmap_req.prot = prot;
- request.mmap_req.flags = flags;
- request.mmap_req.fd = fd;
- request.mmap_req.offset = offset;
-
- void* rc;
- SysCalls sys;
- if (write(sys, processFdPub(), &request, sizeof(request)) !=
- sizeof(request) ||
- read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
- die("Failed to forward mmap() request [sandbox]");
- }
- Debug::elapsed(tm, __NR_mmap);
- return rc;
-}
-
-bool Sandbox::process_mmap(int parentMapsFd, int sandboxFd, int threadFdPub,
- int threadFd, SecureMem::Args* mem) {
- // Read request
- SysCalls sys;
- MMap mmap_req;
- if (read(sys, sandboxFd, &mmap_req, sizeof(mmap_req)) != sizeof(mmap_req)) {
- die("Failed to read parameters for mmap() [process]");
- }
-
- if (mmap_req.flags & MAP_FIXED) {
- // Cannot map a memory area that was part of the original memory mappings.
- void *stop = reinterpret_cast<void *>(
- (char *)mmap_req.start + mmap_req.length);
- ProtectedMap::const_iterator iter = protectedMap_.lower_bound(
- (void *)mmap_req.start);
- if (iter != protectedMap_.begin()) {
- --iter;
- }
- for (; iter != protectedMap_.end() && iter->first < stop; ++iter) {
- if (mmap_req.start < reinterpret_cast<void *>(
- reinterpret_cast<char *>(iter->first) + iter->second) &&
- stop > iter->first) {
- int rc = -EINVAL;
- SecureMem::abandonSystemCall(threadFd, rc);
- return false;
- }
- }
- }
-
- // All other mmap() requests are OK
- SecureMem::sendSystemCall(threadFdPub, false, -1, mem, __NR_MMAP,
- mmap_req.start, mmap_req.length, mmap_req.prot,
- mmap_req.flags, mmap_req.fd, mmap_req.offset);
- return true;
-}
-
-} // namespace
diff --git a/sandbox/linux/seccomp/mprotect.cc b/sandbox/linux/seccomp/mprotect.cc
deleted file mode 100644
index 548199d..0000000
--- a/sandbox/linux/seccomp/mprotect.cc
+++ /dev/null
@@ -1,73 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "debug.h"
-#include "sandbox_impl.h"
-
-namespace playground {
-
-long Sandbox::sandbox_mprotect(const void *addr, size_t len, int prot) {
- long long tm;
- Debug::syscall(&tm, __NR_mprotect, "Executing handler");
- struct {
- int sysnum;
- long long cookie;
- MProtect mprotect_req;
- } __attribute__((packed)) request;
- request.sysnum = __NR_mprotect;
- request.cookie = cookie();
- request.mprotect_req.addr = addr;
- request.mprotect_req.len = len;
- request.mprotect_req.prot = prot;
-
- long rc;
- SysCalls sys;
- if (write(sys, processFdPub(), &request, sizeof(request)) !=
- sizeof(request) ||
- read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
- die("Failed to forward mprotect() request [sandbox]");
- }
- Debug::elapsed(tm, __NR_mprotect);
- return rc;
-}
-
-bool Sandbox::process_mprotect(int parentMapsFd, int sandboxFd,
- int threadFdPub, int threadFd,
- SecureMem::Args* mem) {
- // Read request
- SysCalls sys;
- MProtect mprotect_req;
- if (read(sys, sandboxFd, &mprotect_req, sizeof(mprotect_req)) !=
- sizeof(mprotect_req)) {
- die("Failed to read parameters for mprotect() [process]");
- }
-
- // Cannot change permissions on any memory region that was part of the
- // original memory mappings.
- int rc = -EINVAL;
- void *stop = reinterpret_cast<void *>(
- (char *)mprotect_req.addr + mprotect_req.len);
- ProtectedMap::const_iterator iter = protectedMap_.lower_bound(
- (void *)mprotect_req.addr);
- if (iter != protectedMap_.begin()) {
- --iter;
- }
- for (; iter != protectedMap_.end() && iter->first < stop; ++iter) {
- if (mprotect_req.addr < reinterpret_cast<void *>(
- reinterpret_cast<char *>(iter->first) + iter->second) &&
- stop > iter->first) {
- SecureMem::abandonSystemCall(threadFd, rc);
- return false;
- }
- }
-
- // Changing permissions on memory regions that were newly mapped inside of
- // the sandbox is OK.
- SecureMem::sendSystemCall(threadFdPub, false, -1, mem, __NR_mprotect,
- mprotect_req.addr, mprotect_req.len,
- mprotect_req.prot);
- return true;
-}
-
-} // namespace
diff --git a/sandbox/linux/seccomp/munmap.cc b/sandbox/linux/seccomp/munmap.cc
deleted file mode 100644
index dde7c7a..0000000
--- a/sandbox/linux/seccomp/munmap.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "debug.h"
-#include "sandbox_impl.h"
-
-namespace playground {
-
-long Sandbox::sandbox_munmap(void* start, size_t length) {
- long long tm;
- Debug::syscall(&tm, __NR_munmap, "Executing handler");
- struct {
- int sysnum;
- long long cookie;
- MUnmap munmap_req;
- } __attribute__((packed)) request;
- request.sysnum = __NR_munmap;
- request.cookie = cookie();
- request.munmap_req.start = start;
- request.munmap_req.length = length;
-
- long rc;
- SysCalls sys;
- if (write(sys, processFdPub(), &request, sizeof(request)) !=
- sizeof(request) ||
- read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
- die("Failed to forward munmap() request [sandbox]");
- }
- Debug::elapsed(tm, __NR_munmap);
- return rc;
-}
-
-bool Sandbox::process_munmap(int parentMapsFd, int sandboxFd, int threadFdPub,
- int threadFd, SecureMem::Args* mem) {
- // Read request
- SysCalls sys;
- MUnmap munmap_req;
- if (read(sys, sandboxFd, &munmap_req, sizeof(munmap_req)) !=
- sizeof(munmap_req)) {
- die("Failed to read parameters for munmap() [process]");
- }
-
- // Cannot unmap any memory region that was part of the original memory
- // mappings.
- int rc = -EINVAL;
- void *stop = reinterpret_cast<void *>(
- reinterpret_cast<char *>(munmap_req.start) + munmap_req.length);
- ProtectedMap::const_iterator iter = protectedMap_.lower_bound(
- munmap_req.start);
- if (iter != protectedMap_.begin()) {
- --iter;
- }
- for (; iter != protectedMap_.end() && iter->first < stop; ++iter) {
- if (munmap_req.start < reinterpret_cast<void *>(
- reinterpret_cast<char *>(iter->first) + iter->second) &&
- stop > iter->first) {
- SecureMem::abandonSystemCall(threadFd, rc);
- return false;
- }
- }
-
- // Unmapping memory regions that were newly mapped inside of the sandbox
- // is OK.
- SecureMem::sendSystemCall(threadFdPub, false, -1, mem, __NR_munmap,
- munmap_req.start, munmap_req.length);
- return true;
-}
-
-} // namespace
diff --git a/sandbox/linux/seccomp/mutex.h b/sandbox/linux/seccomp/mutex.h
deleted file mode 100644
index d7e1c5d..0000000
--- a/sandbox/linux/seccomp/mutex.h
+++ /dev/null
@@ -1,153 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef MUTEX_H__
-#define MUTEX_H__
-
-#include "sandbox_impl.h"
-
-namespace playground {
-
-class Mutex {
- public:
- typedef int mutex_t;
-
- enum { kInitValue = 0 };
-
- static void initMutex(mutex_t* mutex) {
- // Mutex is unlocked, and nobody is waiting for it
- *mutex = kInitValue;
- }
-
- static void unlockMutex(mutex_t* mutex) {
- char status;
- #if defined(__x86_64__) || defined(__i386__)
- asm volatile(
- "lock; addl %2, %0\n"
- "setz %1"
- : "=m"(*mutex), "=qm"(status)
- : "ir"(0x80000000), "m"(*mutex));
- #else
- #error Unsupported target platform
- #endif
- if (status) {
- // Mutex is zero now. No other waiters. So, we can return.
- return;
- }
- // We unlocked the mutex, but still need to wake up other waiters.
- Sandbox::SysCalls sys;
- sys.futex(mutex, FUTEX_WAKE, 1, NULL);
- }
-
- static bool lockMutex(mutex_t* mutex, int timeout = 0) {
- bool rc = true;
- // Increment mutex to add ourselves to the list of waiters
- #if defined(__x86_64__) || defined(__i386__)
- asm volatile(
- "lock; incl %0\n"
- : "=m"(*mutex)
- : "m"(*mutex));
- #else
- #error Unsupported target platform
- #endif
- for (;;) {
- // Atomically check whether the mutex is available and if so, acquire it
- char status;
- #if defined(__x86_64__) || defined(__i386__)
- asm volatile(
- "lock; btsl %3, %1\n"
- "setc %0"
- : "=q"(status), "=m"(*mutex)
- : "m"(*mutex), "ir"(31));
- #else
- #error Unsupported target platform
- #endif
- if (!status) {
- done:
- // If the mutex was available, remove ourselves from list of waiters
- #if defined(__x86_64__) || defined(__i386__)
- asm volatile(
- "lock; decl %0\n"
- : "=m"(*mutex)
- : "m"(*mutex));
- #else
- #error Unsupported target platform
- #endif
- return rc;
- }
- int value = *mutex;
- if (value >= 0) {
- // Mutex has just become available, no need to call kernel
- continue;
- }
- Sandbox::SysCalls sys;
- Sandbox::SysCalls::kernel_timespec tm;
- if (timeout) {
- tm.tv_sec = timeout / 1000;
- tm.tv_nsec = (timeout % 1000) * 1000 * 1000;
- } else {
- tm.tv_sec = 0;
- tm.tv_nsec = 0;
- }
- if (NOINTR_SYS(sys.futex(mutex, FUTEX_WAIT, value, &tm)) &&
- sys.my_errno == ETIMEDOUT) {
- rc = false;
- goto done;
- }
- }
- }
-
- static bool waitForUnlock(mutex_t* mutex, int timeout = 0) {
- bool rc = true;
- // Increment mutex to add ourselves to the list of waiters
- #if defined(__x86_64__) || defined(__i386__)
- asm volatile(
- "lock; incl %0\n"
- : "=m"(*mutex)
- : "m"(*mutex));
- #else
- #error Unsupported target platform
- #endif
- Sandbox::SysCalls sys;
- for (;;) {
- mutex_t value = *mutex;
- if (value >= 0) {
- done:
- // Mutex was not locked. Remove ourselves from list of waiters, notify
- // any other waiters (if any), and return.
- #if defined(__x86_64__) || defined(__i386__)
- asm volatile(
- "lock; decl %0\n"
- : "=m"(*mutex)
- : "m"(*mutex));
- #else
- #error Unsupported target platform
- #endif
- NOINTR_SYS(sys.futex(mutex, FUTEX_WAKE, 1, 0));
- return rc;
- }
-
- // Wait for mutex to become unlocked
- Sandbox::SysCalls::kernel_timespec tm;
- if (timeout) {
- tm.tv_sec = timeout / 1000;
- tm.tv_nsec = (timeout % 1000) * 1000 * 1000;
- } else {
- tm.tv_sec = 0;
- tm.tv_nsec = 0;
- }
-
- if (NOINTR_SYS(sys.futex(mutex, FUTEX_WAIT, value, &tm)) &&
- sys.my_errno == ETIMEDOUT) {
- rc = false;
- goto done;
- }
- }
- }
-
-};
-
-} // namespace
-
-#endif // MUTEX_H__
diff --git a/sandbox/linux/seccomp/open.cc b/sandbox/linux/seccomp/open.cc
deleted file mode 100644
index 8a9093c..0000000
--- a/sandbox/linux/seccomp/open.cc
+++ /dev/null
@@ -1,99 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "debug.h"
-#include "sandbox_impl.h"
-
-namespace playground {
-
-long Sandbox::sandbox_open(const char *pathname, int flags, mode_t mode) {
- long long tm;
- Debug::syscall(&tm, __NR_open, "Executing handler");
- size_t len = strlen(pathname);
- struct Request {
- int sysnum;
- long long cookie;
- Open open_req;
- char pathname[0];
- } __attribute__((packed)) *request;
- char data[sizeof(struct Request) + len];
- request = reinterpret_cast<struct Request*>(data);
- request->sysnum = __NR_open;
- request->cookie = cookie();
- request->open_req.path_length = len;
- request->open_req.flags = flags;
- request->open_req.mode = mode;
- memcpy(request->pathname, pathname, len);
-
- long rc;
- SysCalls sys;
- if (write(sys, processFdPub(), request, sizeof(data)) != (int)sizeof(data) ||
- read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
- die("Failed to forward open() request [sandbox]");
- }
- Debug::elapsed(tm, __NR_open);
- return rc;
-}
-
-bool Sandbox::process_open(int parentMapsFd, int sandboxFd, int threadFdPub,
- int threadFd, SecureMem::Args* mem) {
- // Read request
- SysCalls sys;
- Open open_req;
- if (read(sys, sandboxFd, &open_req, sizeof(open_req)) != sizeof(open_req)) {
- read_parm_failed:
- die("Failed to read parameters for open() [process]");
- }
- int rc = -ENAMETOOLONG;
- if (open_req.path_length >= sizeof(mem->pathname)) {
- char buf[32];
- while (open_req.path_length > 0) {
- size_t len = open_req.path_length > sizeof(buf) ?
- sizeof(buf) : open_req.path_length;
- ssize_t i = read(sys, sandboxFd, buf, len);
- if (i <= 0) {
- goto read_parm_failed;
- }
- open_req.path_length -= i;
- }
- if (write(sys, threadFd, &rc, sizeof(rc)) != sizeof(rc)) {
- die("Failed to return data from open() [process]");
- }
- return false;
- }
-
- if ((open_req.flags & O_ACCMODE) != O_RDONLY ||
- !g_policy.allow_file_namespace) {
- // After locking the mutex, we can no longer abandon the system call. So,
- // perform checks before clobbering the securely shared memory.
- char tmp[open_req.path_length];
- if (read(sys, sandboxFd, tmp, open_req.path_length) !=
- (ssize_t)open_req.path_length) {
- goto read_parm_failed;
- }
- Debug::message(("Denying access to \"" + std::string(tmp) + "\"").c_str());
- SecureMem::abandonSystemCall(threadFd, -EACCES);
- return false;
- }
-
- SecureMem::lockSystemCall(parentMapsFd, mem);
- if (read(sys, sandboxFd, mem->pathname, open_req.path_length) !=
- (ssize_t)open_req.path_length) {
- goto read_parm_failed;
- }
- mem->pathname[open_req.path_length] = '\000';
-
- // TODO(markus): Implement sandboxing policy. For now, we allow read
- // access to everything. That's probably not correct.
- Debug::message(("Allowing access to \"" + std::string(mem->pathname) +
- "\"").c_str());
-
- // Tell trusted thread to open the file.
- SecureMem::sendSystemCall(threadFdPub, true, parentMapsFd, mem, __NR_open,
- mem->pathname - (char*)mem + (char*)mem->self,
- open_req.flags, open_req.mode);
- return true;
-}
-
-} // namespace
diff --git a/sandbox/linux/seccomp/sandbox.cc b/sandbox/linux/seccomp/sandbox.cc
deleted file mode 100644
index 0b09457..0000000
--- a/sandbox/linux/seccomp/sandbox.cc
+++ /dev/null
@@ -1,838 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "library.h"
-#include "sandbox_impl.h"
-#include "syscall_table.h"
-
-namespace playground {
-
-// Global variables
-int Sandbox::proc_self_maps_ = -1;
-enum Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;
-int Sandbox::pid_;
-int Sandbox::processFdPub_;
-int Sandbox::cloneFdPub_;
-Sandbox::SysCalls::kernel_sigaction Sandbox::sa_segv_;
-Sandbox::ProtectedMap Sandbox::protectedMap_;
-std::vector<SecureMem::Args*> Sandbox::secureMemPool_;
-
-bool Sandbox::sendFd(int transport, int fd0, int fd1, const void* buf,
- size_t len) {
- int fds[2], count = 0;
- if (fd0 >= 0) { fds[count++] = fd0; }
- if (fd1 >= 0) { fds[count++] = fd1; }
- if (!count) {
- return false;
- }
- char cmsg_buf[CMSG_SPACE(count*sizeof(int))];
- memset(cmsg_buf, 0, sizeof(cmsg_buf));
- struct SysCalls::kernel_iovec iov[2] = { { 0 } };
- struct SysCalls::kernel_msghdr msg = { 0 };
- int dummy = 0;
- iov[0].iov_base = &dummy;
- iov[0].iov_len = sizeof(dummy);
- if (buf && len > 0) {
- iov[1].iov_base = const_cast<void *>(buf);
- iov[1].iov_len = len;
- }
- msg.msg_iov = iov;
- msg.msg_iovlen = (buf && len > 0) ? 2 : 1;
- msg.msg_control = cmsg_buf;
- msg.msg_controllen = CMSG_LEN(count*sizeof(int));
- struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
- cmsg->cmsg_level = SOL_SOCKET;
- cmsg->cmsg_type = SCM_RIGHTS;
- cmsg->cmsg_len = CMSG_LEN(count*sizeof(int));
- memcpy(CMSG_DATA(cmsg), fds, count*sizeof(int));
- SysCalls sys;
- return NOINTR_SYS(sys.sendmsg(transport, &msg, 0)) ==
- (ssize_t)(sizeof(dummy) + ((buf && len > 0) ? len : 0));
-}
-
-bool Sandbox::getFd(int transport, int* fd0, int* fd1, void* buf, size_t*len) {
- int count = 0;
- int *err = NULL;
- if (fd0) {
- count++;
- err = fd0;
- *fd0 = -1;
- }
- if (fd1) {
- if (!count++) {
- err = fd1;
- }
- *fd1 = -1;
- }
- if (!count) {
- return false;
- }
- char cmsg_buf[CMSG_SPACE(count*sizeof(int))];
- memset(cmsg_buf, 0, sizeof(cmsg_buf));
- struct SysCalls::kernel_iovec iov[2] = { { 0 } };
- struct SysCalls::kernel_msghdr msg = { 0 };
- iov[0].iov_base = err;
- iov[0].iov_len = sizeof(int);
- if (buf && len && *len > 0) {
- iov[1].iov_base = buf;
- iov[1].iov_len = *len;
- }
- msg.msg_iov = iov;
- msg.msg_iovlen = (buf && len && *len > 0) ? 2 : 1;
- msg.msg_control = cmsg_buf;
- msg.msg_controllen = CMSG_LEN(count*sizeof(int));
- SysCalls sys;
- ssize_t bytes = NOINTR_SYS(sys.recvmsg(transport, &msg, 0));
- if (len) {
- *len = bytes > (int)sizeof(int) ?
- bytes - sizeof(int) : 0;
- }
- if (bytes != (ssize_t)(sizeof(int) + ((buf && len && *len > 0) ? *len : 0))){
- *err = bytes >= 0 ? 0 : -EBADF;
- return false;
- }
- if (*err) {
- // "err" is the first four bytes of the payload. If these are non-zero,
- // the sender on the other side of the socketpair sent us an errno value.
- // We don't expect to get any file handles in this case.
- return false;
- }
- struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
- if ((msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) ||
- !cmsg ||
- cmsg->cmsg_level != SOL_SOCKET ||
- cmsg->cmsg_type != SCM_RIGHTS ||
- cmsg->cmsg_len != CMSG_LEN(count*sizeof(int))) {
- *err = -EBADF;
- return false;
- }
- if (fd1) { *fd1 = ((int *)CMSG_DATA(cmsg))[--count]; }
- if (fd0) { *fd0 = ((int *)CMSG_DATA(cmsg))[--count]; }
- return true;
-}
-
-void Sandbox::setupSignalHandlers() {
- // Set SIGCHLD to SIG_DFL so that waitpid() can work
- SysCalls sys;
- struct SysCalls::kernel_sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_handler_ = SIG_DFL;
- sys.sigaction(SIGCHLD, &sa, NULL);
-
- // Set up SEGV handler for dealing with RDTSC instructions, system calls
- // that have been rewritten to use INT0, for sigprocmask() emulation, for
- // the creation of threads, and for user-provided SEGV handlers.
- sa.sa_sigaction_ = segv();
- sa.sa_flags = SA_SIGINFO | SA_NODEFER;
- sys.sigaction(SIGSEGV, &sa, &sa_segv_);
-
- // Unblock SIGSEGV and SIGCHLD
- SysCalls::kernel_sigset_t mask;
- memset(&mask, 0x00, sizeof(mask));
- mask.sig[0] |= (1 << (SIGSEGV - 1)) | (1 << (SIGCHLD - 1));
- sys.sigprocmask(SIG_UNBLOCK, &mask, 0);
-}
-
-void (*Sandbox::segv())(int signo, SysCalls::siginfo *context, void *unused) {
- void (*fnc)(int signo, SysCalls::siginfo *context, void *unused);
- asm volatile(
- "call 999f\n"
-#if defined(__x86_64__)
- // Inspect instruction at the point where the segmentation fault
- // happened. If it is RDTSC, forward the request to the trusted
- // thread.
- "mov $-3, %%r14\n" // request for RDTSC
- "mov 0xB0(%%rsp), %%r15\n" // %rip at time of segmentation fault
- "cmpw $0x310F, (%%r15)\n" // RDTSC
- "jz 0f\n"
- "cmpw $0x010F, (%%r15)\n" // RDTSCP
- "jnz 8f\n"
- "cmpb $0xF9, 2(%%r15)\n"
- "jnz 8f\n"
- "mov $-4, %%r14\n" // request for RDTSCP
- "0:"
-#ifndef NDEBUG
- "lea 100f(%%rip), %%rdi\n"
- "call playground$debugMessage\n"
-#endif
- "sub $4, %%rsp\n"
- "push %%r14\n"
- "mov %%gs:16, %%edi\n" // fd = threadFdPub
- "mov %%rsp, %%rsi\n" // buf = %rsp
- "mov $4, %%edx\n" // len = sizeof(int)
- "1:mov $1, %%eax\n" // NR_write
- "syscall\n"
- "cmp %%rax, %%rdx\n"
- "jz 5f\n"
- "cmp $-4, %%eax\n" // EINTR
- "jz 1b\n"
- "2:add $12, %%rsp\n"
- "movq $0, 0x98(%%rsp)\n" // %rax at time of segmentation fault
- "movq $0, 0x90(%%rsp)\n" // %rdx at time of segmentation fault
- "cmpw $0x310F, (%%r15)\n" // RDTSC
- "jz 3f\n"
- "movq $0, 0xA0(%%rsp)\n" // %rcx at time of segmentation fault
- "3:addq $2, 0xB0(%%rsp)\n" // %rip at time of segmentation fault
- "cmpw $0x010F, (%%r15)\n" // RDTSC
- "jnz 4f\n"
- "addq $1, 0xB0(%%rsp)\n" // %rip at time of segmentation fault
- "4:ret\n"
- "5:mov $12, %%edx\n" // len = 3*sizeof(int)
- "6:mov $0, %%eax\n" // NR_read
- "syscall\n"
- "cmp $-4, %%eax\n" // EINTR
- "jz 6b\n"
- "cmp %%rax, %%rdx\n"
- "jnz 2b\n"
- "mov 0(%%rsp), %%eax\n"
- "mov 4(%%rsp), %%edx\n"
- "mov 8(%%rsp), %%ecx\n"
- "add $12, %%rsp\n"
- "mov %%rdx, 0x90(%%rsp)\n" // %rdx at time of segmentation fault
- "cmpw $0x310F, (%%r15)\n" // RDTSC
- "jz 7f\n"
- "mov %%rcx, 0xA0(%%rsp)\n" // %rcx at time of segmentation fault
- "7:mov %%rax, 0x98(%%rsp)\n" // %rax at time of segmentation fault
- "jmp 3b\n"
-
- // If the instruction is INT 0, then this was probably the result
- // of playground::Library being unable to find a way to safely
- // rewrite the system call instruction. Retrieve the CPU register
- // at the time of the segmentation fault and invoke syscallWrapper().
- "8:cmpw $0x00CD, (%%r15)\n" // INT $0x0
- "jnz 16f\n"
-#ifndef NDEBUG
- "lea 200f(%%rip), %%rdi\n"
- "call playground$debugMessage\n"
-#endif
- "mov 0x98(%%rsp), %%rax\n" // %rax at time of segmentation fault
- "mov 0x70(%%rsp), %%rdi\n" // %rdi at time of segmentation fault
- "mov 0x78(%%rsp), %%rsi\n" // %rsi at time of segmentation fault
- "mov 0x90(%%rsp), %%rdx\n" // %rdx at time of segmentation fault
- "mov 0x40(%%rsp), %%r10\n" // %r10 at time of segmentation fault
- "mov 0x30(%%rsp), %%r8\n" // %r8 at time of segmentation fault
- "mov 0x38(%%rsp), %%r9\n" // %r9 at time of segmentation fault
-
- // Handle rt_sigprocmask()
- "cmp $14, %%rax\n" // NR_rt_sigprocmask
- "jnz 12f\n"
- "mov $-22, %%rax\n" // -EINVAL
- "cmp $8, %%r10\n" // %r10 = sigsetsize (8 bytes = 64 signals)
- "jl 7b\n"
- "mov 0x130(%%rsp), %%r10\n" // signal mask at time of segmentation fault
- "test %%rsi, %%rsi\n" // only set mask, if set is non-NULL
- "jz 11f\n"
- "mov 0(%%rsi), %%rsi\n"
- "cmp $0, %%rdi\n" // %rdi = how (SIG_BLOCK)
- "jnz 9f\n"
- "or %%rsi, 0x130(%%rsp)\n" // signal mask at time of segmentation fault
- "jmp 11f\n"
- "9:cmp $1, %%rdi\n" // %rdi = how (SIG_UNBLOCK)
- "jnz 10f\n"
- "xor $-1, %%rsi\n"
- "and %%rsi, 0x130(%%rsp)\n" // signal mask at time of segmentation fault
- "jmp 11f\n"
- "10:cmp $2, %%rdi\n" // %rdi = how (SIG_SETMASK)
- "jnz 7b\n"
- "mov %%rsi, 0x130(%%rsp)\n" // signal mask at time of segmentation fault
- "11:xor %%rax, %%rax\n"
- "test %%rdx, %%rdx\n" // only return old mask, if old_set is non-NULL
- "jz 7b\n"
- "mov %%r10, 0(%%rdx)\n" // old_set
- "jmp 7b\n"
-
- // Handle rt_sigreturn()
- "12:cmp $15, %%rax\n" // NR_rt_sigreturn
- "jnz 14f\n"
- "mov 0xA8(%%rsp), %%rsp\n" // %rsp at time of segmentation fault
- "13:syscall\n" // rt_sigreturn() is unrestricted
- "mov $66, %%edi\n" // rt_sigreturn() should never return
- "mov $231, %%eax\n" // NR_exit_group
- "jmp 13b\n"
-
- // Copy signal frame onto new stack. See clone.cc for details
- "14:cmp $56+0xF000, %%rax\n" // NR_clone + 0xF000
- "jnz 15f\n"
- "lea 8(%%rsp), %%rax\n" // retain stack frame upon returning
- "mov %%rax, 0xA8(%%rsp)\n" // %rsp at time of segmentation fault
- "jmp 7b\n"
-
- // Forward system call to syscallWrapper()
- "15:lea 7b(%%rip), %%rcx\n"
- "push %%rcx\n"
- "push 0xB8(%%rsp)\n" // %rip at time of segmentation fault
- "lea playground$syscallWrapper(%%rip), %%rcx\n"
- "jmp *%%rcx\n"
-
- // In order to implement SA_NODEFER, we have to keep track of recursive
- // calls to SIGSEGV handlers. This means we have to increment a counter
- // before calling the user's signal handler, and decrement it on
- // leaving the user's signal handler.
- // Some signal handlers look at the return address of the signal
- // stack, and more importantly "gdb" uses the call to rt_sigreturn()
- // as a magic signature when doing stacktraces. So, we have to use
- // a little more unusual code to regain control after the user's
- // signal handler is done. We adjust the return address to point to
- // non-executable memory. And when we trigger another SEGV we pop the
- // extraneous signal frame and then call rt_sigreturn().
- // N.B. We currently do not correctly adjust the SEGV counter if the
- // user's signal handler exits in any way other than by returning (e.g. by
- // directly calling rt_sigreturn(), or by calling siglongjmp()).
- "16:lea 22f(%%rip), %%r14\n"
- "cmp %%r14, %%r15\n"
- "jnz 17f\n" // check if returning from user's handler
- "decl %%gs:0x105C-0xE0\n" // decrement SEGV recursion counter
- "mov 0xA8(%%rsp), %%rsp\n" // %rsp at time of segmentation fault
- "mov $0xF, %%eax\n" // NR_rt_sigreturn
- "syscall\n"
-
- // This was a genuine segmentation fault. Check Sandbox::sa_segv_ for
- // what we are supposed to do.
- "17:mov playground$sa_segv@GOTPCREL(%%rip), %%rax\n"
- "cmp $0, 0(%%rax)\n" // SIG_DFL
- "jz 18f\n"
- "cmp $1, 0(%%rax)\n" // SIG_IGN
- "jnz 19f\n" // can't really ignore synchronous signals
-
- // Trigger the kernel's default signal disposition. The only way we can
- // do this from seccomp mode is by blocking the signal and retriggering
- // it.
- "18:orb $4, 0x131(%%rsp)\n" // signal mask at time of segmentation fault
- "ret\n"
-
- // Check sa_flags:
- // - We can ignore SA_NOCLDSTOP, SA_NOCLDWAIT, and SA_RESTART as they
- // do not have any effect for SIGSEGV.
- // - On x86-64, we can also ignore SA_SIGINFO, as the calling
- // conventions for sa_handler() are a subset of the conventions for
- // sa_sigaction().
- // - We have to always register our signal handler with SA_NODEFER so
- // that the user's signal handler can make system calls which might
- // require additional help from our SEGV handler.
- // - If the user's signal handler wasn't supposed to be SA_NODEFER, then
- // we emulate this behavior by keeping track of a recursion counter.
- //
- // TODO(markus): If/when we add support for sigaltstack(), we have to
- // handle SA_ONSTACK.
- "19:cmpl $0, %%gs:0x105C-0xE0\n"// check if we failed inside of SEGV handler
- "jnz 18b\n" // if so, then terminate program
- "mov 0(%%rax), %%rbx\n" // sa_segv_.sa_sigaction
- "mov 8(%%rax), %%rcx\n" // sa_segv_.sa_flags
- "btl $31, %%ecx\n" // SA_RESETHAND
- "jnc 20f\n"
- "movq $0, 0(%%rax)\n" // set handler to SIG_DFL
- "20:btl $30, %%ecx\n" // SA_NODEFER
- "jc 21f\n"
- "mov %%r14, 0(%%rsp)\n" // trigger a SEGV on return, so that we can
- "incl %%gs:0x105C-0xE0\n" // clean up state; incr. recursion counter
- "21:jmp *%%rbx\n" // call user's signal handler
-
-
- // Non-executable version of the restorer function. We use this to
- // trigger a SEGV upon returning from the user's signal handler, giving
- // us an ability to clean up prior to returning from the SEGV handler.
- ".pushsection .data\n" // move code into non-executable section
- "22:mov $0xF, %%rax\n" // gdb looks for this signature when doing
- "syscall\n" // backtraces
- ".popsection\n"
-#elif defined(__i386__)
- // Inspect instruction at the point where the segmentation fault
- // happened. If it is RDTSC, forward the request to the trusted
- // thread.
- "mov $-3, %%ebx\n" // request for RDTSC
- "mov 0xDC(%%esp), %%ebp\n" // %eip at time of segmentation fault
- "cmpw $0x310F, (%%ebp)\n" // RDTSC
- "jz 0f\n"
- "cmpw $0x010F, (%%ebp)\n" // RDTSCP
- "jnz 9f\n"
- "cmpb $0xF9, 2(%%ebp)\n"
- "jnz 9f\n"
- "mov $-4, %%ebx\n" // request for RDTSCP
- "0:"
-#ifndef NDEBUG
- "lea 100f, %%eax\n"
- "push %%eax\n"
- "call playground$debugMessage\n"
- "sub $4, %%esp\n"
-#else
- "sub $8, %%esp\n" // allocate buffer for receiving timestamp
-#endif
- "push %%ebx\n"
- "mov %%fs:16, %%ebx\n" // fd = threadFdPub
- "mov %%esp, %%ecx\n" // buf = %esp
- "mov $4, %%edx\n" // len = sizeof(int)
- "1:mov %%edx, %%eax\n" // NR_write
- "int $0x80\n"
- "cmp %%eax, %%edx\n"
- "jz 7f\n"
- "cmp $-4, %%eax\n" // EINTR
- "jz 1b\n"
- "2:add $12, %%esp\n" // remove temporary buffer from stack
- "xor %%eax, %%eax\n"
- "movl $0, 0xC8(%%esp)\n" // %edx at time of segmentation fault
- "cmpw $0x310F, (%%ebp)\n" // RDTSC
- "jz 3f\n"
- "movl $0, 0xCC(%%esp)\n" // %ecx at time of segmentation fault
- "3:mov %%eax, 0xD0(%%esp)\n" // %eax at time of segmentation fault
- "4:mov 0xDC(%%esp), %%ebp\n" // %eip at time of segmentation fault
- "addl $2, 0xDC(%%esp)\n" // %eip at time of segmentation fault
- "cmpw $0x010F, (%%ebp)\n" // RDTSCP
- "jnz 5f\n"
- "addl $1, 0xDC(%%esp)\n" // %eip at time of segmentation fault
- "5:sub $0x1C8, %%esp\n" // a legacy signal stack is much larger
- "mov 0x1CC(%%esp), %%eax\n" // push signal number
- "push %%eax\n"
- "lea 0x270(%%esp), %%esi\n" // copy siginfo register values
- "lea 0x4(%%esp), %%edi\n" // into new location
- "mov $22, %%ecx\n"
- "cld\n"
- "rep movsl\n"
- "mov 0x2C8(%%esp), %%ebx\n" // copy first half of signal mask
- "mov %%ebx, 0x54(%%esp)\n"
- "lea 6f, %%esi\n" // copy "magic" restorer function
- "push %%esi\n" // push restorer function
- "lea 0x2D4(%%esp), %%edi\n" // patch up retcode magic numbers
- "movb $2, %%cl\n"
- "rep movsl\n"
- "ret\n" // return to restorer function
-
- // The restorer function is sometimes used by gdb as a magic marker to
- // recognize signal stack frames. Don't change any of the next three
- // instructions.
- "6:pop %%eax\n" // remove dummy argument (signo)
- "mov $119, %%eax\n" // NR_sigreturn
- "int $0x80\n"
- "7:mov $12, %%edx\n" // len = 3*sizeof(int)
- "8:mov $3, %%eax\n" // NR_read
- "int $0x80\n"
- "cmp $-4, %%eax\n" // EINTR
- "jz 8b\n"
- "cmp %%eax, %%edx\n"
- "jnz 2b\n"
- "pop %%eax\n"
- "pop %%edx\n"
- "pop %%ecx\n"
- "mov %%edx, 0xC8(%%esp)\n" // %edx at time of segmentation fault
- "cmpw $0x310F, (%%ebp)\n" // RDTSC
- "jz 3b\n"
- "mov %%ecx, 0xCC(%%esp)\n" // %ecx at time of segmentation fault
- "jmp 3b\n"
-
- // If the instruction is INT 0, then this was probably the result
- // of playground::Library being unable to find a way to safely
- // rewrite the system call instruction. Retrieve the CPU register
- // at the time of the segmentation fault and invoke syscallWrapper().
- "9:cmpw $0x00CD, (%%ebp)\n" // INT $0x0
- "jnz 20f\n"
-#ifndef NDEBUG
- "lea 200f, %%eax\n"
- "push %%eax\n"
- "call playground$debugMessage\n"
- "add $0x4, %%esp\n"
-#endif
- "mov 0xD0(%%esp), %%eax\n" // %eax at time of segmentation fault
- "mov 0xC4(%%esp), %%ebx\n" // %ebx at time of segmentation fault
- "mov 0xCC(%%esp), %%ecx\n" // %ecx at time of segmentation fault
- "mov 0xC8(%%esp), %%edx\n" // %edx at time of segmentation fault
- "mov 0xB8(%%esp), %%esi\n" // %esi at time of segmentation fault
- "mov 0xB4(%%esp), %%edi\n" // %edi at time of segmentation fault
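- "mov 0xB2(%%esp), %%ebp\n" // %ebp at time of segmentation fault; NOTE(review): %edi/%esi are read from 0xB4/0xB8 and %esp from 0xC0, so 0xB2 looks like a typo for 0xBC -- confirm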
-
- // Handle sigprocmask() and rt_sigprocmask()
- "cmp $175, %%eax\n" // NR_rt_sigprocmask
- "jnz 10f\n"
- "mov $-22, %%eax\n" // -EINVAL
- "cmp $8, %%esi\n" // %esi = sigsetsize (8 bytes = 64 signals)
- "jl 3b\n"
- "jmp 11f\n"
- "10:cmp $126, %%eax\n" // NR_sigprocmask
- "jnz 15f\n"
- "mov $-22, %%eax\n"
- "11:mov 0xFC(%%esp), %%edi\n" // signal mask at time of segmentation fault
- "mov 0x100(%%esp), %%ebp\n"
- "test %%ecx, %%ecx\n" // only set mask, if set is non-NULL
- "jz 14f\n"
- "mov 0(%%ecx), %%esi\n"
- "mov 4(%%ecx), %%ecx\n"
- "cmp $0, %%ebx\n" // %ebx = how (SIG_BLOCK)
- "jnz 12f\n"
- "or %%esi, 0xFC(%%esp)\n" // signal mask at time of segmentation fault
- "or %%ecx, 0x100(%%esp)\n"
- "jmp 14f\n"
- "12:cmp $1, %%ebx\n" // %ebx = how (SIG_UNBLOCK)
- "jnz 13f\n"
- "xor $-1, %%esi\n"
- "xor $-1, %%ecx\n"
- "and %%esi, 0xFC(%%esp)\n" // signal mask at time of segmentation fault
- "and %%ecx, 0x100(%%esp)\n"
- "jmp 14f\n"
- "13:cmp $2, %%ebx\n" // %ebx = how (SIG_SETMASK)
- "jnz 3b\n"
- "mov %%esi, 0xFC(%%esp)\n" // signal mask at time of segmentation fault
- "mov %%ecx, 0x100(%%esp)\n"
- "14:xor %%eax, %%eax\n"
- "test %%edx, %%edx\n" // only return old mask, if old_set is non-NULL
- "jz 3b\n"
- "mov %%edi, 0(%%edx)\n" // old_set
- "mov %%ebp, 4(%%edx)\n"
- "jmp 3b\n"
-
- // Handle sigreturn() and rt_sigreturn()
- // See syscall.cc for a discussion on how we can emulate rt_sigreturn()
- // by calling sigreturn() with a suitably adjusted stack.
- "15:cmp $119, %%eax\n" // NR_sigreturn
- "jnz 17f\n"
- "mov 0xC0(%%esp), %%esp\n" // %esp at time of segmentation fault
- "16:int $0x80\n" // sigreturn() is unrestricted
- "17:cmp $173, %%eax\n" // NR_rt_sigreturn
- "jnz 18f\n"
- "mov 0xC0(%%esp), %%esp\n" // %esp at time of segmentation fault
- "sub $4, %%esp\n" // add fake return address
- "jmp 4b\n"
-
- // Copy signal frame onto new stack. In the process, we have to convert
- // it from an RT signal frame to a legacy signal frame.
- // See clone.cc for details
- "18:cmp $120+0xF000, %%eax\n" // NR_clone + 0xF000
- "jnz 19f\n"
- "lea -0x1C8(%%esp), %%eax\n"// retain stack frame upon returning
- "mov %%eax, 0xC0(%%esp)\n" // %esp at time of segmentation fault
- "jmp 3b\n"
-
- // Forward system call to syscallWrapper()
- "19:call playground$syscallWrapper\n"
- "jmp 3b\n"
-
- // In order to implement SA_NODEFER, we have to keep track of recursive
- // calls to SIGSEGV handlers. This means we have to increment a counter
- // before calling the user's signal handler, and decrement it on
- // leaving the user's signal handler.
- // Some signal handlers look at the return address of the signal
- // stack, and more importantly "gdb" uses the call to {,rt_}sigreturn()
- // as a magic signature when doing stacktraces. So, we have to use
- // a little more unusual code to regain control after the user's
- // signal handler is done. We adjust the return address to point to
- // non-executable memory. And when we trigger another SEGV we pop the
- // extraneous signal frame and then call sigreturn().
- // N.B. We currently do not correctly adjust the SEGV counter if the
- // user's signal handler exits in any way other than by returning (e.g. by
- // directly calling {,rt_}sigreturn(), or by calling siglongjmp()).
- "20:lea 30f, %%edi\n" // rt-style restorer function
- "lea 31f, %%esi\n" // legacy restorer function
- "cmp %%ebp, %%edi\n" // check if returning from user's handler
- "jnz 21f\n"
- "decl %%fs:0x1040-0x58\n" // decrement SEGV recursion counter
- "mov 0xC0(%%esp), %%esp\n" // %esp at time of segmentation fault
- "jmp 29f\n"
- "21:cmp %%ebp, %%esi\n" // check if returning from user's handler
- "jnz 22f\n"
- "decl %%fs:0x1040-0x58\n" // decrement SEGV recursion counter
- "mov 0xC0(%%esp), %%esp\n" // %esp at time of segmentation fault
- "jmp 6b\n"
-
- // This was a genuine segmentation fault. Check Sandbox::sa_segv_ for
- // what we are supposed to do.
- "22:lea playground$sa_segv, %%eax\n"
- "cmp $0, 0(%%eax)\n" // SIG_DFL
- "jz 23f\n"
- "cmp $1, 0(%%eax)\n" // SIG_IGN
- "jnz 24f\n" // can't really ignore synchronous signals
-
- // Trigger the kernel's default signal disposition. The only way we can
- // do this from seccomp mode is by blocking the signal and retriggering
- // it.
- "23:orb $4, 0xFD(%%esp)\n" // signal mask at time of segmentation fault
- "jmp 5b\n"
-
- // Check sa_flags:
- // - We can ignore SA_NOCLDSTOP, SA_NOCLDWAIT, and SA_RESTART as they
- // do not have any effect for SIGSEGV.
- // - We have to always register our signal handler with SA_NODEFER so
- // that the user's signal handler can make system calls which might
- // require additional help from our SEGV handler.
- // - If the user's signal handler wasn't supposed to be SA_NODEFER, then
- // we emulate this behavior by keeping track of a recursion counter.
- //
- // TODO(markus): If/when we add support for sigaltstack(), we have to
- // handle SA_ONSTACK.
- "24:cmpl $0, %%fs:0x1040-0x58\n"// check if we failed inside of SEGV handler
- "jnz 23b\n" // if so, then terminate program
- "mov 0(%%eax), %%ebx\n" // sa_segv_.sa_sigaction
- "mov 4(%%eax), %%ecx\n" // sa_segv_.sa_flags
- "btl $31, %%ecx\n" // SA_RESETHAND
- "jnc 25f\n"
- "movl $0, 0(%%eax)\n" // set handler to SIG_DFL
- "25:btl $30, %%ecx\n" // SA_NODEFER
- "jc 28f\n"
- "btl $2, %%ecx\n" // SA_SIGINFO
- "jnc 26f\n"
- "mov %%edi, 0(%%esp)\n" // trigger a SEGV on return
- "incl %%fs:0x1040-0x58\n" // increment recursion counter
- "jmp *%%ebx\n" // call user's signal handler
- "26:mov %%esi, 0(%%esp)\n"
- "incl %%fs:0x1040-0x58\n" // increment recursion counter
-
- // We always register the signal handler to give us rt-style signal
- // frames. But if the user asked for legacy signal frames, we must
- // convert the signal frame prior to calling the user's signal handler.
- "27:sub $0x1C8, %%esp\n" // a legacy signal stack is much larger
- "mov 0x1CC(%%esp), %%eax\n" // push signal number
- "push %%eax\n"
- "mov 0x1CC(%%esp), %%eax\n" // push restorer function
- "push %%eax\n"
- "lea 0x274(%%esp), %%esi\n" // copy siginfo register values
- "lea 0x8(%%esp), %%edi\n" // into new location
- "mov $22, %%ecx\n"
- "cld\n"
- "rep movsl\n"
- "mov 0x2CC(%%esp), %%eax\n" // copy first half of signal mask
- "mov %%eax, 0x58(%%esp)\n"
- "lea 31f, %%esi\n"
- "lea 0x2D4(%%esp), %%edi\n" // patch up retcode magic numbers
- "movb $2, %%cl\n"
- "rep movsl\n"
- "jmp *%%ebx\n" // call user's signal handler
- "28:lea 6b, %%eax\n" // set appropriate restorer function
- "mov %%eax, 0(%%esp)\n"
- "btl $2, %%ecx\n" // SA_SIGINFO
- "jnc 27b\n"
- "lea 29f, %%eax\n"
- "mov %%eax, 0(%%esp)\n" // set appropriate restorer function
- "jmp *%%ebx\n" // call user's signal handler
- "29:pushl $30f\n" // emulate rt_sigreturn()
- "jmp 5b\n"
-
- // Non-executable versions of the restorer function. We use these to
- // trigger a SEGV upon returning from the user's signal handler, giving
- // us an ability to clean up prior to returning from the SEGV handler.
- ".pushsection .data\n" // move code into non-executable section
- "30:mov $173, %%eax\n" // NR_rt_sigreturn
- "int $0x80\n" // gdb looks for this signature when doing
- ".byte 0\n" // backtraces
- "31:pop %%eax\n"
- "mov $119, %%eax\n" // NR_sigreturn
- "int $0x80\n"
- ".popsection\n"
-#else
-#error Unsupported target platform
-#endif
- ".pushsection \".rodata\"\n"
-#ifndef NDEBUG
- "100:.asciz \"RDTSC(P): Executing handler\\n\"\n"
- "200:.asciz \"INT $0x0: Executing handler\\n\"\n"
-#endif
- ".popsection\n"
- "999:pop %0\n"
- : "=g"(fnc)
- :
- : "memory"
-#if defined(__x86_64__)
- , "rsp"
-#elif defined(__i386__)
- , "esp"
-#endif
- );
- return fnc;
-}
-
-SecureMem::Args* Sandbox::getSecureMem() {
- // Returns the SecureMem::Args pointer stored at a fixed offset from the TLS
- // segment register. See trusted_thread.cc for the magic offset to the secure memory area.
- SecureMem::Args* ret;
-#if defined(__x86_64__)
- asm volatile(
- "movq %%gs:-0xE0, %0\n" // x86-64: load the 64-bit pointer found at %gs:-0xE0
- : "=q"(ret));
-#elif defined(__i386__)
- asm volatile(
- "movl %%fs:-0x58, %0\n" // i386: load the 32-bit pointer found at %fs:-0x58
- : "=r"(ret));
-#else
-#error Unsupported target platform
-#endif
- return ret;
-}
-
-void Sandbox::snapshotMemoryMappings(int processFd, int proc_self_maps) { // rewinds proc_self_maps, sends it over processFd, then waits for an int-sized reply; dies on any failure
- SysCalls sys;
- if (sys.lseek(proc_self_maps, 0, SEEK_SET) || // non-zero result from seeking to offset 0 is treated as failure
- !sendFd(processFd, proc_self_maps, -1, NULL, 0)) { // pass the descriptor itself; no second fd (-1) and no extra payload (NULL, 0)
- failure: // shared fatal-error exit, also reached via the goto below
- die("Cannot access /proc/self/maps");
- }
- int dummy; // value is never inspected; only the size of the reply matters
- if (read(sys, processFd, &dummy, sizeof(dummy)) != sizeof(dummy)) { // block until the peer has answered with exactly sizeof(int) bytes
- goto failure;
- }
-}
-
-int Sandbox::supportsSeccompSandbox(int proc_fd) { // returns non-zero if the sandbox can be used; probes by starting it in a forked child and caches the result in status_
- if (status_ != STATUS_UNKNOWN) { // a previous call already determined the answer
- return status_ != STATUS_UNSUPPORTED;
- }
- int fds[2]; // pipe used by the child to report success to the parent
- SysCalls sys;
- if (sys.pipe(fds)) {
- status_ = STATUS_UNSUPPORTED;
- return 0;
- }
- pid_t pid;
- switch ((pid = sys.fork())) {
- case -1: // fork failed. NOTE(review): fds[0] and fds[1] are not closed on this path
- status_ = STATUS_UNSUPPORTED;
- return 0;
- case 0: { // child: try to enter the sandbox for real
- int devnull = sys.open("/dev/null", O_RDWR, 0);
- if (devnull >= 0) { // redirect stdin/stdout/stderr to /dev/null for the probe
- sys.dup2(devnull, 0);
- sys.dup2(devnull, 1);
- sys.dup2(devnull, 2);
- sys.close(devnull);
- }
- if (proc_fd >= 0) { // caller supplied an fd for "/proc": open "self/maps" relative to it
- setProcSelfMaps(sys.openat(proc_fd, "self/maps", O_RDONLY, 0));
- }
- startSandbox();
- write(sys, fds[1], "", 1); // a single byte tells the parent that startSandbox() returned
-
- // Try to tell the trusted thread to shut down the entire process in an
- // orderly fashion
- defaultSystemCallHandler(__NR_exit_group, 0, 0, 0, 0, 0, 0);
-
- // If that did not work (e.g. because the kernel does not know about the
- // exit_group() system call), make a direct _exit() system call instead.
- // This system call is unrestricted in seccomp mode, so it will always
- // succeed. Normally, we don't like it, because unlike exit_group() it
- // does not terminate any other thread. But since we know that
- // exit_group() exists in all kernels which support kernel-level threads,
- // this is OK; we only get here for old kernels where _exit() is sufficient.
- sys._exit(0);
- }
- default: // parent: wait for the child's report
- NOINTR_SYS(sys.close(fds[1])); // close our copy of the write end
- char ch;
- if (read(sys, fds[0], &ch, 1) != 1) { // no byte received: the child did not report success
- status_ = STATUS_UNSUPPORTED;
- } else {
- status_ = STATUS_AVAILABLE;
- }
- int rc; // child's wait status; collected but not inspected
- NOINTR_SYS(sys.waitpid(pid, &rc, 0));
- NOINTR_SYS(sys.close(fds[0]));
- return status_ != STATUS_UNSUPPORTED;
- }
-}
-
-void Sandbox::setProcSelfMaps(int proc_self_maps) { // records a caller-provided descriptor for later use by startSandbox()
- proc_self_maps_ = proc_self_maps; // stored as-is; startSandbox() opens the file itself when this is negative
-}
-
-void Sandbox::startSandbox() { // main entry point: patches system calls in known libraries, snapshots the memory map and creates the trusted thread
- if (status_ == STATUS_UNSUPPORTED) {
- die("The seccomp sandbox is not supported on this computer");
- } else if (status_ == STATUS_ENABLED) {
- return; // already enabled; nothing to do
- }
-
- SysCalls sys;
- if (proc_self_maps_ < 0) { // no usable descriptor yet (e.g. none supplied via setProcSelfMaps()); open the file ourselves
- proc_self_maps_ = sys.open("/proc/self/maps", O_RDONLY, 0);
- if (proc_self_maps_ < 0) {
- die("Cannot access \"/proc/self/maps\"");
- }
- }
-
- // The pid is unchanged for the entire program, so we can retrieve it once
- // and store it in a global variable.
- pid_ = sys.getpid();
-
- // Block all signals, except for the RDTSC handler
- setupSignalHandlers();
-
- // Get socketpairs for talking to the trusted process
- int pair[4]; // pair[0..1] is the first socketpair, pair[2..3] the second
- if (sys.socketpair(AF_UNIX, SOCK_STREAM, 0, pair) ||
- sys.socketpair(AF_UNIX, SOCK_STREAM, 0, pair+2)) {
- die("Failed to create trusted thread");
- }
- processFdPub_ = pair[0];
- cloneFdPub_ = pair[2];
- SecureMemArgs* secureMem = createTrustedProcess(pair[0], pair[1],
- pair[2], pair[3]);
-
- // We find all libraries that have system calls and redirect the system
- // calls to the sandbox. If we miss any system calls, the application will be
- // terminated by the kernel's seccomp code. So, from a security point of
- // view, if this code fails to identify system calls, we are still behaving
- // correctly.
- {
- Maps maps(proc_self_maps_);
- const char *libs[] = { "ld", "libc", "librt", "libpthread", NULL }; // NULL-terminated list of well-known library name prefixes
-
- // Intercept system calls in the VDSO segment (if any). This has to happen
- // before intercepting system calls in any of the other libraries, as
- // the main kernel entry point might be inside of the VDSO and we need to
- // determine its address before we can compare it to jumps from inside
- // other libraries.
- for (Maps::const_iterator iter = maps.begin(); iter != maps.end(); ++iter){
- Library* library = *iter;
- if (library->isVDSO() && library->parseElf()) {
- library->makeWritable(true);
- library->patchSystemCalls();
- library->makeWritable(false);
- break; // only the first VDSO mapping that parses is patched
- }
- }
-
- // Intercept system calls in libraries that are known to have them.
- for (Maps::const_iterator iter = maps.begin(); iter != maps.end(); ++iter){
- Library* library = *iter;
- const char* mapping = iter.name().c_str();
-
- // Find the actual base name of the mapped library by skipping past any
- // SPC and forward-slashes. We don't want to accidentally find matches,
- // because the directory name included part of our well-known lib names.
- //
- // Typically, prior to pruning, entries would look something like this:
- // 08:01 2289011 /lib/libc-2.7.so
- for (const char *delim = " /"; *delim; ++delim) { // first strip up to the last space, then up to the last slash
- const char* skip = strrchr(mapping, *delim);
- if (skip) {
- mapping = skip + 1;
- }
- }
-
- for (const char **ptr = libs; *ptr; ptr++) {
- const char *name = strstr(mapping, *ptr);
- if (name == mapping) { // the well-known name must start at the beginning of the base name
- char ch = name[strlen(*ptr)]; // character immediately following the matched prefix
- if (ch < 'A' || (ch > 'Z' && ch < 'a') || ch > 'z') { // not an ASCII letter, so e.g. "libc-2.7.so" matches "libc" but "libcap" does not
- if (library->parseElf()) {
- library->makeWritable(true);
- library->patchSystemCalls();
- library->makeWritable(false);
- break; // patched; stop testing further names for this mapping
- }
- }
- }
- }
- }
- }
-
- // Take a snapshot of the current memory mappings. These mappings will be
- // off-limits to all future mmap(), munmap(), mremap(), and mprotect() calls.
- snapshotMemoryMappings(processFdPub_, proc_self_maps_);
- NOINTR_SYS(sys.close(proc_self_maps_));
- proc_self_maps_ = -1; // the descriptor is closed; mark it as unset
-
- // Creating the trusted thread enables sandboxing
- createTrustedThread(processFdPub_, cloneFdPub_, secureMem);
-
- // We can no longer check for sandboxing support at this point, but we also
- // know for a fact that it is available (as we just turned it on). So update
- // the status to reflect this information.
- status_ = STATUS_ENABLED;
-}
-
-} // namespace
diff --git a/sandbox/linux/seccomp/sandbox.h b/sandbox/linux/seccomp/sandbox.h
deleted file mode 100644
index 8f49575..0000000
--- a/sandbox/linux/seccomp/sandbox.h
+++ /dev/null
@@ -1,12 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef SANDBOX_H__
-#define SANDBOX_H__
-
-extern "C" int SupportsSeccompSandbox(int proc_fd); // C name of Sandbox::supportsSeccompSandbox(); proc_fd is an fd for "/proc", or -1
-extern "C" void SeccompSandboxSetProcSelfMaps(int proc_self_maps); // C name of Sandbox::setProcSelfMaps(); hands over an already opened "/proc/self/maps" descriptor
-extern "C" void StartSeccompSandbox(); // C name of Sandbox::startSandbox(); the main public entry point
-
-#endif // SANDBOX_H__
diff --git a/sandbox/linux/seccomp/sandbox_impl.h b/sandbox/linux/seccomp/sandbox_impl.h
deleted file mode 100644
index 3e99a5510..0000000
--- a/sandbox/linux/seccomp/sandbox_impl.h
+++ /dev/null
@@ -1,715 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef SANDBOX_IMPL_H__
-#define SANDBOX_IMPL_H__
-
-#include <asm/ldt.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <limits.h>
-#include <linux/prctl.h>
-#include <linux/unistd.h>
-#include <netinet/in.h>
-#include <netinet/tcp.h>
-#include <sched.h>
-#include <signal.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
-#include <sys/ptrace.h>
-#include <sys/resource.h>
-#include <sys/socket.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <time.h>
-#include <unistd.h>
-
-#define NOINTR_SYS(x) \
- ({ typeof(x) i__; while ((i__ = (x)) < 0 && sys.my_errno == EINTR); i__;}) // re-evaluates x while it returns < 0 with sys.my_errno == EINTR, then yields its last result; needs a variable named sys in scope
-
-#ifdef __cplusplus
-#include <map>
-#include <vector>
-#include "sandbox.h"
-#include "securemem.h"
-#include "tls.h"
-
-namespace playground {
-
-class Sandbox {
- // TODO(markus): restrict access to our private file handles
- public:
- enum { kMaxThreads = 100 };
-
-
- // There are a lot of reasons why the Seccomp sandbox might not be available.
- // This could be because the kernel does not support Seccomp mode, or it
- // could be because we fail to successfully rewrite all system call entry
- // points.
- // "proc_fd" should be a file descriptor for "/proc", or -1 if not provided
- // by the caller.
- static int supportsSeccompSandbox(int proc_fd)
- asm("SupportsSeccompSandbox");
-
- // The sandbox needs to be able to access "/proc/self/maps". If this file
- // is not accessible when "startSandbox()" gets called, the caller can
- // provide an already opened file descriptor by calling "setProcSelfMaps()".
- // The sandbox becomes the new owner of this file descriptor and will
- // eventually close it when "startSandbox()" executes.
- static void setProcSelfMaps(int proc_self_maps)
- asm("SeccompSandboxSetProcSelfMaps");
-
- // This is the main public entry point. It finds all system calls that
- // need rewriting, sets up the resources needed by the sandbox, and
- // enters Seccomp mode.
- static void startSandbox() asm("StartSeccompSandbox");
-
- private:
-// syscall_table.c has to be implemented in C, as C++ does not support
-// designated initializers for arrays. The only other alternative would be
-// to have a source code generator for this table.
-//
-// We would still like the C source file to include our header file. This
-// requires some define statements to transform C++ specific constructs to
-// something that is palatable to a C compiler.
-#define STATIC static
-#define SecureMemArgs SecureMem::Args
- // Clone() is special as it has a wrapper in syscall_table.c. The wrapper
- // adds one extra argument (the pointer to the saved registers) and then
- // calls playground$sandbox__clone().
- static long sandbox_clone(int flags, char* stack, int* pid, int* ctid,
- void* tls, void* wrapper_sp)
- asm("playground$sandbox__clone")
- #if defined(__x86_64__)
- __attribute__((visibility("internal")))
-#endif
- ;
-#else
-#define STATIC
-#define bool int
-#define SecureMemArgs void
- // This is the wrapper entry point that is found in the syscall_table.
- long sandbox_clone(int flags, char* stack, int* pid, int* ctid, void* tls)
- asm("playground$sandbox_clone");
-#endif
-
- // Entry points for sandboxed code that is attempting to make system calls
- STATIC long sandbox_access(const char*, int)
- asm("playground$sandbox_access");
- STATIC long sandbox_exit(int status) asm("playground$sandbox_exit");
- STATIC long sandbox_getpid() asm("playground$sandbox_getpid");
- #if defined(__NR_getsockopt)
- STATIC long sandbox_getsockopt(int, int, int, void*, socklen_t*)
- asm("playground$sandbox_getsockopt");
- #endif
- STATIC long sandbox_gettid() asm("playground$sandbox_gettid");
- STATIC long sandbox_ioctl(int d, int req, void* arg)
- asm("playground$sandbox_ioctl");
- #if defined(__NR_ipc)
- STATIC long sandbox_ipc(unsigned, int, int, int, void*, long)
- asm("playground$sandbox_ipc");
- #endif
- STATIC long sandbox_lstat(const char* path, void* buf)
- asm("playground$sandbox_lstat");
- #if defined(__NR_lstat64)
- STATIC long sandbox_lstat64(const char *path, void* b)
- asm("playground$sandbox_lstat64");
- #endif
- STATIC long sandbox_madvise(void*, size_t, int)
- asm("playground$sandbox_madvise");
- STATIC void *sandbox_mmap(void* start, size_t length, int prot, int flags,
- int fd, off_t offset)
- asm("playground$sandbox_mmap");
- STATIC long sandbox_mprotect(const void*, size_t, int)
- asm("playground$sandbox_mprotect");
- STATIC long sandbox_munmap(void* start, size_t length)
- asm("playground$sandbox_munmap");
- STATIC long sandbox_open(const char*, int, mode_t)
- asm("playground$sandbox_open");
- #if defined(__NR_recvfrom)
- STATIC ssize_t sandbox_recvfrom(int, void*, size_t, int, void*, socklen_t*)
- asm("playground$sandbox_recvfrom");
- STATIC ssize_t sandbox_recvmsg(int, struct msghdr*, int)
- asm("playground$sandbox_recvmsg");
- #endif
- #if defined(__NR_rt_sigaction)
- STATIC long sandbox_rt_sigaction(int, const void*, void*, size_t)
- asm("playground$sandbox_rt_sigaction");
- #endif
- #if defined(__NR_rt_sigprocmask)
- STATIC long sandbox_rt_sigprocmask(int how, const void*, void*, size_t)
- asm("playground$sandbox_rt_sigprocmask");
- #endif
- #if defined(__NR_sendmsg)
- STATIC size_t sandbox_sendmsg(int, const struct msghdr*, int)
- asm("playground$sandbox_sendmsg");
- STATIC ssize_t sandbox_sendto(int, const void*, size_t, int, const void*,
- socklen_t)asm("playground$sandbox_sendto");
- #endif
- #if defined(__NR_shmat)
- STATIC void* sandbox_shmat(int, const void*, int)
- asm("playground$sandbox_shmat");
- STATIC long sandbox_shmctl(int, int, void*)
- asm("playground$sandbox_shmctl");
- STATIC long sandbox_shmdt(const void*) asm("playground$sandbox_shmdt");
- STATIC long sandbox_shmget(int, size_t, int)
- asm("playground$sandbox_shmget");
- #endif
- #if defined(__NR_setsockopt)
- STATIC long sandbox_setsockopt(int, int, int, const void*, socklen_t)
- asm("playground$sandbox_setsockopt");
- #endif
- #if defined(__NR_sigaction)
- STATIC long sandbox_sigaction(int, const void*, void*)
- asm("playground$sandbox_sigaction");
- #endif
- #if defined(__NR_signal)
- STATIC void* sandbox_signal(int, const void*)
- asm("playground$sandbox_signal");
- #endif
- #if defined(__NR_sigprocmask)
- STATIC long sandbox_sigprocmask(int how, const void*, void*)
- asm("playground$sandbox_sigprocmask");
- #endif
- #if defined(__NR_socketcall)
- STATIC long sandbox_socketcall(int call, void* args)
- asm("playground$sandbox_socketcall");
- #endif
- STATIC long sandbox_stat(const char* path, void* buf)
- asm("playground$sandbox_stat");
- #if defined(__NR_stat64)
- STATIC long sandbox_stat64(const char *path, void* b)
- asm("playground$sandbox_stat64");
- #endif
-
- // Functions for system calls that need to be handled in the trusted process
- STATIC bool process_access(int, int, int, int, SecureMemArgs*)
- asm("playground$process_access");
- STATIC bool process_clone(int, int, int, int, SecureMemArgs*)
- asm("playground$process_clone");
- STATIC bool process_exit(int, int, int, int, SecureMemArgs*)
- asm("playground$process_exit");
- #if defined(__NR_getsockopt)
- STATIC bool process_getsockopt(int, int, int, int, SecureMemArgs*)
- asm("playground$process_getsockopt");
- #endif
- STATIC bool process_ioctl(int, int, int, int, SecureMemArgs*)
- asm("playground$process_ioctl");
- #if defined(__NR_ipc)
- STATIC bool process_ipc(int, int, int, int, SecureMemArgs*)
- asm("playground$process_ipc");
- #endif
- STATIC bool process_madvise(int, int, int, int, SecureMemArgs*)
- asm("playground$process_madvise");
- STATIC bool process_mmap(int, int, int, int, SecureMemArgs*)
- asm("playground$process_mmap");
- STATIC bool process_mprotect(int, int, int, int, SecureMemArgs*)
- asm("playground$process_mprotect");
- STATIC bool process_munmap(int, int, int, int, SecureMemArgs*)
- asm("playground$process_munmap");
- STATIC bool process_open(int, int, int, int, SecureMemArgs*)
- asm("playground$process_open");
- #if defined(__NR_recvfrom)
- STATIC bool process_recvfrom(int, int, int, int, SecureMemArgs*)
- asm("playground$process_recvfrom");
- STATIC bool process_recvmsg(int, int, int, int, SecureMemArgs*)
- asm("playground$process_recvmsg");
- STATIC bool process_sendmsg(int, int, int, int, SecureMemArgs*)
- asm("playground$process_sendmsg");
- STATIC bool process_sendto(int, int, int, int, SecureMemArgs*)
- asm("playground$process_sendto");
- STATIC bool process_setsockopt(int, int, int, int, SecureMemArgs*)
- asm("playground$process_setsockopt");
- #endif
- #if defined(__NR_shmat)
- STATIC bool process_shmat(int, int, int, int, SecureMemArgs*)
- asm("playground$process_shmat");
- STATIC bool process_shmctl(int, int, int, int, SecureMemArgs*)
- asm("playground$process_shmctl");
- STATIC bool process_shmdt(int, int, int, int, SecureMemArgs*)
- asm("playground$process_shmdt");
- STATIC bool process_shmget(int, int, int, int, SecureMemArgs*)
- asm("playground$process_shmget");
- #endif
- STATIC bool process_sigaction(int, int, int, int, SecureMemArgs*)
- asm("playground$process_sigaction");
- #if defined(__NR_socketcall)
- STATIC bool process_socketcall(int, int, int, int, SecureMemArgs*)
- asm("playground$process_socketcall");
- #endif
- STATIC bool process_stat(int, int, int, int, SecureMemArgs*)
- asm("playground$process_stat");
-
-#ifdef __cplusplus
- friend class Debug;
- friend class Library;
- friend class Maps;
- friend class Mutex;
- friend class SecureMem;
- friend class TLS;
-
- // Define our own inline system calls. These calls will not be rewritten
- // to point to the sandboxed wrapper functions. They thus allow us to
- // make actual system calls (e.g. in the sandbox initialization code, and
- // in the trusted process)
- class SysCalls {
- public:
- #define SYS_CPLUSPLUS
- #define SYS_ERRNO my_errno
- #define SYS_INLINE inline
- #define SYS_PREFIX -1
- #undef SYS_LINUX_SYSCALL_SUPPORT_H
- #include "linux_syscall_support.h"
- SysCalls() : my_errno(0) { }
- int my_errno;
- };
- #ifdef __NR_mmap2
- #define MMAP mmap2
- #define __NR_MMAP __NR_mmap2
- #else
- #define MMAP mmap
- #define __NR_MMAP __NR_mmap
- #endif
-
- // Print an error message and terminate the program. Used for fatal errors.
- static void die(const char *msg = 0) __attribute__((noreturn)) {
- SysCalls sys;
- if (msg) {
- sys.write(2, msg, strlen(msg));
- sys.write(2, "\n", 1);
- }
- for (;;) {
- sys.exit_group(1);
- sys._exit(1);
- }
- }
-
- // Wrapper around "read()" that copes with partial and interrupted reads.
- // It keeps reading until "len" bytes have arrived or end-of-file is seen.
- // Errors are reported through "sys.my_errno"; the global errno variable is
- // not modified. Returns the number of bytes read, or a negative value if
- // the underlying read failed or "len" does not fit into a ssize_t.
- static ssize_t read(SysCalls& sys, int fd, void* buf, size_t len) {
-   // The byte count is returned as a ssize_t, so reject lengths that would
-   // come out negative.
-   if (static_cast<ssize_t>(len) < 0) {
-     sys.my_errno = EINVAL;
-     return -1;
-   }
-   char* const dest = reinterpret_cast<char*>(buf);
-   size_t done = 0;
-   while (done < len) {
-     const ssize_t chunk = NOINTR_SYS(sys.read(fd, dest + done, len - done));
-     if (chunk < 0) {
-       return chunk;
-     }
-     if (chunk == 0) {
-       break;  // End of file: report what we have got so far.
-     }
-     done += chunk;
-   }
-   return done;
- }
-
- // Wrapper around "write()" that can deal with interrupted writes and that
- // does not modify the global errno variable.
- // Unlike the read() wrapper above, this does not loop over short writes:
- // the result of the single NOINTR_SYS-wrapped sys.write() call is returned
- // as-is, so callers have to compare it against "len" themselves.
- static ssize_t write(SysCalls& sys, int fd, const void* buf, size_t len){
- return NOINTR_SYS(sys.write(fd, buf, len));
- }
-
- // Sends a file handle to another process.
- // N.B. trusted_thread.cc has an assembly version of this function that
- // is safe to use without a call stack. If the wire-format is changed,
- // make sure to update the assembly code.
- static bool sendFd(int transport, int fd0, int fd1, const void* buf,
- size_t len);
-
- // If getFd() fails, it will set the first valid fd slot (e.g. fd0) to
- // -errno.
- static bool getFd(int transport, int* fd0, int* fd1, void* buf,
- size_t* len);
-
- // Data structures used to forward system calls to the trusted process.
- struct Accept {
- int sockfd;
- void* addr;
- socklen_t* addrlen;
- } __attribute__((packed));
-
- struct Accept4 {
- int sockfd;
- void* addr;
- socklen_t* addrlen;
- int flags;
- } __attribute__((packed));
-
- struct Access {
- size_t path_length;
- int mode;
- } __attribute__((packed));
-
- struct Bind {
- int sockfd;
- void* addr;
- socklen_t addrlen;
- } __attribute__((packed));
-
- struct Clone {
- int flags;
- char* stack;
- int* pid;
- int* ctid;
- void* tls;
- #if defined(__x86_64__)
- struct {
- void* r15;
- void* r14;
- void* r13;
- void* r12;
- void* r11;
- void* r10;
- void* r9;
- void* r8;
- void* rdi;
- void* rsi;
- void* rdx;
- void* rcx;
- void* rbx;
- void* rbp;
- void* fake_ret;
- } regs64 __attribute__((packed));
- #elif defined(__i386__)
- struct {
- void* ebp;
- void* edi;
- void* esi;
- void* edx;
- void* ecx;
- void* ebx;
- } regs32 __attribute__((packed));
- #else
- #error Unsupported target platform
- #endif
- void* ret;
- } __attribute__((packed));
-
- struct Connect {
- int sockfd;
- void* addr;
- socklen_t addrlen;
- } __attribute__((packed));
-
- struct GetSockName {
- int sockfd;
- void* name;
- socklen_t* namelen;
- } __attribute__((packed));
-
- struct GetPeerName {
- int sockfd;
- void* name;
- socklen_t* namelen;
- } __attribute__((packed));
-
- struct GetSockOpt {
- int sockfd;
- int level;
- int optname;
- void* optval;
- socklen_t* optlen;
- } __attribute__((packed));
-
- struct IOCtl {
- int d;
- int req;
- void *arg;
- } __attribute__((packed));
-
- #if defined(__NR_ipc)
- struct IPC {
- unsigned call;
- int first;
- int second;
- int third;
- void* ptr;
- long fifth;
- } __attribute__((packed));
- #endif
-
- struct Listen {
- int sockfd;
- int backlog;
- } __attribute__((packed));
-
- struct MAdvise {
- const void* start;
- size_t len;
- int advice;
- } __attribute__((packed));
-
- struct MMap {
- void* start;
- size_t length;
- int prot;
- int flags;
- int fd;
- off_t offset;
- } __attribute__((packed));
-
- struct MProtect {
- const void* addr;
- size_t len;
- int prot;
- };
-
- struct MUnmap {
- void* start;
- size_t length;
- } __attribute__((packed));
-
- struct Open {
- size_t path_length;
- int flags;
- mode_t mode;
- } __attribute__((packed));
-
- struct Recv {
- int sockfd;
- void* buf;
- size_t len;
- int flags;
- } __attribute__((packed));
-
- struct RecvFrom {
- int sockfd;
- void* buf;
- size_t len;
- int flags;
- void* from;
- socklen_t *fromlen;
- } __attribute__((packed));
-
- struct RecvMsg {
- int sockfd;
- struct msghdr* msg;
- int flags;
- } __attribute__((packed));
-
- struct Send {
- int sockfd;
- const void* buf;
- size_t len;
- int flags;
- } __attribute__((packed));
-
- struct SendMsg {
- int sockfd;
- const struct msghdr* msg;
- int flags;
- } __attribute__((packed));
-
- struct SendTo {
- int sockfd;
- const void* buf;
- size_t len;
- int flags;
- const void* to;
- socklen_t tolen;
- } __attribute__((packed));
-
- struct SetSockOpt {
- int sockfd;
- int level;
- int optname;
- const void* optval;
- socklen_t optlen;
- } __attribute__((packed));
-
- #if defined(__NR_shmat)
- struct ShmAt {
- int shmid;
- const void* shmaddr;
- int shmflg;
- } __attribute__((packed));
-
- struct ShmCtl {
- int shmid;
- int cmd;
- void *buf;
- } __attribute__((packed));
-
- struct ShmDt {
- const void *shmaddr;
- } __attribute__((packed));
-
- struct ShmGet {
- int key;
- size_t size;
- int shmflg;
- } __attribute__((packed));
- #endif
-
- struct ShutDown {
- int sockfd;
- int how;
- } __attribute__((packed));
-
- struct SigAction {
- int sysnum;
- int signum;
- const SysCalls::kernel_sigaction* action;
- const SysCalls::kernel_sigaction* old_action;
- size_t sigsetsize;
- } __attribute__((packed));
-
- struct Socket {
- int domain;
- int type;
- int protocol;
- } __attribute__((packed));
-
- struct SocketPair {
- int domain;
- int type;
- int protocol;
- int* pair;
- } __attribute__((packed));
-
- #if defined(__NR_socketcall)
- struct SocketCall {
- int call;
- void* arg_ptr;
- union {
- Socket socket;
- Bind bind;
- Connect connect;
- Listen listen;
- Accept accept;
- GetSockName getsockname;
- GetPeerName getpeername;
- SocketPair socketpair;
- Send send;
- Recv recv;
- SendTo sendto;
- RecvFrom recvfrom;
- ShutDown shutdown;
- SetSockOpt setsockopt;
- GetSockOpt getsockopt;
- SendMsg sendmsg;
- RecvMsg recvmsg;
- Accept4 accept4;
- } args;
- } __attribute__((packed));
- #endif
-
- struct Stat {
- int sysnum;
- size_t path_length;
- void* buf;
- } __attribute__((packed));
-
- // Thread local data available from each sandboxed thread.
- enum { TLS_COOKIE, TLS_TID, TLS_THREAD_FD };
- static long long cookie() { return TLS::getTLSValue<long long>(TLS_COOKIE); }
- static int tid() { return TLS::getTLSValue<int>(TLS_TID); }
- static int threadFdPub() { return TLS::getTLSValue<int>(TLS_THREAD_FD); }
- static int processFdPub() { return processFdPub_; }
- static kernel_sigset_t* signalMask() { return &getSecureMem()->signalMask; }
-
- // The SEGV handler knows how to handle RDTSC instructions
- static void setupSignalHandlers();
- static void (*segv())(int signo, SysCalls::siginfo *context, void *unused);
-
- // If no specific handler has been registered for a system call, call this
- // function which asks the trusted thread to perform the call. This is used
- // for system calls that are not restricted.
- static void* defaultSystemCallHandler(int syscallNum, void* arg0,
- void* arg1, void* arg2, void* arg3,
- void* arg4, void* arg5)
- asm("playground$defaultSystemCallHandler")
- #if defined(__x86_64__)
- __attribute__((visibility("internal")))
- #endif
- ;
-
- // Return the current secure memory structure for this thread.
- static SecureMem::Args* getSecureMem();
-
- // Return a secure memory structure that can be used by a newly created
- // thread.
- static SecureMem::Args* getNewSecureMem();
-
- // This function runs in the trusted process at startup and finds all the
- // memory mappings that existed when the sandbox was first enabled. Going
- // forward, all these mappings are off-limits for operations such as
- // mmap(), munmap(), and mprotect().
- static int initializeProtectedMap(int fd);
-
- // Helper function that allows the trusted process to get access to
- // "/proc/self/maps" in the sandbox.
- static void snapshotMemoryMappings(int processFd, int proc_self_maps);
-
- // Main loop for the trusted process.
- static void trustedProcess(int parentMapsFd, int processFdPub,
- int sandboxFd, int cloneFd,
- SecureMem::Args* secureArena)
- __attribute__((noreturn));
-
- // Fork()s off the trusted process.
- static SecureMem::Args* createTrustedProcess(int processFdPub, int sandboxFd,
- int cloneFdPub, int cloneFd);
-
- // Creates the trusted thread for the initial thread, then enables
- // Seccomp mode.
- static void createTrustedThread(int processFdPub, int cloneFdPub,
- SecureMem::Args* secureMem);
-
- static int proc_self_maps_;
- static enum SandboxStatus {
- STATUS_UNKNOWN, STATUS_UNSUPPORTED, STATUS_AVAILABLE, STATUS_ENABLED
- } status_;
- static int pid_;
- static int processFdPub_;
- static int cloneFdPub_;
-
- #ifdef __i386__
- struct SocketCallArgInfo;
- static const struct SocketCallArgInfo socketCallArgInfo[];
- #endif
-
- // We always have to intercept SIGSEGV. If the application wants to set its
- // own SEGV handler, we forward to it whenever necessary.
- static SysCalls::kernel_sigaction sa_segv_ asm("playground$sa_segv");
-
- // The syscall_mutex_ can only be directly accessed by the trusted process.
- // It can be accessed by the trusted thread after fork()ing and calling
- // mprotect(PROT_READ|PROT_WRITE). The mutex is used for system calls that
- // require passing additional data, and that require the trusted process to
- // wait until the trusted thread is done processing (e.g. exit(), clone(),
- // open(), stat())
- static int syscall_mutex_ asm("playground$syscall_mutex");
-
- // Available in trusted process, only
- typedef std::map<void *, long> ProtectedMap;
- static ProtectedMap protectedMap_;
- static std::vector<SecureMem::Args*> secureMemPool_;
-};
-
-// If this struct is extended to contain parameters that are read by
-// the trusted thread, we will have to mprotect() it to be read-only when
-// starting the sandbox. However, currently it is read only by the
-// trusted process, and the sandboxed process cannot change the values
-// that the fork()'d trusted process sees.
-struct SandboxPolicy {
- bool allow_file_namespace; // Allow filename-based system calls.
-};
-
-extern struct SandboxPolicy g_policy;
-
-} // namespace
-
-using playground::Sandbox;
-#endif // __cplusplus
-
-#endif // SANDBOX_IMPL_H__
diff --git a/sandbox/linux/seccomp/seccomp.gyp b/sandbox/linux/seccomp/seccomp.gyp
deleted file mode 100644
index 596be21..0000000
--- a/sandbox/linux/seccomp/seccomp.gyp
+++ /dev/null
@@ -1,93 +0,0 @@
-# Copyright (c) 2010 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-{
- 'variables': {
- 'chromium_code': 1,
- 'seccomp_intermediate_dir': '<(INTERMEDIATE_DIR)/seccomp-sandbox',
- },
- 'targets': [
- {
- 'target_name': 'seccomp_sandbox',
- 'type': 'static_library',
- 'sources': [
- 'access.cc',
- 'allocator.cc',
- 'allocator.h',
- 'clone.cc',
- 'exit.cc',
- 'debug.cc',
- 'getpid.cc',
- 'gettid.cc',
- 'ioctl.cc',
- 'ipc.cc',
- 'library.cc',
- 'library.h',
- 'linux_syscall_support.h',
- 'madvise.cc',
- 'maps.cc',
- 'maps.h',
- 'mmap.cc',
- 'mprotect.cc',
- 'munmap.cc',
- 'mutex.h',
- 'open.cc',
- 'sandbox.cc',
- 'sandbox.h',
- 'sandbox_impl.h',
- 'securemem.cc',
- 'securemem.h',
- 'sigaction.cc',
- 'sigprocmask.cc',
- 'socketcall.cc',
- 'stat.cc',
- 'syscall.cc',
- 'syscall.h',
- 'syscall_table.c',
- 'syscall_table.h',
- 'tls.h',
- 'trusted_process.cc',
- 'trusted_thread.cc',
- 'x86_decode.cc',
- 'x86_decode.h',
- ],
- },
- {
- 'target_name': 'seccomp_tests',
- 'type': 'executable',
- 'sources': [
- 'tests/test_syscalls.cc',
- ],
- 'include_dirs': [
- '.',
- '<(seccomp_intermediate_dir)',
- ],
- 'dependencies': [
- 'seccomp_sandbox',
- ],
- 'libraries': [
- '-lpthread',
- '-lutil', # For openpty()
- ],
- 'actions': [
- {
- 'action_name': 'make_test_list',
- 'inputs': [
- 'tests/list_tests.py',
- 'tests/test_syscalls.cc',
- ],
- 'outputs': ['<(seccomp_intermediate_dir)/test-list.h'],
- 'action': ['sh', '-c', 'python <(_inputs) > <(_outputs)'],
- },
- ],
- },
- {
- 'target_name': 'timestats',
- 'type': 'executable',
- 'sources': [
- 'timestats.cc',
- ],
- },
- ],
-}
diff --git a/sandbox/linux/seccomp/securemem.cc b/sandbox/linux/seccomp/securemem.cc
deleted file mode 100644
index 5f07bbe..0000000
--- a/sandbox/linux/seccomp/securemem.cc
+++ /dev/null
@@ -1,105 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "debug.h"
-#include "mutex.h"
-#include "sandbox_impl.h"
-#include "securemem.h"
-
-namespace playground {
-
-// Reports the outcome of a system call that is not going to be executed:
-// "err" is widened to a pointer-sized value and written to "fd". A non-zero
-// "err" is also logged. Failing to deliver the value is fatal.
-void SecureMem::abandonSystemCall(int fd, int err) {
-  if (err != 0) {
-    Debug::message("System call failed\n");
-  }
-  void* result = reinterpret_cast<void *>(err);
-  Sandbox::SysCalls sys;
-  const ssize_t sent = Sandbox::write(sys, fd, &result, sizeof(result));
-  if (sent != sizeof(result)) {
-    Sandbox::die("Failed to send system call");
-  }
-}
-
-// Terminates the calling process unless "parentMapsFd" can still be rewound
-// and read from.
-void SecureMem::dieIfParentDied(int parentMapsFd) {
-  // Contention on the syscall_mutex_ is not expected. When we do see it, then
-  // either the sandbox is handling an unusually heavy load of system calls
-  // (which it is not optimized for) or, more likely, the sandboxed process
-  // went away while the trusted process was still waiting for the mutex.
-  // Detect the latter and terminate the trusted process.
-  if (lseek(parentMapsFd, 0, SEEK_SET) != 0) {
-    Sandbox::die();
-  }
-
-  // Probe with a one-byte read, retrying if a signal interrupts us.
-  char probe;
-  int count;
-  do {
-    count = read(parentMapsFd, &probe, 1);
-  } while (count < 0 && errno == EINTR);
-
-  // Only a read that returns zero bytes counts as "parent is gone".
-  if (count == 0) {
-    Sandbox::die();
-  }
-}
-
-// Acquires Sandbox::syscall_mutex_ and then bumps the sequence counter in
-// "mem". While waiting for the mutex, "parentMapsFd" is used to check that
-// the parent still exists.
-void SecureMem::lockSystemCall(int parentMapsFd, Args* mem) {
- // Each failed attempt (the 500 is presumably a timeout; see Mutex) is
- // followed by a liveness check, which does not return if the parent died.
- while (!Mutex::lockMutex(&Sandbox::syscall_mutex_, 500)) {
- dieIfParentDied(parentMapsFd);
- }
- // Increment mem->sequence by one with a lock-prefixed instruction. The
- // "memory" clobber keeps the compiler from moving memory accesses across
- // this point.
- asm volatile(
- #if defined(__x86_64__)
- "lock; incq (%0)\n"
- #elif defined(__i386__)
- "lock; incl (%0)\n"
- #else
- #error Unsupported target platform
- #endif
- :
- : "q"(&mem->sequence)
- : "memory");
-}
-
-// Stores a system call request (number plus six arguments) in "mem" and
-// then notifies the receiver by writing the call type to "fd".
-//
-// The sequence counter in "mem" is incremented once before the request
-// fields are written and once after. With "locked" set, the first increment
-// is skipped here; lockSystemCall() performs one after taking the mutex.
-//
-// If "parentMapsFd" is non-negative, this function afterwards waits until
-// Sandbox::syscall_mutex_ has been unlocked, checking between attempts that
-// the parent still exists.
-void SecureMem::sendSystemCallInternal(int fd, bool locked, int parentMapsFd,
- Args* mem, int syscallNum, void* arg1,
- void* arg2, void* arg3, void* arg4,
- void* arg5, void* arg6) {
- if (!locked) {
- // First increment of the sequence counter (unlocked case only).
- asm volatile(
- #if defined(__x86_64__)
- "lock; incq (%0)\n"
- #elif defined(__i386__)
- "lock; incl (%0)\n"
- #else
- #error Unsupported target platform
- #endif
- :
- : "q"(&mem->sequence)
- : "memory");
- }
- // The call type is -2 for locked requests and -1 for unlocked ones.
- mem->callType = locked ? -2 : -1;
- mem->syscallNum = syscallNum;
- mem->arg1 = arg1;
- mem->arg2 = arg2;
- mem->arg3 = arg3;
- mem->arg4 = arg4;
- mem->arg5 = arg5;
- mem->arg6 = arg6;
- // Second increment, after all request fields have been written.
- asm volatile(
- #if defined(__x86_64__)
- "lock; incq (%0)\n"
- #elif defined(__i386__)
- "lock; incl (%0)\n"
- #else
- #error Unsupported target platform
- #endif
- :
- : "q"(&mem->sequence)
- : "memory");
- // Only sizeof(int) bytes of callType are sent, although the field itself
- // is declared as a long.
- Sandbox::SysCalls sys;
- if (Sandbox::write(sys, fd, &mem->callType, sizeof(int)) != sizeof(int)) {
- Sandbox::die("Failed to send system call");
- }
- if (parentMapsFd >= 0) {
- while (!Mutex::waitForUnlock(&Sandbox::syscall_mutex_, 500)) {
- dieIfParentDied(parentMapsFd);
- }
- }
-}
-
-} // namespace
diff --git a/sandbox/linux/seccomp/securemem.h b/sandbox/linux/seccomp/securemem.h
deleted file mode 100644
index 91283db..0000000
--- a/sandbox/linux/seccomp/securemem.h
+++ /dev/null
@@ -1,205 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef SECURE_MEM_H__
-#define SECURE_MEM_H__
-
-#include <stdlib.h>
-#include "linux_syscall_support.h"
-
-namespace playground {
-
-class SecureMem {
- public:
- // Each thread is associated with two memory pages (i.e. 8192 bytes). This
- // memory is fully accessible by the trusted process, but in the trusted
- // thread and the sandboxed thread, the first page is only mapped PROT_READ,
- // and the second one is PROT_READ|PROT_WRITE.
- //
- // The first page can be modified by the trusted process and this is the
- // main mechanism how it communicates with the trusted thread. After each
- // update, it updates the "sequence" number. The trusted process must
- // check the "sequence" number has the expected value, and only then can
- // it trust the data in this page.
- typedef struct Args {
- union {
- struct {
- union {
- struct {
- struct Args* self;
- long sequence;
- long callType;
- long syscallNum;
- void* arg1;
- void* arg2;
- void* arg3;
- void* arg4;
- void* arg5;
- void* arg6;
-
- // Used by clone() to allow return from the syscall wrapper.
- void* ret;
- #if defined(__x86_64__)
- void* rbp;
- void* rbx;
- void* rcx;
- void* rdx;
- void* rsi;
- void* rdi;
- void* r8;
- void* r9;
- void* r10;
- void* r11;
- void* r12;
- void* r13;
- void* r14;
- void* r15;
- #elif defined(__i386__)
- void* ebp;
- void* edi;
- void* esi;
- void* edx;
- void* ecx;
- void* ebx;
- #else
- #error Unsupported target platform
- #endif
-
- // Used by clone() to set up data for the new thread.
- struct Args* newSecureMem;
- int processFdPub;
- int cloneFdPub;
-
- // Set to non-zero, if in debugging mode
- int allowAllSystemCalls;
-
- // The most recent SysV SHM identifier returned by
- // shmget(IPC_PRIVATE)
- int shmId;
-
- // The following entries make up the sandboxed thread's TLS
- long long cookie;
- long long threadId;
- long long threadFdPub;
- } __attribute__((packed));
- char header[512];
- };
- // Used for calls such as open() and stat().
- char pathname[4096 - 512];
- } __attribute__((packed));
- char securePage[4096];
- };
- union {
- struct {
- // This scratch space is used by the trusted thread to read parameters
- // for unrestricted system calls.
- int tmpSyscallNum;
- void* tmpArg1;
- void* tmpArg2;
- void* tmpArg3;
- void* tmpArg4;
- void* tmpArg5;
- void* tmpArg6;
- void* tmpReturnValue;
-
- // Scratch space used to return the result of a rdtsc instruction
- int rdtscpEax;
- int rdtscpEdx;
- int rdtscpEcx;
-
- // We often have long sequences of calls to gettimeofday(). This is
- // needlessly expensive. Coalesce them into a single call.
- int lastSyscallNum;
- int gettimeofdayCounter;
-
- // For debugging purposes, we want to be able to log messages. This can
- // result in additional system calls. Make sure that we don't trigger
- // logging of those recursive calls.
- int recursionLevel;
-
- // Computing the signal mask is expensive. Keep a cached copy.
- kernel_sigset_t signalMask;
-
- // Keep track of whether we are in a SEGV handler
- int inSegvHandler;
- } __attribute__((packed));
- char scratchPage[4096];
- };
- } __attribute__((packed)) Args;
-
- // Allows the trusted process to check whether the parent process still
- // exists. If it doesn't, kill the trusted process.
- static void dieIfParentDied(int parentProc);
-
- // The trusted process received a system call that it intends to deny.
- static void abandonSystemCall(int fd, int err);
-
- // Acquires the syscall_mutex_ prior to making changes to the parameters in
- // the secure memory page. Used by calls such as exit(), clone(), open(),
- // socketcall(), and stat().
- // After locking the mutex, it is no longer valid to abandon the system
- // call!
- static void lockSystemCall(int parentProc, Args* mem);
-
- // Sends a system call to the trusted thread. If "locked" is true, the
- // caller must first call lockSystemCall() and must also provide
- // "parentProc". In locked mode, sendSystemCall() won't return until the
- // trusted thread has completed processing.
- // Use sparingly as it serializes the operation of the trusted process.
- static void sendSystemCall(int fd, bool locked, int parentProc, Args* mem,
- int syscallNum) {
- sendSystemCallInternal(fd, locked, parentProc, mem, syscallNum);
- }
- template<class T1> static
- void sendSystemCall(int fd, bool locked, int parentProc, Args* mem,
- int syscallNum, T1 arg1) {
- sendSystemCallInternal(fd, locked, parentProc, mem, syscallNum,
- (void*)arg1);
- }
- template<class T1, class T2> static
- void sendSystemCall(int fd, bool locked, int parentProc, Args* mem,
- int syscallNum, T1 arg1, T2 arg2) {
- sendSystemCallInternal(fd, locked, parentProc, mem, syscallNum,
- (void*)arg1, (void*)arg2);
- }
- template<class T1, class T2, class T3> static
- void sendSystemCall(int fd, bool locked, int parentProc, Args* mem,
- int syscallNum, T1 arg1, T2 arg2, T3 arg3) {
- sendSystemCallInternal(fd, locked, parentProc, mem, syscallNum,
- (void*)arg1, (void*)arg2, (void*)arg3);
- }
- template<class T1, class T2, class T3, class T4> static
- void sendSystemCall(int fd, bool locked, int parentProc, Args* mem,
- int syscallNum, T1 arg1, T2 arg2, T3 arg3, T4 arg4) {
- sendSystemCallInternal(fd, locked, parentProc, mem, syscallNum,
- (void*)arg1, (void*)arg2, (void*)arg3, (void*)arg4);
- }
- template<class T1, class T2, class T3, class T4, class T5> static
- void sendSystemCall(int fd, bool locked, int parentProc, Args* mem,
- int syscallNum, T1 arg1, T2 arg2, T3 arg3, T4 arg4,
- T5 arg5) {
- sendSystemCallInternal(fd, locked, parentProc, mem, syscallNum,
- (void*)arg1, (void*)arg2, (void*)arg3, (void*)arg4,
- (void*)arg5);
- }
- template<class T1, class T2, class T3, class T4, class T5, class T6> static
- void sendSystemCall(int fd, bool locked, int parentProc, Args* mem,
- int syscallNum, T1 arg1, T2 arg2, T3 arg3, T4 arg4,
- T5 arg5, T6 arg6) {
- sendSystemCallInternal(fd, locked, parentProc, mem, syscallNum,
- (void*)arg1, (void*)arg2, (void*)arg3, (void*)arg4,
- (void*)arg5, (void*)arg6);
- }
-
- private:
- static void sendSystemCallInternal(int fd, bool locked, int parentProc,
- Args* mem, int syscallNum, void* arg1 = 0,
- void* arg2 = 0, void* arg3 = 0,
- void* arg4 = 0, void* arg5 = 0,
- void* arg6 = 0);
-};
-
-} // namespace
-
-#endif // SECURE_MEM_H__
diff --git a/sandbox/linux/seccomp/sigaction.cc b/sandbox/linux/seccomp/sigaction.cc
deleted file mode 100644
index 162416d..0000000
--- a/sandbox/linux/seccomp/sigaction.cc
+++ /dev/null
@@ -1,177 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// TODO(markus): We currently instrument the restorer functions with calls to
-// the syscallWrapper(). This prevents gdb from properly
-// creating backtraces of code that is running in signal
-// handlers. We might instead want to always override the
-// restorer with a function that contains the "magic" signature
-// but that is not executable. The SEGV handler can detect this
-// and then invoke the appropriate restorer.
-
-#include "debug.h"
-#include "sandbox_impl.h"
-
-namespace playground {
-
-#if defined(__NR_sigaction)
-// Handler for the legacy sigaction() system call, which uses the old-style
-// kernel_old_sigaction layout with a single-word signal mask.
-//
-// Requests for SIGSEGV never reach the kernel: they only read and/or update
-// the saved sa_segv_ structure. All other signals are forwarded to the
-// trusted process, and the result is read back from the thread's fd.
-// Returns the result of the call (0 for the SIGSEGV case).
-long Sandbox::sandbox_sigaction(int signum, const void* a_, void* oa_) {
- const SysCalls::kernel_old_sigaction* action =
- reinterpret_cast<const SysCalls::kernel_old_sigaction*>(a_);
- SysCalls::kernel_old_sigaction* old_action =
- reinterpret_cast<SysCalls::kernel_old_sigaction*>(oa_);
-
- long rc = 0;
- long long tm;
- Debug::syscall(&tm, __NR_sigaction, "Executing handler");
- if (signum == SIGSEGV) {
- // Convert between the old-style structure and sa_segv_ field by field;
- // only the first word of the saved mask takes part.
- if (old_action) {
- old_action->sa_handler_ = sa_segv_.sa_handler_;
- old_action->sa_mask = sa_segv_.sa_mask.sig[0];
- old_action->sa_flags = sa_segv_.sa_flags;
- old_action->sa_restorer = sa_segv_.sa_restorer;
- }
- if (action) {
- sa_segv_.sa_handler_ = action->sa_handler_;
- sa_segv_.sa_mask.sig[0] = action->sa_mask;
- sa_segv_.sa_flags = action->sa_flags;
- sa_segv_.sa_restorer = action->sa_restorer;
- }
- } else {
- // Request as sent to the trusted process: system call number, this
- // thread's cookie, then the SigAction parameters.
- struct {
- int sysnum;
- long long cookie;
- SigAction sigaction_req;
- } __attribute__((packed)) request;
- request.sysnum = __NR_sigaction;
- request.cookie = cookie();
- request.sigaction_req.sysnum = __NR_sigaction;
- request.sigaction_req.signum = signum;
- // The old-style pointers are passed along unchanged; the casts only
- // satisfy the field types of SigAction.
- request.sigaction_req.action =
- reinterpret_cast<const SysCalls::kernel_sigaction *>(action);
- request.sigaction_req.old_action =
- reinterpret_cast<const SysCalls::kernel_sigaction *>(old_action);
- request.sigaction_req.sigsetsize = 8;
-
- SysCalls sys;
- if (write(sys, processFdPub(), &request, sizeof(request)) !=
- sizeof(request) ||
- read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
- die("Failed to forward sigaction() request [sandbox]");
- }
- }
- Debug::elapsed(tm, __NR_sigaction);
- return rc;
-}
-#endif
-
-#if defined(__NR_rt_sigaction)
-#define min(a,b) ({ typeof(a) a_=(a); typeof(b) b_=(b); a_ < b_ ? a_ : b_; })
-#define max(a,b) ({ typeof(a) a_=(a); typeof(b) b_=(b); a_ > b_ ? a_ : b_; })
-
-// Handler for the rt_sigaction() system call.
-//
-// Requests for SIGSEGV never reach the kernel: they only read and/or update
-// the saved sa_segv_ structure, honoring the caller's "sigsetsize". All
-// other signals are forwarded to the trusted process, and the result is read
-// back from the thread's fd.
-//
-//   signum      signal whose disposition is queried and/or changed
-//   a_          new kernel_sigaction, or NULL to leave it unchanged
-//   oa_         receives the previous kernel_sigaction, may be NULL
-//   sigsetsize  size in bytes of the signal mask in the caller's structures
-//
-// Returns the result of the call (0 for the SIGSEGV case).
-long Sandbox::sandbox_rt_sigaction(int signum, const void* a_, void* oa_,
-                                   size_t sigsetsize) {
-  const SysCalls::kernel_sigaction* action =
-      reinterpret_cast<const SysCalls::kernel_sigaction*>(a_);
-  SysCalls::kernel_sigaction* old_action =
-      reinterpret_cast<SysCalls::kernel_sigaction*>(oa_);
-
-  long rc = 0;
-  long long tm;
-  Debug::syscall(&tm, __NR_rt_sigaction, "Executing handler");
-  if (signum == SIGSEGV) {
-    // All sizes are unsigned. Differences are only computed after checking
-    // which operand is larger, as an unsigned subtraction would wrap around
-    // to a huge length instead of going negative.
-    const size_t ourSize = sizeof(sa_segv_);
-    const size_t theirSize = offsetof(SysCalls::kernel_sigaction, sa_mask) +
-                             sigsetsize;
-    const size_t copySize = theirSize < ourSize ? theirSize : ourSize;
-    if (old_action) {
-      memcpy(old_action, &sa_segv_, copySize);
-      if (theirSize > ourSize) {
-        // The caller's structure is larger than ours; clear the excess.
-        memset(old_action + 1, 0, theirSize - ourSize);
-      }
-    }
-    if (action) {
-      memcpy(&sa_segv_, action, copySize);
-      if (sigsetsize < sizeof(sa_segv_.sa_mask)) {
-        // The caller supplied a shorter mask than we store. Clear the part
-        // that was not supplied, leaving the bytes just copied intact.
-        memset(reinterpret_cast<char*>(&sa_segv_.sa_mask) + sigsetsize, 0,
-               sizeof(sa_segv_.sa_mask) - sigsetsize);
-      }
-    }
-  } else {
-    // Request as sent to the trusted process: system call number, this
-    // thread's cookie, then the SigAction parameters.
-    struct {
-      int sysnum;
-      long long cookie;
-      SigAction sigaction_req;
-    } __attribute__((packed)) request;
-    request.sysnum = __NR_rt_sigaction;
-    request.cookie = cookie();
-    request.sigaction_req.sysnum = __NR_rt_sigaction;
-    request.sigaction_req.signum = signum;
-    request.sigaction_req.action = action;
-    request.sigaction_req.old_action = old_action;
-    request.sigaction_req.sigsetsize = sigsetsize;
-
-    SysCalls sys;
-    if (write(sys, processFdPub(), &request, sizeof(request)) !=
-        sizeof(request) ||
-        read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
-      die("Failed to forward rt_sigaction() request [sandbox]");
-    }
-  }
-  Debug::elapsed(tm, __NR_rt_sigaction);
-  return rc;
-}
-#endif
-
-#if defined(__NR_signal)
-// Handler for the legacy signal() system call, implemented on top of
-// sandbox_sigaction(). The handler is installed with SA_NODEFER and
-// SA_RESETHAND and an empty signal mask. Returns the previously installed
-// handler, or the negative result of sandbox_sigaction() cast to a pointer.
-void* Sandbox::sandbox_signal(int signum, const void* handler) {
- struct kernel_old_sigaction sa, osa;
- sa.sa_handler_ = reinterpret_cast<void (*)(int)>(handler);
- sa.sa_flags = SA_NODEFER | SA_RESETHAND | SA_RESTORER;
- sa.sa_mask = 0;
- // SA_RESTORER requires a restorer function. It is the small code sequence
- // at label 0 (pop one word, then issue system call 119 through int $0x80).
- // The "lea" stores its address in sa.sa_restorer, and the "jmp" skips over
- // it so that it is not executed here.
- asm volatile(
- "lea 0f, %0\n"
- "jmp 1f\n"
- "0:pop %%eax\n"
- "mov $119, %%eax\n" // __NR_sigreturn
- "int $0x80\n"
- "1:\n"
- : "=r"(sa.sa_restorer));
- long rc = sandbox_sigaction(signum, &sa, &osa);
- if (rc < 0) {
- return (void *)rc;
- }
- return reinterpret_cast<void *>(osa.sa_handler_);
-}
-#endif
-
-// Trusted-process side of sigaction()/rt_sigaction(). Reads a SigAction
-// request from "sandboxFd". A request for SIGSEGV is refused with -EINVAL
-// and false is returned; any other request is passed on unchanged via
-// sendSystemCall() and true is returned.
-bool Sandbox::process_sigaction(int parentMapsFd, int sandboxFd,
- int threadFdPub, int threadFd,
- SecureMem::Args* mem) {
- // We need to intercept sigaction() in order to properly rewrite calls to
- // sigaction(SEGV). While there is no security implication if we didn't do
- // so, it would end up preventing the program from running correctly as
- // the sandbox's SEGV handler could accidentally get removed. All of this is
- // done in sandbox_{,rt_}sigaction(). But we still bounce through the
- // trusted process as that is the only way we can instrument system calls.
- // This is somewhat needlessly complicated. But as sigaction() is not a
- // performance critical system call, it is easier to do this way than to
- // extend the format of the syscall_table so that it could deal with this
- // special case.
-
- // Read request
- SigAction sigaction_req;
- SysCalls sys;
- if (read(sys, sandboxFd, &sigaction_req, sizeof(sigaction_req)) !=
- sizeof(sigaction_req)) {
- die("Failed to read parameters for sigaction() [process]");
- }
- if (sigaction_req.signum == SIGSEGV) {
- // This should never happen. Something went wrong when intercepting the
- // system call. This is not a security problem, but it clearly doesn't
- // make sense to let the system call pass.
- SecureMem::abandonSystemCall(threadFd, -EINVAL);
- return false;
- }
- // Unlocked send (locked == false, no parent fd): the system call number
- // recorded in the request selects sigaction() vs. rt_sigaction().
- SecureMem::sendSystemCall(threadFdPub, false, -1, mem, sigaction_req.sysnum,
- sigaction_req.signum, sigaction_req.action,
- sigaction_req.old_action,
- sigaction_req.sigsetsize);
- return true;
-}
-
-} // namespace
diff --git a/sandbox/linux/seccomp/sigprocmask.cc b/sandbox/linux/seccomp/sigprocmask.cc
deleted file mode 100644
index 9ff2922..0000000
--- a/sandbox/linux/seccomp/sigprocmask.cc
+++ /dev/null
@@ -1,120 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "debug.h"
-#include "sandbox_impl.h"
-
-namespace playground {
-
-// If the sandboxed process tries to mask SIGSEGV, there is a good chance
-// the process will eventually get terminated. If this is really ever a
-// problem, we can hide the fact that SIGSEGV is unmasked. But I don't think
-// we really need this. Masking of synchronous signals is rarely necessary.
-
-#if defined(__NR_sigprocmask)
-// Handler for the legacy sigprocmask() system call (i386 only).
-//
-// The call itself is carried out by raising a fault with "int $0"; see the
-// comment in the body. On success, the shadow copy of the signal mask kept
-// in the secure memory is updated to match.
-//
-//   how      SIG_BLOCK, SIG_UNBLOCK or SIG_SETMASK
-//   set      new mask (a single unsigned long), or NULL to only query
-//   old_set  receives the previous mask, may be NULL
-//
-// Returns the result of the system call.
-long Sandbox::sandbox_sigprocmask(int how, const void* set, void* old_set) {
-  long long tm;
-  Debug::syscall(&tm, __NR_sigprocmask, "Executing handler");
-
-  // Access the signal mask by triggering a SEGV and modifying the signal state
-  // prior to calling rt_sigreturn().
-  long res = -ENOSYS;
-  #if defined(__x86_64__)
-  #error x86-64 does not support sigprocmask(); use rt_sigprocmask() instead
-  #elif defined(__i386__)
-  asm volatile(
-      "push %%ebx\n"
-      "movl %2, %%ebx\n"
-      "int $0\n"
-      "pop %%ebx\n"
-      : "=a"(res)
-      : "0"(__NR_sigprocmask), "ri"((long)how),
-        "c"((long)set), "d"((long)old_set)
-      : "esp", "memory");
-  #else
-  #error Unsupported target platform
-  #endif
-
-  // Update our shadow signal mask, so that we can copy it upon creation of
-  // new threads.
-  if (res == 0 && set != NULL) {
-    SecureMem::Args* args = getSecureMem();
-    unsigned long long* shadow = (unsigned long long *)&args->signalMask;
-    // The legacy call operates on a single unsigned long. Read exactly that
-    // much from the caller's buffer; reading a full 64-bit value would run
-    // past the end of the caller's object and fold unrelated bits into the
-    // upper half of the shadow mask.
-    const unsigned long long lowWord = ~0UL;
-    const unsigned long long bits = *(const unsigned long *)set;
-    switch (how) {
-      case SIG_BLOCK:
-        *shadow |= bits;
-        break;
-      case SIG_UNBLOCK:
-        *shadow &= ~bits;
-        break;
-      case SIG_SETMASK:
-        // Only the first word of the mask is replaced; signals beyond it
-        // cannot be changed through this call and keep their state.
-        *shadow = (*shadow & ~lowWord) | bits;
-        break;
-      default:
-        break;
-    }
-  }
-
-  Debug::elapsed(tm, __NR_sigprocmask);
-
-  return res;
-}
-#endif
-
-#if defined(__NR_rt_sigprocmask)
-// Handler for the rt_sigprocmask() system call.
-//
-// The call itself is carried out by raising a fault with "int $0"; see the
-// comment in the body. On success, and if the caller's mask is at least 8
-// bytes long, the shadow copy of the signal mask kept in the secure memory
-// is updated to match. Returns the result of the system call.
-long Sandbox::sandbox_rt_sigprocmask(int how, const void* set, void* old_set,
- size_t bytes) {
- long long tm;
- Debug::syscall(&tm, __NR_rt_sigprocmask, "Executing handler");
-
- // Access the signal mask by triggering a SEGV and modifying the signal state
- // prior to calling rt_sigreturn().
- long res = -ENOSYS;
- #if defined(__x86_64__)
- // System call number in rax; arguments in rdi, rsi, rdx and (moved there
- // by the first instruction) r10.
- asm volatile(
- "movq %5, %%r10\n"
- "int $0\n"
- : "=a"(res)
- : "0"(__NR_rt_sigprocmask), "D"((long)how),
- "S"((long)set), "d"((long)old_set), "r"((long)bytes)
- : "r10", "r11", "rcx", "memory");
- #elif defined(__i386__)
- // System call number in eax; arguments in ebx, ecx, edx and esi. The
- // caller's ebx is saved and restored around the call.
- asm volatile(
- "push %%ebx\n"
- "movl %2, %%ebx\n"
- "int $0\n"
- "pop %%ebx\n"
- : "=a"(res)
- : "0"(__NR_rt_sigprocmask), "ri"((long)how),
- "c"((long)set), "d"((long)old_set), "S"((long)bytes)
- : "esp", "memory");
- #else
- #error Unsupported target platform
- #endif
-
- // Update our shadow signal mask, so that we can copy it upon creation of
- // new threads.
- // The shadow mask is accessed as one 64-bit value, hence the requirement
- // that the caller's mask holds at least 8 bytes.
- if (res == 0 && set != NULL && bytes >= 8) {
- SecureMem::Args* args = getSecureMem();
- switch (how) {
- case SIG_BLOCK:
- *(unsigned long long *)&args->signalMask |= *(unsigned long long *)set;
- break;
- case SIG_UNBLOCK:
- *(unsigned long long *)&args->signalMask &= ~*(unsigned long long *)set;
- break;
- case SIG_SETMASK:
- *(unsigned long long *)&args->signalMask = *(unsigned long long *)set;
- break;
- default:
- break;
- }
- }
-
- Debug::elapsed(tm, __NR_rt_sigprocmask);
-
- return res;
-}
-#endif
-
-} // namespace
diff --git a/sandbox/linux/seccomp/socketcall.cc b/sandbox/linux/seccomp/socketcall.cc
deleted file mode 100644
index c7b2015..0000000
--- a/sandbox/linux/seccomp/socketcall.cc
+++ /dev/null
@@ -1,1039 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "debug.h"
-#include "sandbox_impl.h"
-
-namespace playground {
-
-#if defined(__NR_socket)
-
-ssize_t Sandbox::sandbox_recvfrom(int sockfd, void* buf, size_t len, int flags,
- void* from, socklen_t* fromlen) {
- long long tm;
- Debug::syscall(&tm, __NR_recvfrom, "Executing handler");
-
- SysCalls sys;
- if (!from && !flags) {
- // recv() with a NULL sender and no flags is the same as read(), which
- // is unrestricted in seccomp mode.
- Debug::message("Replaced recv() with call to read()");
- ssize_t rc = sys.read(sockfd, buf, len);
- if (rc < 0) {
- Debug::elapsed(tm, __NR_recvfrom);
- return -sys.my_errno;
- } else {
- Debug::elapsed(tm, __NR_recvfrom);
- return rc;
- }
- }
-
- struct {
- int sysnum;
- long long cookie;
- RecvFrom recvfrom_req;
- } __attribute__((packed)) request;
- request.sysnum = __NR_recvfrom;
- request.cookie = cookie();
- request.recvfrom_req.sockfd = sockfd;
- request.recvfrom_req.buf = buf;
- request.recvfrom_req.len = len;
- request.recvfrom_req.flags = flags;
- request.recvfrom_req.from = from;
- request.recvfrom_req.fromlen = fromlen;
-
- long rc;
- if (write(sys, processFdPub(), &request, sizeof(request)) !=
- sizeof(request) ||
- read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
- die("Failed to forward recvfrom() request [sandbox]");
- }
- Debug::elapsed(tm, __NR_recvfrom);
- return static_cast<ssize_t>(rc);
-}
-
-ssize_t Sandbox::sandbox_recvmsg(int sockfd, struct msghdr* msg, int flags) {
- long long tm;
- Debug::syscall(&tm, __NR_recvmsg, "Executing handler");
-
- // We cannot simplify recvmsg() to recvfrom(), recv() or read(), as we do
- // not know whether the caller needs us to set msg->msg_flags.
- struct {
- int sysnum;
- long long cookie;
- RecvMsg recvmsg_req;
- } __attribute__((packed)) request;
- request.sysnum = __NR_recvmsg;
- request.cookie = cookie();
- request.recvmsg_req.sockfd = sockfd;
- request.recvmsg_req.msg = msg;
- request.recvmsg_req.flags = flags;
-
- long rc;
- SysCalls sys;
- if (write(sys, processFdPub(), &request, sizeof(request)) !=
- sizeof(request) ||
- read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
- die("Failed to forward recvmsg() request [sandbox]");
- }
- Debug::elapsed(tm, __NR_recvmsg);
- return static_cast<ssize_t>(rc);
-}
-
-size_t Sandbox::sandbox_sendmsg(int sockfd, const struct msghdr* msg,
- int flags) {
- long long tm;
- Debug::syscall(&tm, __NR_sendmsg, "Executing handler");
-
- if (msg->msg_iovlen == 1 && msg->msg_controllen == 0) {
- // sendmsg() can sometimes be simplified as sendto()
- return sandbox_sendto(sockfd, msg->msg_iov, msg->msg_iovlen,
- flags, msg->msg_name, msg->msg_namelen);
- }
-
- struct Request {
- int sysnum;
- long long cookie;
- SendMsg sendmsg_req;
- struct msghdr msg;
- } __attribute__((packed));
- char data[sizeof(struct Request) + msg->msg_namelen + msg->msg_controllen];
- struct Request *request = reinterpret_cast<struct Request *>(data);
- request->sysnum = __NR_sendmsg;
- request->cookie = cookie();
- request->sendmsg_req.sockfd = sockfd;
- request->sendmsg_req.msg = msg;
- request->sendmsg_req.flags = flags;
- request->msg = *msg;
- memcpy(reinterpret_cast<char *>(
- memcpy(request + 1, msg->msg_name, msg->msg_namelen)) +
- msg->msg_namelen,
- msg->msg_control, msg->msg_controllen);
-
- long rc;
- SysCalls sys;
- if (write(sys, processFdPub(), &data, sizeof(data)) !=
- (ssize_t)sizeof(data) ||
- read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
- die("Failed to forward sendmsg() request [sandbox]");
- }
- Debug::elapsed(tm, __NR_sendmsg);
- return static_cast<ssize_t>(rc);
-}
-
-ssize_t Sandbox::sandbox_sendto(int sockfd, const void* buf, size_t len,
- int flags, const void* to, socklen_t tolen) {
- long long tm;
- Debug::syscall(&tm, __NR_sendto, "Executing handler");
-
- SysCalls sys;
- if (!to && !flags) {
- // sendto() with a NULL recipient and no flags is the same as write(),
- // which is unrestricted in seccomp mode.
- Debug::message("Replaced sendto() with call to write()");
- ssize_t rc = sys.write(sockfd, buf, len);
- if (rc < 0) {
- Debug::elapsed(tm, __NR_sendto);
- return -sys.my_errno;
- } else {
- Debug::elapsed(tm, __NR_sendto);
- return rc;
- }
- }
-
- struct {
- int sysnum;
- long long cookie;
- SendTo sendto_req;
- } __attribute__((packed)) request;
- request.sysnum = __NR_sendto;
- request.cookie = cookie();
- request.sendto_req.sockfd = sockfd;
- request.sendto_req.buf = buf;
- request.sendto_req.len = len;
- request.sendto_req.flags = flags;
- request.sendto_req.to = to;
- request.sendto_req.tolen = tolen;
-
- long rc;
- if (write(sys, processFdPub(), &request, sizeof(request)) !=
- sizeof(request) ||
- read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
- die("Failed to forward sendto() request [sandbox]");
- }
- Debug::elapsed(tm, __NR_sendto);
- return static_cast<ssize_t>(rc);
-}
-
-long Sandbox::sandbox_setsockopt(int sockfd, int level, int optname,
- const void* optval, socklen_t optlen) {
- long long tm;
- Debug::syscall(&tm, __NR_setsockopt, "Executing handler");
-
- struct {
- int sysnum;
- long long cookie;
- SetSockOpt setsockopt_req;
- } __attribute__((packed)) request;
- request.sysnum = __NR_setsockopt;
- request.cookie = cookie();
- request.setsockopt_req.sockfd = sockfd;
- request.setsockopt_req.level = level;
- request.setsockopt_req.optname = optname;
- request.setsockopt_req.optval = optval;
- request.setsockopt_req.optlen = optlen;
-
- long rc;
- SysCalls sys;
- if (write(sys, processFdPub(), &request, sizeof(request)) !=
- sizeof(request) ||
- read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
- die("Failed to forward setsockopt() request [sandbox]");
- }
- Debug::elapsed(tm, __NR_setsockopt);
- return rc;
-}
-
-long Sandbox::sandbox_getsockopt(int sockfd, int level, int optname,
- void* optval, socklen_t* optlen) {
- long long tm;
- Debug::syscall(&tm, __NR_getsockopt, "Executing handler");
-
- struct {
- int sysnum;
- long long cookie;
- GetSockOpt getsockopt_req;
- } __attribute__((packed)) request;
- request.sysnum = __NR_getsockopt;
- request.cookie = cookie();
- request.getsockopt_req.sockfd = sockfd;
- request.getsockopt_req.level = level;
- request.getsockopt_req.optname = optname;
- request.getsockopt_req.optval = optval;
- request.getsockopt_req.optlen = optlen;
-
- long rc;
- SysCalls sys;
- if (write(sys, processFdPub(), &request, sizeof(request)) !=
- sizeof(request) ||
- read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
- die("Failed to forward getsockopt() request [sandbox]");
- }
- Debug::elapsed(tm, __NR_getsockopt);
- return rc;
-}
-
-bool Sandbox::process_recvfrom(int parentMapsFd, int sandboxFd,
- int threadFdPub, int threadFd,
- SecureMem::Args* mem) {
- // Read request
- RecvFrom recvfrom_req;
- SysCalls sys;
- if (read(sys, sandboxFd, &recvfrom_req, sizeof(recvfrom_req)) !=
- sizeof(recvfrom_req)) {
- die("Failed to read parameters for recvfrom() [process]");
- }
-
- // Unsupported flag encountered. Deny the call.
- if (recvfrom_req.flags &
- ~(MSG_DONTWAIT|MSG_OOB|MSG_PEEK|MSG_TRUNC|MSG_WAITALL)) {
- SecureMem::abandonSystemCall(threadFd, -EINVAL);
- return false;
- }
-
- // While we do not anticipate any particular need to receive data on
- // unconnected sockets, there is no particular risk in doing so.
- SecureMem::sendSystemCall(threadFdPub, false, -1, mem,
- __NR_recvfrom, recvfrom_req.sockfd,
- recvfrom_req.buf, recvfrom_req.len,
- recvfrom_req.flags, recvfrom_req.from,
- recvfrom_req.fromlen);
- return true;
-}
-
-bool Sandbox::process_recvmsg(int parentMapsFd, int sandboxFd, int threadFdPub,
- int threadFd, SecureMem::Args* mem) {
- // Read request
- RecvMsg recvmsg_req;
- SysCalls sys;
- if (read(sys, sandboxFd, &recvmsg_req, sizeof(recvmsg_req)) !=
- sizeof(recvmsg_req)) {
- die("Failed to read parameters for recvmsg() [process]");
- }
-
- // Unsupported flag encountered. Deny the call.
- if (recvmsg_req.flags &
- ~(MSG_DONTWAIT|MSG_OOB|MSG_PEEK|MSG_TRUNC|MSG_WAITALL)) {
- SecureMem::abandonSystemCall(threadFd, -EINVAL);
- return false;
- }
-
- // Receiving messages is general not security critical.
- SecureMem::sendSystemCall(threadFdPub, false, -1, mem,
- __NR_recvmsg, recvmsg_req.sockfd,
- recvmsg_req.msg, recvmsg_req.flags);
- return true;
-}
-
-bool Sandbox::process_sendmsg(int parentMapsFd, int sandboxFd, int threadFdPub,
- int threadFd, SecureMem::Args* mem) {
- // Read request
- struct {
- SendMsg sendmsg_req;
- struct msghdr msg;
- } __attribute__((packed)) data;
- SysCalls sys;
- if (read(sys, sandboxFd, &data, sizeof(data)) != sizeof(data)) {
- die("Failed to read parameters for sendmsg() [process]");
- }
-
- if (data.msg.msg_namelen > 4096 || data.msg.msg_controllen > 4096) {
- die("Unexpected size for socketcall() payload [process]");
- }
- char extra[data.msg.msg_namelen + data.msg.msg_controllen];
- if (read(sys, sandboxFd, &extra, sizeof(extra)) != (ssize_t)sizeof(extra)) {
- die("Failed to read parameters for sendmsg() [process]");
- }
- if (sizeof(struct msghdr) + sizeof(extra) > sizeof(mem->pathname)) {
- goto deny;
- }
-
- if (data.msg.msg_namelen ||
- (data.sendmsg_req.flags &
- ~(MSG_CONFIRM|MSG_DONTWAIT|MSG_EOR|MSG_MORE|MSG_NOSIGNAL|MSG_OOB))) {
- deny:
- SecureMem::abandonSystemCall(threadFd, -EINVAL);
- return false;
- }
-
- // The trusted process receives file handles when a new untrusted thread
- // gets created. We have security checks in place that prevent any
- // critical information from being tampered with during thread creation.
- // But if we disallowed passing of file handles, this would add an extra
- // hurdle for an attacker.
- // Unfortunately, for now, this is not possible as Chrome's
- // base::SendRecvMsg() needs the ability to pass file handles.
- if (data.msg.msg_controllen) {
- data.msg.msg_control = extra + data.msg.msg_namelen;
- struct cmsghdr *cmsg = CMSG_FIRSTHDR(&data.msg);
- do {
- if (cmsg->cmsg_level != SOL_SOCKET ||
- cmsg->cmsg_type != SCM_RIGHTS) {
- goto deny;
- }
- } while ((cmsg = CMSG_NXTHDR(&data.msg, cmsg)) != NULL);
- }
-
- // This must be a locked system call, because we have to ensure that the
- // untrusted code does not tamper with the msghdr after we have examined it.
- SecureMem::lockSystemCall(parentMapsFd, mem);
- if (sizeof(extra) > 0) {
- if (data.msg.msg_namelen > 0) {
- data.msg.msg_name = mem->pathname + sizeof(struct msghdr);
- }
- if (data.msg.msg_controllen > 0) {
- data.msg.msg_control = mem->pathname + sizeof(struct msghdr) +
- data.msg.msg_namelen;
- }
- memcpy(mem->pathname + sizeof(struct msghdr), extra, sizeof(extra));
- }
- memcpy(mem->pathname, &data.msg, sizeof(struct msghdr));
- SecureMem::sendSystemCall(threadFdPub, true, parentMapsFd, mem,
- __NR_sendmsg, data.sendmsg_req.sockfd,
- mem->pathname - (char*)mem + (char*)mem->self,
- data.sendmsg_req.flags);
- return true;
-}
-
-bool Sandbox::process_sendto(int parentMapsFd, int sandboxFd, int threadFdPub,
- int threadFd, SecureMem::Args* mem) {
- // Read request
- SendTo sendto_req;
- SysCalls sys;
- if (read(sys, sandboxFd, &sendto_req, sizeof(sendto_req)) !=
- sizeof(sendto_req)) {
- die("Failed to read parameters for sendto() [process]");
- }
-
- // The sandbox does not allow sending to arbitrary addresses.
- if (sendto_req.to) {
- SecureMem::abandonSystemCall(threadFd, -EINVAL);
- return false;
- }
-
- // Unsupported flag encountered. Deny the call.
- if (sendto_req.flags &
- ~(MSG_CONFIRM|MSG_DONTWAIT|MSG_EOR|MSG_MORE|MSG_NOSIGNAL|MSG_OOB)) {
- SecureMem::abandonSystemCall(threadFd, -EINVAL);
- return false;
- }
-
- // Sending data on a connected socket is similar to calling write().
- // Allow it.
- SecureMem::sendSystemCall(threadFdPub, false, -1, mem,
- __NR_sendto, sendto_req.sockfd,
- sendto_req.buf, sendto_req.len,
- sendto_req.flags, sendto_req.to,
- sendto_req.tolen);
- return true;
-}
-
-bool Sandbox::process_setsockopt(int parentMapsFd, int sandboxFd,
- int threadFdPub, int threadFd,
- SecureMem::Args* mem) {
- // Read request
- SetSockOpt setsockopt_req;
- SysCalls sys;
- if (read(sys, sandboxFd, &setsockopt_req, sizeof(setsockopt_req)) !=
- sizeof(setsockopt_req)) {
- die("Failed to read parameters for setsockopt() [process]");
- }
-
- switch (setsockopt_req.level) {
- case SOL_SOCKET:
- switch (setsockopt_req.optname) {
- case SO_KEEPALIVE:
- case SO_LINGER:
- case SO_OOBINLINE:
- case SO_RCVBUF:
- case SO_RCVLOWAT:
- case SO_SNDLOWAT:
- case SO_RCVTIMEO:
- case SO_SNDTIMEO:
- case SO_REUSEADDR:
- case SO_SNDBUF:
- case SO_TIMESTAMP:
- SecureMem::sendSystemCall(threadFdPub, false, -1, mem,
- __NR_setsockopt, setsockopt_req.sockfd,
- setsockopt_req.level, setsockopt_req.optname,
- setsockopt_req.optval, setsockopt_req.optlen);
- return true;
- default:
- break;
- }
- break;
- case IPPROTO_TCP:
- switch (setsockopt_req.optname) {
- case TCP_CORK:
- case TCP_DEFER_ACCEPT:
- case TCP_INFO:
- case TCP_KEEPCNT:
- case TCP_KEEPIDLE:
- case TCP_KEEPINTVL:
- case TCP_LINGER2:
- case TCP_MAXSEG:
- case TCP_NODELAY:
- case TCP_QUICKACK:
- case TCP_SYNCNT:
- case TCP_WINDOW_CLAMP:
- SecureMem::sendSystemCall(threadFdPub, false, -1, mem,
- __NR_setsockopt, setsockopt_req.sockfd,
- setsockopt_req.level, setsockopt_req.optname,
- setsockopt_req.optval, setsockopt_req.optlen);
- return true;
- default:
- break;
- }
- break;
- default:
- break;
- }
- SecureMem::abandonSystemCall(threadFd, -EINVAL);
- return false;
-}
-
-bool Sandbox::process_getsockopt(int parentMapsFd, int sandboxFd,
- int threadFdPub, int threadFd,
- SecureMem::Args* mem) {
- // Read request
- GetSockOpt getsockopt_req;
- SysCalls sys;
- if (read(sys, sandboxFd, &getsockopt_req, sizeof(getsockopt_req)) !=
- sizeof(getsockopt_req)) {
- die("Failed to read parameters for getsockopt() [process]");
- }
-
- switch (getsockopt_req.level) {
- case SOL_SOCKET:
- switch (getsockopt_req.optname) {
- case SO_ACCEPTCONN:
- case SO_ERROR:
- case SO_KEEPALIVE:
- case SO_LINGER:
- case SO_OOBINLINE:
- case SO_RCVBUF:
- case SO_RCVLOWAT:
- case SO_SNDLOWAT:
- case SO_RCVTIMEO:
- case SO_SNDTIMEO:
- case SO_REUSEADDR:
- case SO_SNDBUF:
- case SO_TIMESTAMP:
- case SO_TYPE:
- SecureMem::sendSystemCall(threadFdPub, false, -1, mem,
- __NR_getsockopt, getsockopt_req.sockfd,
- getsockopt_req.level, getsockopt_req.optname,
- getsockopt_req.optval, getsockopt_req.optlen);
- return true;
- default:
- break;
- }
- break;
- case IPPROTO_TCP:
- switch (getsockopt_req.optname) {
- case TCP_CORK:
- case TCP_DEFER_ACCEPT:
- case TCP_INFO:
- case TCP_KEEPCNT:
- case TCP_KEEPIDLE:
- case TCP_KEEPINTVL:
- case TCP_LINGER2:
- case TCP_MAXSEG:
- case TCP_NODELAY:
- case TCP_QUICKACK:
- case TCP_SYNCNT:
- case TCP_WINDOW_CLAMP:
- SecureMem::sendSystemCall(threadFdPub, false, -1, mem,
- __NR_getsockopt, getsockopt_req.sockfd,
- getsockopt_req.level, getsockopt_req.optname,
- getsockopt_req.optval, getsockopt_req.optlen);
- return true;
- default:
- break;
- }
- break;
- default:
- break;
- }
- SecureMem::abandonSystemCall(threadFd, -EINVAL);
- return false;
-}
-
-#endif
-#if defined(__NR_socketcall)
-
-enum {
- SYS_SOCKET = 1,
- SYS_BIND = 2,
- SYS_CONNECT = 3,
- SYS_LISTEN = 4,
- SYS_ACCEPT = 5,
- SYS_GETSOCKNAME = 6,
- SYS_GETPEERNAME = 7,
- SYS_SOCKETPAIR = 8,
- SYS_SEND = 9,
- SYS_RECV = 10,
- SYS_SENDTO = 11,
- SYS_RECVFROM = 12,
- SYS_SHUTDOWN = 13,
- SYS_SETSOCKOPT = 14,
- SYS_GETSOCKOPT = 15,
- SYS_SENDMSG = 16,
- SYS_RECVMSG = 17,
- SYS_ACCEPT4 = 18
-};
-
-struct Sandbox::SocketCallArgInfo {
- size_t len;
- off_t addrOff;
- off_t lengthOff;
-};
-const struct Sandbox::SocketCallArgInfo Sandbox::socketCallArgInfo[] = {
- #define STRUCT(s) reinterpret_cast<SocketCall *>(0)->args.s
- #define SIZE(s) sizeof(STRUCT(s))
- #define OFF(s, f) offsetof(typeof STRUCT(s), f)
- { 0 },
- { SIZE(socket) },
- { SIZE(bind), OFF(bind, addr), OFF(bind, addrlen) },
- { SIZE(connect), OFF(connect, addr), OFF(connect, addrlen) },
- { SIZE(listen) },
- { SIZE(accept) },
- { SIZE(getsockname) },
- { SIZE(getpeername) },
- { SIZE(socketpair) },
- { SIZE(send) },
- { SIZE(recv) },
- { SIZE(sendto), OFF(sendto, to), OFF(sendto, tolen) },
- { SIZE(recvfrom) },
- { SIZE(shutdown) },
- { SIZE(setsockopt), OFF(setsockopt, optval), OFF(setsockopt, optlen) },
- { SIZE(getsockopt) },
- { SIZE(sendmsg) },
- { SIZE(recvmsg) },
- { SIZE(accept4) }
- #undef STRUCT
- #undef SIZE
- #undef OFF
-};
-
-long Sandbox::sandbox_socketcall(int call, void* args) {
- long long tm;
- Debug::syscall(&tm, __NR_socketcall, "Executing handler", call);
-
- // When demultiplexing socketcall(), only accept calls that have a valid
- // "call" opcode.
- if (call < SYS_SOCKET || call > SYS_ACCEPT4) {
- Debug::elapsed(tm, __NR_socketcall, call);
- return -ENOSYS;
- }
-
- // Some type of calls include a pointer to an address or name, which cannot
- // be accessed by the trusted process, as it lives in a separate address
- // space. For these calls, append the extra data to the serialized request.
- // This requires some copying of data, as we have to make sure there is
- // only a single atomic call to write().
- socklen_t numExtraData = 0;
- const void* extraDataAddr = NULL;
- if (socketCallArgInfo[call].lengthOff) {
- memcpy(&numExtraData,
- reinterpret_cast<char *>(args) + socketCallArgInfo[call].lengthOff,
- sizeof(socklen_t));
- extraDataAddr = reinterpret_cast<char *>(args) +
- socketCallArgInfo[call].addrOff;
- }
-
- // sendmsg() and recvmsg() have more complicated requirements for computing
- // the amount of extra data that needs to be sent to the trusted process.
- if (call == SYS_SENDMSG) {
- SendMsg *sendmsg_args = reinterpret_cast<SendMsg *>(args);
- if (sendmsg_args->msg->msg_iovlen == 1 &&
- !sendmsg_args->msg->msg_control) {
- // Further down in the code, this sendmsg() call will be simplified to
- // a sendto() call. Make sure we already compute the correct value for
- // numExtraData, as it is needed when we allocate "data[]" on the stack.
- numExtraData = sendmsg_args->msg->msg_namelen;
- extraDataAddr = sendmsg_args->msg->msg_name;
- } else {
- // sendmsg() needs to include some of the extra data so that we can
- // inspect it in process_socketcall()
- numExtraData = sizeof(*sendmsg_args->msg) +
- sendmsg_args->msg->msg_namelen +
- sendmsg_args->msg->msg_controllen;
- extraDataAddr = NULL;
- }
- }
- if (call == SYS_RECVMSG) {
- RecvMsg *recvmsg_args = reinterpret_cast<RecvMsg *>(args);
- numExtraData = sizeof(*recvmsg_args->msg);
- extraDataAddr = recvmsg_args->msg;
- }
-
- // Set up storage for the request header and copy the data from "args"
- // into it.
- struct Request {
- int sysnum;
- long long cookie;
- SocketCall socketcall_req;
- } __attribute__((packed)) *request;
- char data[sizeof(struct Request) + numExtraData];
- request = reinterpret_cast<struct Request *>(data);
- memcpy(&request->socketcall_req.args, args, socketCallArgInfo[call].len);
-
- // Simplify send(), sendto() and sendmsg(), if there are simpler equivalent
- // calls. This allows us to occasionally replace them with calls to write(),
- // which don't have to be forwarded to the trusted process.
- SysCalls sys;
- if (call == SYS_SENDMSG &&
- request->socketcall_req.args.sendmsg.msg->msg_iovlen == 1 &&
- !request->socketcall_req.args.sendmsg.msg->msg_control) {
- // Ordering of these assignments is important, as we are reshuffling
- // fields inside of a union.
- call = SYS_SENDTO;
- request->socketcall_req.args.sendto.flags =
- request->socketcall_req.args.sendmsg.flags;
- request->socketcall_req.args.sendto.to =
- request->socketcall_req.args.sendmsg.msg->msg_name;
- request->socketcall_req.args.sendto.tolen =
- request->socketcall_req.args.sendmsg.msg->msg_namelen;
- request->socketcall_req.args.sendto.len =
- request->socketcall_req.args.sendmsg.msg->msg_iov->iov_len;
- request->socketcall_req.args.sendto.buf =
- request->socketcall_req.args.sendmsg.msg->msg_iov->iov_base;
- }
- if (call == SYS_SENDTO && !request->socketcall_req.args.sendto.to) {
- // sendto() with a NULL address is the same as send()
- call = SYS_SEND;
- numExtraData = 0;
- }
- if (call == SYS_SEND && !request->socketcall_req.args.send.flags) {
- // send() with no flags is the same as write(), which is unrestricted
- // in seccomp mode.
- Debug::message("Replaced socketcall() with call to write()");
- ssize_t rc = sys.write(request->socketcall_req.args.send.sockfd,
- request->socketcall_req.args.send.buf,
- request->socketcall_req.args.send.len);
- if (rc < 0) {
- Debug::elapsed(tm, __NR_socketcall, call);
- return -sys.my_errno;
- } else {
- Debug::elapsed(tm, __NR_socketcall, call);
- return rc;
- }
- }
-
- // Simplify recv(), and recvfrom(), if there are simpler equivalent calls.
- // This allows us to occasionally replace them with calls to read(), which
- // don't have to be forwarded to the trusted process.
- // We cannot simplify recvmsg() to recvfrom(), recv() or read(), as we do
- // not know whether the caller needs us to set msg->msg_flags.
- if (call == SYS_RECVFROM && !request->socketcall_req.args.recvfrom.from) {
- // recvfrom() with a NULL address buffer is the same as recv()
- call = SYS_RECV;
- }
- if (call == SYS_RECV && !request->socketcall_req.args.recv.flags) {
- // recv() with no flags is the same as read(), which is unrestricted
- // in seccomp mode.
- Debug::message("Replaced socketcall() with call to read()");
- ssize_t rc = sys.read(request->socketcall_req.args.recv.sockfd,
- request->socketcall_req.args.recv.buf,
- request->socketcall_req.args.recv.len);
- if (rc < 0) {
- Debug::elapsed(tm, __NR_socketcall, call);
- return -sys.my_errno;
- } else {
- Debug::elapsed(tm, __NR_socketcall, call);
- return rc;
- }
- }
-
- // Fill in the rest of the request header.
- request->sysnum = __NR_socketcall;
- request->cookie = cookie();
- request->socketcall_req.call = call;
- request->socketcall_req.arg_ptr = args;
- int padding = sizeof(request->socketcall_req.args) -
- socketCallArgInfo[call].len;
- if (padding > 0) {
- memset((char *)(&request->socketcall_req.args + 1) - padding, 0, padding);
- }
- if (call == SYS_SENDMSG) {
- // for sendmsg() we include the (optional) destination address, and the
- // (optional) control data in the payload.
- SendMsg *sendmsg_args = reinterpret_cast<SendMsg *>(args);
- memcpy(reinterpret_cast<char *>(
- memcpy(reinterpret_cast<char *>(
- memcpy(request + 1, sendmsg_args->msg, sizeof(*sendmsg_args->msg))) +
- sizeof(*sendmsg_args->msg),
- sendmsg_args->msg->msg_name, sendmsg_args->msg->msg_namelen)) +
- sendmsg_args->msg->msg_namelen,
- sendmsg_args->msg->msg_control, sendmsg_args->msg->msg_controllen);
- } else if (extraDataAddr) {
- memcpy(request + 1, extraDataAddr, numExtraData);
- }
-
- // Send request to trusted process and collect response from trusted thread.
- long rc;
- ssize_t len = sizeof(struct Request) + numExtraData;
- if (write(sys, processFdPub(), data, len) != len ||
- read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
- die("Failed to forward socketcall() request [sandbox]");
- }
- Debug::elapsed(tm, __NR_socketcall, call);
- return rc;
-}
-
-bool Sandbox::process_socketcall(int parentMapsFd, int sandboxFd,
- int threadFdPub, int threadFd,
- SecureMem::Args* mem) {
- // Read request
- SocketCall socketcall_req;
- SysCalls sys;
- if (read(sys, sandboxFd, &socketcall_req, sizeof(socketcall_req)) !=
- sizeof(socketcall_req)) {
- die("Failed to read parameters for socketcall() [process]");
- }
-
- // sandbox_socketcall() should never send us an unexpected "call" opcode.
- // If it did, something went very wrong and we better terminate the process.
- if (socketcall_req.call < SYS_SOCKET || socketcall_req.call > SYS_ACCEPT4) {
- die("Unexpected socketcall() [process]");
- }
-
- // Check if this particular operation carries an extra payload.
- socklen_t numExtraData = 0;
- if (socketCallArgInfo[socketcall_req.call].lengthOff) {
- memcpy(&numExtraData,
- reinterpret_cast<char *>(&socketcall_req) +
- socketCallArgInfo[socketcall_req.call].lengthOff,
- sizeof(socklen_t));
- } else if (socketcall_req.call == SYS_SENDMSG) {
- numExtraData = sizeof(*socketcall_req.args.sendmsg.msg);
- } else if (socketcall_req.call == SYS_RECVMSG) {
- numExtraData = sizeof(*socketcall_req.args.recvmsg.msg);
- }
-
- // Verify that the length for the payload is reasonable. We don't want to
- // blow up our stack, and excessive (or negative) buffer sizes are almost
- // certainly a bug.
- if (numExtraData > 4096) {
- die("Unexpected size for socketcall() payload [process]");
- }
-
- // Read the extra payload, if any.
- char extra[numExtraData];
- if (numExtraData) {
- if (read(sys, sandboxFd, extra, numExtraData) != (ssize_t)numExtraData) {
- die("Failed to read socketcall() payload [process]");
- }
- }
-
- // sendmsg() has another level of indirection and can carry even more payload
- ssize_t numSendmsgExtra = 0;
- if (socketcall_req.call == SYS_SENDMSG) {
- struct msghdr* msg = reinterpret_cast<struct msghdr*>(extra);
- if (msg->msg_namelen > 4096 || msg->msg_controllen > 4096) {
- die("Unexpected size for socketcall() payload [process]");
- }
- numSendmsgExtra = msg->msg_namelen + msg->msg_controllen;
- }
- char sendmsgExtra[numSendmsgExtra];
- if (numSendmsgExtra) {
- if (read(sys, sandboxFd, sendmsgExtra, numSendmsgExtra) !=
- numSendmsgExtra) {
- die("Failed to read socketcall() payload [process]");
- }
- }
-
- int rc = -EINVAL;
- switch (socketcall_req.call) {
- case SYS_SOCKET:
- // The sandbox does not allow creation of any new sockets.
- goto deny;
- case SYS_BIND:
- // The sandbox does not allow binding an address to a socket.
- goto deny;
- case SYS_CONNECT:
- // The sandbox does not allow connecting a socket.
- goto deny;
- case SYS_LISTEN:
- // The sandbox does not allow a socket to enter listening state.
- goto deny;
- case SYS_ACCEPT4:
- case SYS_ACCEPT:
- // If the sandbox obtained a socket that is already in the listening
- // state (e.g. because somebody sent it a suitable file descriptor), it
- // is permissible to call accept().
-
- accept_simple:
- // None of the parameters need to be checked, so it is OK to refer
- // to the parameter block created by the untrusted code.
- SecureMem::sendSystemCall(threadFdPub, false, -1, mem, __NR_socketcall,
- socketcall_req.call, socketcall_req.arg_ptr);
- return true;
- case SYS_GETSOCKNAME:
- case SYS_GETPEERNAME:
- // Querying the local and the remote name is not considered security
- // sensitive for the purposes of the sandbox.
- goto accept_simple;
- case SYS_SOCKETPAIR:
- // Socket pairs are connected to each other and not considered
- // security sensitive.
- goto accept_simple;
- case SYS_SENDTO:
- if (socketcall_req.args.sendto.to) {
- // The sandbox does not allow sending to arbitrary addresses.
- goto deny;
- }
- // Fall through
- case SYS_SEND:
- if (socketcall_req.args.send.flags &
- ~(MSG_CONFIRM|MSG_DONTWAIT|MSG_EOR|MSG_MORE|MSG_NOSIGNAL|MSG_OOB)) {
- // Unsupported flag encountered. Deny the call.
- goto deny;
- }
- // Sending data on a connected socket is similar to calling write().
- // Allow it.
-
- accept_complex:
- // The parameter block contains potentially security critical information
- // that should not be tampered with after it has been inspected. Copy it
- // into the write-protected securely shared memory before telling the
- // trusted thread to execute the socket call.
- SecureMem::lockSystemCall(parentMapsFd, mem);
- memcpy(mem->pathname, &socketcall_req.args, sizeof(socketcall_req.args));
- SecureMem::sendSystemCall(threadFdPub, true, parentMapsFd, mem,
- __NR_socketcall, socketcall_req.call,
- mem->pathname - (char*)mem + (char*)mem->self);
- return true;
- case SYS_RECVFROM:
- // While we do not anticipate any particular need to receive data on
- // unconnected sockets, there is no particular risk in doing so.
- // Fall through
- case SYS_RECV:
- if (socketcall_req.args.recv.flags &
- ~(MSG_DONTWAIT|MSG_OOB|MSG_PEEK|MSG_TRUNC|MSG_WAITALL)) {
- // Unsupported flag encountered. Deny the call.
- goto deny;
- }
- // Receiving data on a connected socket is similar to calling read().
- // Allow it.
- goto accept_complex;
- case SYS_SHUTDOWN:
- // Shutting down a socket is always OK.
- goto accept_simple;
- case SYS_SETSOCKOPT:
- switch (socketcall_req.args.setsockopt.level) {
- case SOL_SOCKET:
- switch (socketcall_req.args.setsockopt.optname) {
- case SO_KEEPALIVE:
- case SO_LINGER:
- case SO_OOBINLINE:
- case SO_RCVBUF:
- case SO_RCVLOWAT:
- case SO_SNDLOWAT:
- case SO_RCVTIMEO:
- case SO_SNDTIMEO:
- case SO_REUSEADDR:
- case SO_SNDBUF:
- case SO_TIMESTAMP:
- goto accept_complex;
- default:
- break;
- }
- break;
- case IPPROTO_TCP:
- switch (socketcall_req.args.setsockopt.optname) {
- case TCP_CORK:
- case TCP_DEFER_ACCEPT:
- case TCP_INFO:
- case TCP_KEEPCNT:
- case TCP_KEEPIDLE:
- case TCP_KEEPINTVL:
- case TCP_LINGER2:
- case TCP_MAXSEG:
- case TCP_NODELAY:
- case TCP_QUICKACK:
- case TCP_SYNCNT:
- case TCP_WINDOW_CLAMP:
- goto accept_complex;
- default:
- break;
- }
- break;
- default:
- break;
- }
- goto deny;
- case SYS_GETSOCKOPT:
- switch (socketcall_req.args.getsockopt.level) {
- case SOL_SOCKET:
- switch (socketcall_req.args.getsockopt.optname) {
- case SO_ACCEPTCONN:
- case SO_ERROR:
- case SO_KEEPALIVE:
- case SO_LINGER:
- case SO_OOBINLINE:
- case SO_RCVBUF:
- case SO_RCVLOWAT:
- case SO_SNDLOWAT:
- case SO_RCVTIMEO:
- case SO_SNDTIMEO:
- case SO_REUSEADDR:
- case SO_SNDBUF:
- case SO_TIMESTAMP:
- case SO_TYPE:
- goto accept_complex;
- default:
- break;
- }
- break;
- case IPPROTO_TCP:
- switch (socketcall_req.args.getsockopt.optname) {
- case TCP_CORK:
- case TCP_DEFER_ACCEPT:
- case TCP_INFO:
- case TCP_KEEPCNT:
- case TCP_KEEPIDLE:
- case TCP_KEEPINTVL:
- case TCP_LINGER2:
- case TCP_MAXSEG:
- case TCP_NODELAY:
- case TCP_QUICKACK:
- case TCP_SYNCNT:
- case TCP_WINDOW_CLAMP:
- goto accept_complex;
- default:
- break;
- }
- break;
- default:
- break;
- }
- goto deny;
- case SYS_SENDMSG: {
- struct msghdr* msg = reinterpret_cast<struct msghdr*>(extra);
-
- if (sizeof(socketcall_req.args) + sizeof(*msg) + numSendmsgExtra >
- sizeof(mem->pathname)) {
- goto deny;
- }
-
- if (msg->msg_namelen ||
- (socketcall_req.args.sendmsg.flags &
- ~(MSG_CONFIRM|MSG_DONTWAIT|MSG_EOR|MSG_MORE|MSG_NOSIGNAL|MSG_OOB))){
- goto deny;
- }
-
- // The trusted process receives file handles when a new untrusted thread
- // gets created. We have security checks in place that prevent any
- // critical information from being tampered with during thread creation.
- // But if we disallowed passing of file handles, this would add an extra
- // hurdle for an attacker.
- // Unfortunately, for now, this is not possible as Chrome's
- // base::SendRecvMsg() needs the ability to pass file handles.
- if (msg->msg_controllen) {
- msg->msg_control = sendmsgExtra + msg->msg_namelen;
- struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg);
- do {
- if (cmsg->cmsg_level != SOL_SOCKET ||
- cmsg->cmsg_type != SCM_RIGHTS) {
- goto deny;
- }
- } while ((cmsg = CMSG_NXTHDR(msg, cmsg)) != NULL);
- }
-
- // This must be a locked system call, because we have to ensure that
- // the untrusted code does not tamper with the msghdr after we have
- // examined it.
- SecureMem::lockSystemCall(parentMapsFd, mem);
- socketcall_req.args.sendmsg.msg =
- reinterpret_cast<struct msghdr*>(mem->pathname +
- sizeof(socketcall_req.args) -
- (char*)mem + (char*)mem->self);
- memcpy(mem->pathname, &socketcall_req.args, sizeof(socketcall_req.args));
- if (numSendmsgExtra) {
- if (msg->msg_namelen > 0) {
- msg->msg_name = const_cast<struct msghdr*>(
- socketcall_req.args.sendmsg.msg) + 1;
- }
- if (msg->msg_controllen > 0) {
- msg->msg_control = (char *)(
- socketcall_req.args.sendmsg.msg + 1) + msg->msg_namelen;
- }
- memcpy(mem->pathname + sizeof(socketcall_req.args) + sizeof(*msg),
- sendmsgExtra, numSendmsgExtra);
- }
- memcpy(mem->pathname + sizeof(socketcall_req.args), msg, sizeof(*msg));
- SecureMem::sendSystemCall(threadFdPub, true, parentMapsFd, mem,
- __NR_socketcall, socketcall_req.call,
- mem->pathname - (char*)mem + (char*)mem->self);
- return true;
- }
- case SYS_RECVMSG:
- // Receiving messages is general not security critical.
- if (socketcall_req.args.recvmsg.flags &
- ~(MSG_DONTWAIT|MSG_OOB|MSG_PEEK|MSG_TRUNC|MSG_WAITALL)) {
- goto deny;
- }
- goto accept_complex;
- default:
- deny:
- SecureMem::abandonSystemCall(threadFd, rc);
- return false;
- }
-}
-
-#endif
-
-} // namespace
diff --git a/sandbox/linux/seccomp/stat.cc b/sandbox/linux/seccomp/stat.cc
deleted file mode 100644
index cdf7e4c..0000000
--- a/sandbox/linux/seccomp/stat.cc
+++ /dev/null
@@ -1,197 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "debug.h"
-#include "sandbox_impl.h"
-
-namespace playground {
-
-long Sandbox::sandbox_stat(const char *path, void *buf) {
- long long tm;
- Debug::syscall(&tm, __NR_stat, "Executing handler");
- size_t len = strlen(path);
- struct Request {
- int sysnum;
- long long cookie;
- Stat stat_req;
- char pathname[0];
- } __attribute__((packed)) *request;
- char data[sizeof(struct Request) + len];
- request = reinterpret_cast<struct Request*>(data);
- request->sysnum = __NR_stat;
- request->cookie = cookie();
- request->stat_req.sysnum = __NR_stat;
- request->stat_req.path_length = len;
- request->stat_req.buf = buf;
- memcpy(request->pathname, path, len);
-
- long rc;
- SysCalls sys;
- if (write(sys, processFdPub(), request, sizeof(data)) != (int)sizeof(data) ||
- read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
- die("Failed to forward stat() request [sandbox]");
- }
- Debug::elapsed(tm, __NR_stat);
- return rc;
-}
-
-long Sandbox::sandbox_lstat(const char *path, void *buf) {
- long long tm;
- Debug::syscall(&tm, __NR_lstat, "Executing handler");
- size_t len = strlen(path);
- struct Request {
- int sysnum;
- long long cookie;
- Stat stat_req;
- char pathname[0];
- } __attribute__((packed)) *request;
- char data[sizeof(struct Request) + len];
- request = reinterpret_cast<struct Request*>(data);
- request->sysnum = __NR_lstat;
- request->cookie = cookie();
- request->stat_req.sysnum = __NR_lstat;
- request->stat_req.path_length = len;
- request->stat_req.buf = buf;
- memcpy(request->pathname, path, len);
-
- long rc;
- SysCalls sys;
- if (write(sys, processFdPub(), request, sizeof(data)) != (int)sizeof(data) ||
- read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
- die("Failed to forward lstat() request [sandbox]");
- }
- Debug::elapsed(tm, __NR_lstat);
- return rc;
-}
-
-#if defined(__NR_stat64)
-long Sandbox::sandbox_stat64(const char *path, void *buf) {
- long long tm;
- Debug::syscall(&tm, __NR_stat64, "Executing handler");
- size_t len = strlen(path);
- struct Request {
- int sysnum;
- long long cookie;
- Stat stat_req;
- char pathname[0];
- } __attribute__((packed)) *request;
- char data[sizeof(struct Request) + len];
- request = reinterpret_cast<struct Request*>(data);
- request->sysnum = __NR_stat64;
- request->cookie = cookie();
- request->stat_req.sysnum = __NR_stat64;
- request->stat_req.path_length = len;
- request->stat_req.buf = buf;
- memcpy(request->pathname, path, len);
-
- long rc;
- SysCalls sys;
- if (write(sys, processFdPub(), request, sizeof(data)) != (int)sizeof(data) ||
- read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
- die("Failed to forward stat64() request [sandbox]");
- }
- Debug::elapsed(tm, __NR_stat64);
- return rc;
-}
-
-long Sandbox::sandbox_lstat64(const char *path, void *buf) {
- long long tm;
- Debug::syscall(&tm, __NR_lstat64, "Executing handler");
- size_t len = strlen(path);
- struct Request {
- int sysnum;
- long long cookie;
- Stat stat_req;
- char pathname[0];
- } __attribute__((packed)) *request;
- char data[sizeof(struct Request) + len];
- request = reinterpret_cast<struct Request*>(data);
- request->sysnum = __NR_lstat64;
- request->cookie = cookie();
- request->stat_req.sysnum = __NR_lstat64;
- request->stat_req.path_length = len;
- request->stat_req.buf = buf;
- memcpy(request->pathname, path, len);
-
- long rc;
- SysCalls sys;
- if (write(sys, processFdPub(), request, sizeof(data)) != (int)sizeof(data) ||
- read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) {
- die("Failed to forward lstat64() request [sandbox]");
- }
- Debug::elapsed(tm, __NR_lstat64);
- return rc;
-}
-#endif
-
-bool Sandbox::process_stat(int parentMapsFd, int sandboxFd, int threadFdPub,
- int threadFd, SecureMem::Args* mem) {
- // Read request
- SysCalls sys;
- Stat stat_req;
- if (read(sys, sandboxFd, &stat_req, sizeof(stat_req)) != sizeof(stat_req)) {
- read_parm_failed:
- die("Failed to read parameters for stat() [process]");
- }
- int rc = -ENAMETOOLONG;
- if (stat_req.path_length >= (int)sizeof(mem->pathname)) {
- char buf[32];
- while (stat_req.path_length > 0) {
- size_t len = stat_req.path_length > sizeof(buf) ?
- sizeof(buf) : stat_req.path_length;
- ssize_t i = read(sys, sandboxFd, buf, len);
- if (i <= 0) {
- goto read_parm_failed;
- }
- stat_req.path_length -= i;
- }
- if (write(sys, threadFd, &rc, sizeof(rc)) != sizeof(rc)) {
- die("Failed to return data from stat() [process]");
- }
- return false;
- }
- if (stat_req.sysnum != __NR_stat && stat_req.sysnum != __NR_lstat
- #ifdef __NR_stat64
- && stat_req.sysnum != __NR_stat64
- #endif
- #ifdef __NR_lstat64
- && stat_req.sysnum != __NR_lstat64
- #endif
- ) {
- die("Corrupted stat() request");
- }
-
- if (!g_policy.allow_file_namespace) {
- // After locking the mutex, we can no longer abandon the system call. So,
- // perform checks before clobbering the securely shared memory.
- char tmp[stat_req.path_length];
- if (read(sys, sandboxFd, tmp, stat_req.path_length) !=
- (ssize_t)stat_req.path_length) {
- goto read_parm_failed;
- }
- Debug::message(("Denying access to \"" + std::string(tmp) + "\"").c_str());
- SecureMem::abandonSystemCall(threadFd, -EACCES);
- return false;
- }
-
- SecureMem::lockSystemCall(parentMapsFd, mem);
- if (read(sys, sandboxFd, mem->pathname, stat_req.path_length) !=
- (ssize_t)stat_req.path_length) {
- goto read_parm_failed;
- }
- mem->pathname[stat_req.path_length] = '\000';
-
- // TODO(markus): Implement sandboxing policy
- Debug::message(("Allowing access to \"" + std::string(mem->pathname) +
- "\"").c_str());
-
- // Tell trusted thread to stat the file.
- SecureMem::sendSystemCall(threadFdPub, true, parentMapsFd, mem,
- stat_req.sysnum,
- mem->pathname - (char*)mem + (char*)mem->self,
- stat_req.buf);
- return true;
-}
-
-} // namespace
diff --git a/sandbox/linux/seccomp/syscall.cc b/sandbox/linux/seccomp/syscall.cc
deleted file mode 100644
index 681fec9..0000000
--- a/sandbox/linux/seccomp/syscall.cc
+++ /dev/null
@@ -1,380 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "debug.h"
-#include "sandbox_impl.h"
-#include "syscall_table.h"
-
-namespace playground {
-
-// TODO(markus): change this into a function that returns the address of the assembly code. If that isn't possible for sandbox_clone, then move that function into a *.S file
-asm(
- ".pushsection .text, \"ax\", @progbits\n"
-
- // This is the special wrapper for the clone() system call. The code
- // relies on the stack layout of the system call wrapper (c.f. below). It
- // passes the stack pointer as an additional argument to sandbox__clone(),
- // so that upon starting the child, register values can be restored and
- // the child can start executing at the correct IP, instead of trying to
- // run in the trusted thread.
- "playground$sandbox_clone:"
- ".globl playground$sandbox_clone\n"
- ".type playground$sandbox_clone, @function\n"
- #if defined(__x86_64__)
- // Skip the 8 byte return address into the system call wrapper. The
- // following bytes are the saved register values that we need to restore
- // upon return from clone() in the new thread.
- "lea 8(%rsp), %r9\n"
- "jmp playground$sandbox__clone\n"
- #elif defined(__i386__)
- // As i386 passes function arguments on the stack, we need to skip a few
- // more values before we can get to the saved registers.
- "lea 28(%esp), %eax\n"
- "mov %eax, 24(%esp)\n"
- "jmp playground$sandbox__clone\n"
- #else
- #error Unsupported target platform
- #endif
- ".size playground$sandbox_clone, .-playground$sandbox_clone\n"
-
-
- // This is the wrapper which is called by the untrusted code, trying to
- // make a system call.
- "playground$syscallWrapper:"
- ".internal playground$syscallWrapper\n"
- ".globl playground$syscallWrapper\n"
- ".type playground$syscallWrapper, @function\n"
- #if defined(__x86_64__)
- // Check for rt_sigreturn(). It needs to be handled specially.
- "cmp $15, %rax\n" // NR_rt_sigreturn
- "jnz 1f\n"
- "add $0x90, %rsp\n" // pop return addresses and red zone
- "0:syscall\n" // rt_sigreturn() is unrestricted
- "mov $66, %edi\n" // rt_sigreturn() should never return
- "mov $231, %eax\n" // NR_exit_group
- "jmp 0b\n"
-
- // Save all registers
- "1:push %rbp\n"
- "mov %rsp, %rbp\n"
- "push %rbx\n"
- "push %rcx\n"
- "push %rdx\n"
- "push %rsi\n"
- "push %rdi\n"
- "push %r8\n"
- "push %r9\n"
- "push %r10\n"
- "push %r11\n"
- "push %r12\n"
- "push %r13\n"
- "push %r14\n"
- "push %r15\n"
-
- // Convert from syscall calling conventions to C calling conventions.
- // System calls have a subtly different register ordering than the user-
- // space x86-64 ABI.
- "mov %r10, %rcx\n"
-
- // Check range of system call
- "cmp playground$maxSyscall(%rip), %eax\n"
- "ja 3f\n"
-
- // Retrieve function call from system call table (c.f. syscall_table.c).
- // We have three different types of entries; zero for denied system calls,
- // that should be handled by the defaultSystemCallHandler(); minus one
- // for unrestricted system calls that need to be forwarded to the trusted
- // thread; and function pointers to specific handler functions.
- "mov %rax, %r10\n"
- "shl $4, %r10\n"
- "lea playground$syscallTable(%rip), %r11\n"
- "add %r11, %r10\n"
- "mov 0(%r10), %r10\n"
-
- // Jump to function if non-null and not UNRESTRICTED_SYSCALL, otherwise
- // jump to fallback handler.
- "cmp $1, %r10\n"
- "jbe 3f\n"
- "call *%r10\n"
- "2:"
-
- // Restore CPU registers, except for %rax which was set by the system call.
- "pop %r15\n"
- "pop %r14\n"
- "pop %r13\n"
- "pop %r12\n"
- "pop %r11\n"
- "pop %r10\n"
- "pop %r9\n"
- "pop %r8\n"
- "pop %rdi\n"
- "pop %rsi\n"
- "pop %rdx\n"
- "pop %rcx\n"
- "pop %rbx\n"
- "pop %rbp\n"
-
- // Remove fake return address. This is added in the patching code in
- // library.cc and it makes stack traces a little cleaner.
- "add $8, %rsp\n"
-
- // Return to caller
- "ret\n"
-
- "3:"
- // If we end up calling a specific handler, we don't need to know the
- // system call number. However, in the generic case, we do. Shift
- // registers so that the system call number becomes visible as the
- // first function argument.
- "push %r9\n"
- "mov %r8, %r9\n"
- "mov %rcx, %r8\n"
- "mov %rdx, %rcx\n"
- "mov %rsi, %rdx\n"
- "mov %rdi, %rsi\n"
- "mov %rax, %rdi\n"
-
- // Call default handler.
- "call playground$defaultSystemCallHandler\n"
- "pop %r9\n"
- "jmp 2b\n"
- #elif defined(__i386__)
- "cmp $119, %eax\n" // NR_sigreturn
- "jnz 1f\n"
- "add $0x4, %esp\n" // pop return address
- "0:int $0x80\n" // sigreturn() is unrestricted
- "mov $66, %ebx\n" // sigreturn() should never return
- "mov %ebx, %eax\n" // NR_exit
- "jmp 0b\n"
- "1:cmp $173, %eax\n" // NR_rt_sigreturn
- "jnz 3f\n"
-
- // Convert rt_sigframe into sigframe, allowing us to call sigreturn().
- // This is possible since the first part of signal stack frames have
- // stayed very stable since the earliest kernel versions. While never
- // officially documented, lots of user space applications rely on this
- // part of the ABI, and kernel developers have been careful to maintain
- // backwards compatibility.
- // In general, the rt_sigframe includes a lot of extra information that
- // the signal handler can look at. Most notably, this means a complete
- // siginfo record.
- // Fortunately though, the kernel doesn't look at any of this extra data
- // when returning from a signal handler. So, we can safely convert an
- // rt_sigframe to a legacy sigframe, discarding the extra data in the
- // process. Interestingly, the legacy signal frame is actually larger than
- // the rt signal frame, as it includes a lot more padding.
- "sub $0x1C8, %esp\n" // a legacy signal stack is much larger
- "mov 0x1CC(%esp), %eax\n" // push signal number
- "push %eax\n"
- "lea 0x270(%esp), %esi\n" // copy siginfo register values
- "lea 0x4(%esp), %edi\n" // into new location
- "mov $0x16, %ecx\n"
- "cld\n"
- "rep movsl\n"
- "mov 0x2C8(%esp), %ebx\n" // copy first half of signal mask
- "mov %ebx, 0x54(%esp)\n"
- "lea 2f, %esi\n"
- "push %esi\n" // push restorer function
- "lea 0x2D4(%esp), %edi\n" // patch up retcode magic numbers
- "movb $2, %cl\n"
- "rep movsl\n"
- "ret\n" // return to restorer function
- "2:pop %eax\n" // remove dummy argument (signo)
- "mov $119, %eax\n" // NR_sigaction
- "int $0x80\n"
-
-
- // Preserve all registers
- "3:push %ebx\n"
- "push %ecx\n"
- "push %edx\n"
- "push %esi\n"
- "push %edi\n"
- "push %ebp\n"
-
- // Convert from syscall calling conventions to C calling conventions
- "push %ebp\n"
- "push %edi\n"
- "push %esi\n"
- "push %edx\n"
- "push %ecx\n"
- "push %ebx\n"
- "push %eax\n"
-
- // Check range of system call
- "cmp playground$maxSyscall, %eax\n"
- "ja 9f\n"
-
- // We often have long sequences of calls to gettimeofday(). This is
- // needlessly expensive. Coalesce them into a single call.
- //
- // We keep track of state in TLS storage that we can access through
- // the %fs segment register. See trusted_thread.cc for the exact
- // memory layout.
- //
- // TODO(markus): maybe, we should proactively call gettimeofday() and
- // clock_gettime(), whenever we talk to the trusted thread?
- // or maybe, if we have recently seen requests to compute
- // the time. There might be a repeated pattern of those.
- "cmp $78, %eax\n" // __NR_gettimeofday
- "jnz 6f\n"
- "cmp %eax, %fs:0x102C-0x58\n" // last system call
- "jnz 4f\n"
-
- // This system call and the last system call prior to this one both are
- // calls to gettimeofday(). Try to avoid making the new call and just
- // return the same result as in the previous call.
- // Just in case the caller is spinning on the result from gettimeofday(),
- // every so often, call the actual system call.
- "decl %fs:0x1030-0x58\n" // countdown calls to gettimofday()
- "jz 4f\n"
-
- // Atomically read the 64bit word representing last-known timestamp and
- // return it to the caller. On x86-32 this is a little more complicated and
- // requires the use of the cmpxchg8b instruction.
- "mov %ebx, %eax\n"
- "mov %ecx, %edx\n"
- "lock; cmpxchg8b 100f\n"
- "mov %eax, 0(%ebx)\n"
- "mov %edx, 4(%ebx)\n"
- "xor %eax, %eax\n"
- "add $28, %esp\n"
- "jmp 8f\n"
-
- // This is a call to gettimeofday(), but we don't have a valid cached
- // result, yet.
- "4:mov %eax, %fs:0x102C-0x58\n" // remember syscall number
- "movl $500, %fs:0x1030-0x58\n" // make system call, each 500 invocations
- "call playground$defaultSystemCallHandler\n"
-
- // Returned from gettimeofday(). Remember return value, in case the
- // application calls us again right away.
- // Again, this has to happen atomically and requires cmpxchg8b.
- "mov 4(%ebx), %ecx\n"
- "mov 0(%ebx), %ebx\n"
- "mov 100f, %eax\n"
- "mov 101f, %edx\n"
- "5:lock; cmpxchg8b 100f\n"
- "jnz 5b\n"
- "xor %eax, %eax\n"
- "jmp 10f\n"
-
- // Remember the number of the last system call made. We deliberately do
- // not remember calls to gettid(), as we have often seen long sequences
- // of calls to just gettimeofday() and gettid(). In that situation, we
- // would still like to coalesce the gettimeofday() calls.
- "6:cmp $224, %eax\n" // __NR_gettid
- "jz 7f\n"
- "mov %eax, %fs:0x102C-0x58\n" // remember syscall number
-
- // Retrieve function call from system call table (c.f. syscall_table.c).
- // We have three different types of entries; zero for denied system calls,
- // that should be handled by the defaultSystemCallHandler(); minus one
- // for unrestricted system calls that need to be forwarded to the trusted
- // thread; and function pointers to specific handler functions.
- "7:shl $3, %eax\n"
- "lea playground$syscallTable, %ebx\n"
- "add %ebx, %eax\n"
- "mov 0(%eax), %eax\n"
-
- // Jump to function if non-null and not UNRESTRICTED_SYSCALL, otherwise
- // jump to fallback handler.
- "cmp $1, %eax\n"
- "jbe 9f\n"
- "add $4, %esp\n"
- "call *%eax\n"
- "add $24, %esp\n"
-
- // Restore CPU registers, except for %eax which was set by the system call.
- "8:pop %ebp\n"
- "pop %edi\n"
- "pop %esi\n"
- "pop %edx\n"
- "pop %ecx\n"
- "pop %ebx\n"
-
- // Return to caller
- "ret\n"
-
- // Call default handler.
- "9:call playground$defaultSystemCallHandler\n"
- "10:add $28, %esp\n"
- "jmp 8b\n"
-
- ".pushsection \".bss\"\n"
- ".balign 8\n"
-"100:.byte 0, 0, 0, 0\n"
-"101:.byte 0, 0, 0, 0\n"
- ".popsection\n"
-
- #else
- #error Unsupported target platform
- #endif
- ".size playground$syscallWrapper, .-playground$syscallWrapper\n"
- ".popsection\n"
-);
-
-
-void* Sandbox::defaultSystemCallHandler(int syscallNum, void* arg0, void* arg1,
- void* arg2, void* arg3, void* arg4,
- void* arg5) {
- // TODO(markus): The following comment is currently not true, we do intercept these system calls. Try to fix that.
-
- // We try to avoid intercepting read(), and write(), as these system calls
- // are not restricted in Seccomp mode. But depending on the exact
- // instruction sequence in libc, we might not be able to reliably
- // filter out these system calls at the time when we instrument the code.
- SysCalls sys;
- long rc;
- long long tm;
- switch (syscallNum) {
- case __NR_read:
- Debug::syscall(&tm, syscallNum, "Allowing unrestricted system call");
- rc = sys.read((long)arg0, arg1, (size_t)arg2);
- break;
- case __NR_write:
- Debug::syscall(&tm, syscallNum, "Allowing unrestricted system call");
- rc = sys.write((long)arg0, arg1, (size_t)arg2);
- break;
- default:
- if (Debug::isEnabled()) {
- // In debug mode, prevent stderr from being closed
- if (syscallNum == __NR_close && arg0 == (void *)2)
- return 0;
- }
-
- if ((unsigned)syscallNum <= maxSyscall &&
- syscallTable[syscallNum].handler == UNRESTRICTED_SYSCALL) {
- Debug::syscall(&tm, syscallNum, "Allowing unrestricted system call");
- perform_unrestricted:
- struct {
- int sysnum;
- void* unrestricted_req[6];
- } __attribute__((packed)) request = {
- syscallNum, { arg0, arg1, arg2, arg3, arg4, arg5 } };
-
- int thread = threadFdPub();
- void* rc;
- if (write(sys, thread, &request, sizeof(request)) != sizeof(request) ||
- read(sys, thread, &rc, sizeof(rc)) != sizeof(rc)) {
- die("Failed to forward unrestricted system call");
- }
- Debug::elapsed(tm, syscallNum);
- return rc;
- } else if (Debug::isEnabled()) {
- Debug::syscall(&tm, syscallNum,
- "In production mode, this call would be disallowed");
- goto perform_unrestricted;
- } else {
- return (void *)-ENOSYS;
- }
- }
- if (rc < 0) {
- rc = -sys.my_errno;
- }
- Debug::elapsed(tm, syscallNum);
- return (void *)rc;
-}
-
-} // namespace
diff --git a/sandbox/linux/seccomp/syscall.h b/sandbox/linux/seccomp/syscall.h
deleted file mode 100644
index 1315e12..0000000
--- a/sandbox/linux/seccomp/syscall.h
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef SYSCALL_H__
-#define SYSCALL_H__
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void syscallWrapper() asm("playground$syscallWrapper")
-#if defined(__x86_64__)
- __attribute__((visibility("internal")))
-#endif
-;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // SYSCALL_H__
diff --git a/sandbox/linux/seccomp/syscall_table.c b/sandbox/linux/seccomp/syscall_table.c
deleted file mode 100644
index c9dd7a4..0000000
--- a/sandbox/linux/seccomp/syscall_table.c
+++ /dev/null
@@ -1,153 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include <asm/unistd.h>
-#include "sandbox_impl.h"
-#include "syscall_table.h"
-
-#if defined(__x86_64__)
-#ifndef __NR_set_robust_list
-#define __NR_set_robust_list 273
-#endif
-#ifndef __NR_accept4
-#define __NR_accept4 288
-#endif
-#elif defined(__i386__)
-#ifndef __NR_set_robust_list
-#define __NR_set_robust_list 311
-#endif
-#else
-#error Unsupported target platform
-#endif
-
-// TODO(markus): This is an incredibly dirty hack to make the syscallTable
-// live in r/o memory.
-// Unfortunately, gcc doesn't give us a clean option to do
-// this. Ultimately, we should probably write some code that
-// parses /usr/include/asm/unistd*.h and generates a *.S file.
-// But we then need to figure out how to integrate this code
-// with our build system.
-
-const struct SyscallTable syscallTable[] __attribute__((
- section(".rodata, \"a\", @progbits\n#"))) ={
-
- #if defined(__NR_accept)
- [ __NR_accept ] = { UNRESTRICTED_SYSCALL, 0 },
- [ __NR_accept4 ] = { UNRESTRICTED_SYSCALL, 0 },
- #endif
- [ __NR_access ] = { (void*)&sandbox_access, process_access },
- [ __NR_brk ] = { UNRESTRICTED_SYSCALL, 0 },
- [ __NR_clock_gettime ] = { UNRESTRICTED_SYSCALL, 0 },
- [ __NR_clone ] = { (void*)&sandbox_clone, process_clone },
- [ __NR_close ] = { UNRESTRICTED_SYSCALL, 0 },
- [ __NR_dup ] = { UNRESTRICTED_SYSCALL, 0 },
- [ __NR_dup2 ] = { UNRESTRICTED_SYSCALL, 0 },
- [ __NR_epoll_create ] = { UNRESTRICTED_SYSCALL, 0 },
- [ __NR_epoll_ctl ] = { UNRESTRICTED_SYSCALL, 0 },
- [ __NR_epoll_wait ] = { UNRESTRICTED_SYSCALL, 0 },
- [ __NR_exit ] = { (void*)&sandbox_exit, process_exit },
- [ __NR_exit_group ] = { UNRESTRICTED_SYSCALL, 0 },
- [ __NR_fcntl ] = { UNRESTRICTED_SYSCALL, 0 },
- #if defined(__NR_fcntl64)
- [ __NR_fcntl64 ] = { UNRESTRICTED_SYSCALL, 0 },
- #endif
- [ __NR_fstat ] = { UNRESTRICTED_SYSCALL, 0 },
- #if defined(__NR_fstat64)
- [ __NR_fstat64 ] = { UNRESTRICTED_SYSCALL, 0 },
- #endif
- [ __NR_futex ] = { UNRESTRICTED_SYSCALL, 0 },
- [ __NR_getdents ] = { UNRESTRICTED_SYSCALL, 0 },
- [ __NR_getdents64 ] = { UNRESTRICTED_SYSCALL, 0 },
- #if defined(__NR_getpeername)
- [ __NR_getpeername ] = { UNRESTRICTED_SYSCALL, 0 },
- #endif
- [ __NR_getpid ] = { (void*)&sandbox_getpid, 0 },
- #if defined(__NR_getsockname)
- [ __NR_getsockname ] = { UNRESTRICTED_SYSCALL, 0 },
- [ __NR_getsockopt ] = { (void*)&sandbox_getsockopt,process_getsockopt },
- #endif
- [ __NR_gettid ] = { (void*)&sandbox_gettid, 0 },
- [ __NR_gettimeofday ] = { UNRESTRICTED_SYSCALL, 0 },
- [ __NR_ioctl ] = { (void*)&sandbox_ioctl, process_ioctl },
- #if defined(__NR_ipc)
- [ __NR_ipc ] = { (void*)&sandbox_ipc, process_ipc },
- #endif
- #if defined(__NR__llseek)
- [ __NR__llseek ] = { UNRESTRICTED_SYSCALL, 0 },
- #endif
- [ __NR_lseek ] = { UNRESTRICTED_SYSCALL, 0 },
- [ __NR_lstat ] = { (void*)&sandbox_lstat, process_stat },
- #if defined(__NR_lstat64)
- [ __NR_lstat64 ] = { (void*)&sandbox_lstat64, process_stat },
- #endif
- [ __NR_madvise ] = { (void*)&sandbox_madvise, process_madvise },
- #if defined(__NR_mmap2)
- [ __NR_mmap2 ] =
- #else
- [ __NR_mmap ] =
- #endif
- { (void*)&sandbox_mmap, process_mmap },
- [ __NR_mprotect ] = { (void*)&sandbox_mprotect, process_mprotect },
- [ __NR_munmap ] = { (void*)&sandbox_munmap, process_munmap },
- [ __NR_open ] = { (void*)&sandbox_open, process_open },
- [ __NR_pipe ] = { UNRESTRICTED_SYSCALL, 0 },
- [ __NR_poll ] = { UNRESTRICTED_SYSCALL, 0 },
- #if defined(__NR_recvfrom)
- [ __NR_recvfrom ] = { (void*)&sandbox_recvfrom, process_recvfrom },
- [ __NR_recvmsg ] = { (void*)&sandbox_recvmsg, process_recvmsg },
- #endif
- #if defined(__NR_rt_sigaction)
- [ __NR_rt_sigaction ] = { (void*)&sandbox_rt_sigaction,process_sigaction},
- #endif
- #if defined(__NR_rt_sigprocmask)
- [ __NR_rt_sigprocmask ] = { (void*)&sandbox_rt_sigprocmask, 0 },
- #endif
- #if defined(__NR_sendmsg)
- [ __NR_sendmsg ] = { (void*)&sandbox_sendmsg, process_sendmsg },
- [ __NR_sendto ] = { (void*)&sandbox_sendto, process_sendto },
- #endif
- [ __NR_set_robust_list ] = { UNRESTRICTED_SYSCALL, 0 },
- #if defined(__NR_setsockopt)
- [ __NR_setsockopt ] = { (void*)&sandbox_setsockopt,process_setsockopt },
- #endif
- #if defined(__NR_shmat)
- [ __NR_shmat ] = { (void*)&sandbox_shmat, process_shmat },
- [ __NR_shmctl ] = { (void*)&sandbox_shmctl, process_shmctl },
- [ __NR_shmdt ] = { (void*)&sandbox_shmdt, process_shmdt },
- [ __NR_shmget ] = { (void*)&sandbox_shmget, process_shmget },
- #endif
- #if defined(__NR_shutdown)
- [ __NR_shutdown ] = { UNRESTRICTED_SYSCALL, 0 },
- #endif
- #if defined(__NR_sigaction)
- [ __NR_sigaction ] = { (void*)&sandbox_sigaction,process_sigaction },
- #endif
- #if defined(__NR_signal)
- [ __NR_signal ] = { (void*)&sandbox_signal, process_sigaction },
- #endif
- #if defined(__NR_sigprocmask)
- [ __NR_sigprocmask ] = { (void*)&sandbox_sigprocmask, 0 },
- #endif
- #if defined(__NR_socketpair)
- [ __NR_socketpair ] = { UNRESTRICTED_SYSCALL, 0 },
- #endif
- #if defined(__NR_socketcall)
- [ __NR_socketcall ] = { (void*)&sandbox_socketcall,process_socketcall },
- #endif
- [ __NR_stat ] = { (void*)&sandbox_stat, process_stat },
- #if defined(__NR_stat64)
- [ __NR_stat64 ] = { (void*)&sandbox_stat64, process_stat },
- #endif
- [ __NR_time ] = { UNRESTRICTED_SYSCALL, 0 },
- [ __NR_uname ] = { UNRESTRICTED_SYSCALL, 0 },
-};
-const unsigned maxSyscall __attribute__((section(".rodata"))) =
- sizeof(syscallTable)/sizeof(struct SyscallTable);
-
-const int syscall_mutex_[4096/sizeof(int)] asm("playground$syscall_mutex")
- __attribute__((section(".rodata"),aligned(4096)
-#if defined(__x86_64__)
- ,visibility("internal")
-#endif
- )) = { 0x80000000 };
diff --git a/sandbox/linux/seccomp/syscall_table.h b/sandbox/linux/seccomp/syscall_table.h
deleted file mode 100644
index 5bd6791..0000000
--- a/sandbox/linux/seccomp/syscall_table.h
+++ /dev/null
@@ -1,43 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef SYSCALL_TABLE_H__
-#define SYSCALL_TABLE_H__
-
-#include <sys/types.h>
-
-#ifdef __cplusplus
-#include "securemem.h"
-extern "C" {
-namespace playground {
-#define SecureMemArgs SecureMem::Args
-#else
-#define SecureMemArgs void
-#define bool int
-#endif
- #define UNRESTRICTED_SYSCALL ((void *)1)
-
- struct SyscallTable {
- void *handler;
- bool (*trustedProcess)(int parentMapsFd, int sandboxFd, int threadFdPub,
- int threadFd, SecureMemArgs* mem);
- };
- extern const struct SyscallTable syscallTable[]
- asm("playground$syscallTable")
-#if defined(__x86_64__)
- __attribute__((visibility("internal")))
-#endif
- ;
- extern const unsigned maxSyscall
- asm("playground$maxSyscall")
-#if defined(__x86_64__)
- __attribute__((visibility("internal")))
-#endif
- ;
-#ifdef __cplusplus
-} // namespace
-}
-#endif
-
-#endif // SYSCALL_TABLE_H__
diff --git a/sandbox/linux/seccomp/tests/list_tests.py b/sandbox/linux/seccomp/tests/list_tests.py
deleted file mode 100644
index 011a52e..0000000
--- a/sandbox/linux/seccomp/tests/list_tests.py
+++ /dev/null
@@ -1,22 +0,0 @@
-# Copyright (c) 2010 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import re
-import sys
-
-
-def get_tests(filename):
- for line in open(filename):
- match = re.match(r"TEST\((\w+)\)", line)
- if match is not None:
- yield match.group(1)
-
-
-def main(args):
- for name in get_tests(args[0]):
- print ' { "%s", %s },' % (name, name)
-
-
-if __name__ == "__main__":
- main(sys.argv[1:])
diff --git a/sandbox/linux/seccomp/tests/test_syscalls.cc b/sandbox/linux/seccomp/tests/test_syscalls.cc
deleted file mode 100644
index 3e6acd5..0000000
--- a/sandbox/linux/seccomp/tests/test_syscalls.cc
+++ /dev/null
@@ -1,758 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include <assert.h>
-#include <dirent.h>
-#include <pthread.h>
-#include <pty.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-
-#include "sandbox_impl.h"
-
-#ifdef DEBUG
-#define MSG(fmt, ...) printf(fmt, ##__VA_ARGS__)
-#else
-#define MSG(fmt, ...) do { } while (0)
-#endif
-
-int g_intended_status_fd = -1;
-
-// Declares the wait() status that the test subprocess intends to exit with.
-void intend_exit_status(int val, bool is_signal) {
- if (is_signal) {
- val = W_EXITCODE(0, val);
- } else {
- val = W_EXITCODE(val, 0);
- }
- if (g_intended_status_fd != -1) {
- int sent = write(g_intended_status_fd, &val, sizeof(val));
- assert(sent == sizeof(val));
- } else {
- // This prints in cases where we run one test without forking
- printf("Intending to exit with status %i...\n", val);
- }
-}
-
-
-// This is basically a marker to grep for.
-#define TEST(name) void name()
-
-TEST(test_dup) {
- StartSeccompSandbox();
- // Test a simple syscall that is marked as UNRESTRICTED_SYSCALL.
- int fd = dup(1);
- assert(fd >= 0);
- int rc = close(fd);
- assert(rc == 0);
-}
-
-TEST(test_segfault) {
- StartSeccompSandbox();
- // Check that the sandbox's SIGSEGV handler does not stop the
- // process from dying cleanly in the event of a real segfault.
- intend_exit_status(SIGSEGV, true);
- asm("hlt");
-}
-
-TEST(test_exit) {
- StartSeccompSandbox();
- intend_exit_status(123, false);
- _exit(123);
-}
-
-// This has an off-by-three error because it counts ".", "..", and the
-// FD for the /proc/self/fd directory. This doesn't matter because it
-// is only used to check for differences in the number of open FDs.
-static int count_fds() {
- DIR *dir = opendir("/proc/self/fd");
- assert(dir != NULL);
- int count = 0;
- while (1) {
- struct dirent *d = readdir(dir);
- if (d == NULL)
- break;
- count++;
- }
- int rc = closedir(dir);
- assert(rc == 0);
- return count;
-}
-
-static void *thread_func(void *x) {
- int *ptr = (int *) x;
- *ptr = 123;
- MSG("In new thread\n");
- return (void *) 456;
-}
-
-TEST(test_thread) {
- playground::g_policy.allow_file_namespace = true; // To allow count_fds()
- StartSeccompSandbox();
- int fd_count1 = count_fds();
- pthread_t tid;
- int x = 999;
- void *result;
- pthread_create(&tid, NULL, thread_func, &x);
- MSG("Waiting for thread\n");
- pthread_join(tid, &result);
- assert(result == (void *) 456);
- assert(x == 123);
- // Check that the process has not leaked FDs.
- int fd_count2 = count_fds();
- assert(fd_count2 == fd_count1);
-}
-
-static int clone_func(void *x) {
- int *ptr = (int *) x;
- *ptr = 124;
- MSG("In thread\n");
- // On x86-64, returning from this function calls the __NR_exit_group
- // syscall instead of __NR_exit.
- syscall(__NR_exit, 100);
- // Not reached.
- return 200;
-}
-
-#if defined(__i386__)
-static int get_gs() {
- int gs;
- asm volatile("mov %%gs, %0" : "=r"(gs));
- return gs;
-}
-#endif
-
-static void *get_tls_base() {
- void *base;
-#if defined(__x86_64__)
- asm volatile("mov %%fs:0, %0" : "=r"(base));
-#elif defined(__i386__)
- asm volatile("mov %%gs:0, %0" : "=r"(base));
-#else
-#error Unsupported target platform
-#endif
- return base;
-}
-
-TEST(test_clone) {
- playground::g_policy.allow_file_namespace = true; // To allow count_fds()
- StartSeccompSandbox();
- int fd_count1 = count_fds();
- int stack_size = 0x1000;
- char *stack = (char *) malloc(stack_size);
- assert(stack != NULL);
- int flags = CLONE_VM | CLONE_FS | CLONE_FILES |
- CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM |
- CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID;
- int tid = -1;
- int x = 999;
-
- // The sandbox requires us to pass CLONE_TLS. Pass settings that
- // are enough to copy the parent thread's TLS setup. This allows us
- // to invoke libc in the child thread.
-#if defined(__x86_64__)
- void *tls = get_tls_base();
-#elif defined(__i386__)
- struct user_desc tls_desc, *tls = &tls_desc;
- tls_desc.entry_number = get_gs() >> 3;
- tls_desc.base_addr = (long) get_tls_base();
- tls_desc.limit = 0xfffff;
- tls_desc.seg_32bit = 1;
- tls_desc.contents = 0;
- tls_desc.read_exec_only = 0;
- tls_desc.limit_in_pages = 1;
- tls_desc.seg_not_present = 0;
- tls_desc.useable = 1;
-#else
-#error Unsupported target platform
-#endif
-
- int rc = clone(clone_func, (void *) (stack + stack_size), flags, &x,
- &tid, tls, &tid);
- assert(rc > 0);
- while (tid == rc) {
- syscall(__NR_futex, &tid, FUTEX_WAIT, rc, NULL);
- }
- assert(tid == 0);
- assert(x == 124);
- // Check that the process has not leaked FDs.
- int fd_count2 = count_fds();
- assert(fd_count2 == fd_count1);
-}
-
-static int uncalled_clone_func(void *x) {
- printf("In thread func, which shouldn't happen\n");
- return 1;
-}
-
-TEST(test_clone_disallowed_flags) {
- StartSeccompSandbox();
- int stack_size = 4096;
- char *stack = (char *) malloc(stack_size);
- assert(stack != NULL);
- /* We omit the flags CLONE_SETTLS, CLONE_PARENT_SETTID and
- CLONE_CHILD_CLEARTID, which is disallowed by the sandbox. */
- int flags = CLONE_VM | CLONE_FS | CLONE_FILES |
- CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM;
- int rc = clone(uncalled_clone_func, (void *) (stack + stack_size),
- flags, NULL, NULL, NULL, NULL);
- assert(rc == -1);
- assert(errno == EPERM);
-}
-
-static void *fp_thread(void *x) {
- int val;
- asm("movss %%xmm0, %0" : "=m"(val));
- MSG("val=%i\n", val);
- return NULL;
-}
-
-TEST(test_fp_regs) {
- StartSeccompSandbox();
- int val = 1234;
- asm("movss %0, %%xmm0" : "=m"(val));
- pthread_t tid;
- pthread_create(&tid, NULL, fp_thread, NULL);
- pthread_join(tid, NULL);
- MSG("thread done OK\n");
-}
-
-static long long read_tsc() {
- long long rc;
- asm volatile(
- "rdtsc\n"
- "mov %%eax, (%0)\n"
- "mov %%edx, 4(%0)\n"
- :
- : "c"(&rc), "a"(-1), "d"(-1));
- return rc;
-}
-
-TEST(test_rdtsc) {
- StartSeccompSandbox();
- // Just check that we can do the instruction.
- read_tsc();
-}
-
-TEST(test_getpid) {
- int pid1 = getpid();
- StartSeccompSandbox();
- int pid2 = getpid();
- assert(pid1 == pid2);
- // Bypass any caching that glibc's getpid() wrapper might do.
- int pid3 = syscall(__NR_getpid);
- assert(pid1 == pid3);
-}
-
-TEST(test_gettid) {
- // glibc doesn't provide a gettid() wrapper.
- int tid1 = syscall(__NR_gettid);
- assert(tid1 > 0);
- StartSeccompSandbox();
- int tid2 = syscall(__NR_gettid);
- assert(tid1 == tid2);
-}
-
-static void *map_something() {
- void *addr = mmap(NULL, 0x1000, PROT_READ,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- assert(addr != MAP_FAILED);
- return addr;
-}
-
-TEST(test_mmap_disallows_remapping) {
- void *addr = map_something();
- StartSeccompSandbox();
- // Overwriting a mapping that was created before the sandbox was
- // enabled is not allowed.
- void *result = mmap(addr, 0x1000, PROT_READ,
- MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
- assert(result == MAP_FAILED);
- assert(errno == EINVAL);
-}
-
-TEST(test_mmap_disallows_low_address) {
- StartSeccompSandbox();
- // Mapping pages at low addresses is not allowed because this helps
- // with exploiting buggy kernels.
- void *result = mmap(NULL, 0x1000, PROT_READ,
- MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
- assert(result == MAP_FAILED);
- assert(errno == EINVAL);
-}
-
-TEST(test_munmap_allowed) {
- StartSeccompSandbox();
- void *addr = map_something();
- int result = munmap(addr, 0x1000);
- assert(result == 0);
-}
-
-TEST(test_munmap_disallowed) {
- void *addr = map_something();
- StartSeccompSandbox();
- int result = munmap(addr, 0x1000);
- assert(result == -1);
- assert(errno == EINVAL);
-}
-
-TEST(test_mprotect_allowed) {
- StartSeccompSandbox();
- void *addr = map_something();
- int result = mprotect(addr, 0x1000, PROT_READ | PROT_WRITE);
- assert(result == 0);
-}
-
-TEST(test_mprotect_disallowed) {
- void *addr = map_something();
- StartSeccompSandbox();
- int result = mprotect(addr, 0x1000, PROT_READ | PROT_WRITE);
- assert(result == -1);
- assert(errno == EINVAL);
-}
-
-static int get_tty_fd() {
- int master_fd, tty_fd;
- int rc = openpty(&master_fd, &tty_fd, NULL, NULL, NULL);
- assert(rc == 0);
- return tty_fd;
-}
-
-TEST(test_ioctl_tiocgwinsz_allowed) {
- int tty_fd = get_tty_fd();
- StartSeccompSandbox();
- int size[2];
- // Get terminal width and height.
- int result = ioctl(tty_fd, TIOCGWINSZ, size);
- assert(result == 0);
-}
-
-TEST(test_ioctl_disallowed) {
- int tty_fd = get_tty_fd();
- StartSeccompSandbox();
- // This ioctl call inserts a character into the tty's input queue,
- // which provides a way to send commands to an interactive shell.
- char c = 'x';
- int result = ioctl(tty_fd, TIOCSTI, &c);
- assert(result == -1);
- assert(errno == EINVAL);
-}
-
-TEST(test_socket) {
- StartSeccompSandbox();
- int fd = socket(AF_UNIX, SOCK_STREAM, 0);
- assert(fd == -1);
- // TODO: Make it consistent between i386 and x86-64.
- assert(errno == EINVAL || errno == ENOSYS);
-}
-
-TEST(test_open_disabled) {
- StartSeccompSandbox();
- int fd = open("/dev/null", O_RDONLY);
- assert(fd == -1);
- assert(errno == EACCES);
-
- // Writing to the policy flag does not change this.
- playground::g_policy.allow_file_namespace = true;
- fd = open("/dev/null", O_RDONLY);
- assert(fd == -1);
- assert(errno == EACCES);
-}
-
-TEST(test_open_enabled) {
- playground::g_policy.allow_file_namespace = true;
- StartSeccompSandbox();
- int fd = open("/dev/null", O_RDONLY);
- assert(fd >= 0);
- int rc = close(fd);
- assert(rc == 0);
- fd = open("/dev/null", O_WRONLY);
- assert(fd == -1);
- assert(errno == EACCES);
-}
-
-TEST(test_access_disabled) {
- StartSeccompSandbox();
- int rc = access("/dev/null", R_OK);
- assert(rc == -1);
- assert(errno == EACCES);
-}
-
-TEST(test_access_enabled) {
- playground::g_policy.allow_file_namespace = true;
- StartSeccompSandbox();
- int rc = access("/dev/null", R_OK);
- assert(rc == 0);
- rc = access("path-that-does-not-exist", R_OK);
- assert(rc == -1);
- assert(errno == ENOENT);
-}
-
-TEST(test_stat_disabled) {
- StartSeccompSandbox();
- struct stat st;
- int rc = stat("/dev/null", &st);
- assert(rc == -1);
- assert(errno == EACCES);
-}
-
-TEST(test_stat_enabled) {
- playground::g_policy.allow_file_namespace = true;
- StartSeccompSandbox();
- struct stat st;
- int rc = stat("/dev/null", &st);
- assert(rc == 0);
- rc = stat("path-that-does-not-exist", &st);
- assert(rc == -1);
- assert(errno == ENOENT);
-}
-
-static int g_value;
-
-static void signal_handler(int sig) {
- g_value = 300;
- MSG("In signal handler\n");
-}
-
-static void sigaction_handler(int sig, siginfo_t *a, void *b) {
- g_value = 300;
- MSG("In sigaction handler\n");
-}
-
-static void (*g_sig_handler_ptr)(int sig, void *addr) asm("g_sig_handler_ptr");
-
-static void non_fatal_sig_handler(int sig, void *addr) {
- g_value = 300;
- MSG("Caught signal %d at %p\n", sig, addr);
-}
-
-static void fatal_sig_handler(int sig, void *addr) {
- // Recursively trigger another segmentation fault while already in the SEGV
- // handler. This should terminate the program if SIGSEGV is marked as a
- // deferred signal.
- // Only do this on the first entry to this function. Otherwise, the signal
- // handler was probably marked as SA_NODEFER and we want to continue
- // execution.
- if (!g_value++) {
- MSG("Caught signal %d at %p\n", sig, addr);
- if (sig == SIGSEGV) {
- asm volatile("hlt");
- } else {
- asm volatile("int3");
- }
- }
-}
-
-static void (*generic_signal_handler(void))
- (int signo, siginfo_t *info, void *context) {
- void (*hdl)(int, siginfo_t *, void *);
- asm volatile(
- "lea 0f, %0\n"
- "jmp 999f\n"
- "0:\n"
-
-#if defined(__x86_64__)
- "mov 0xB0(%%rsp), %%rsi\n" // Pass original %rip to signal handler
- "cmpb $0xF4, 0(%%rsi)\n" // hlt
- "jnz 1f\n"
- "addq $1, 0xB0(%%rsp)\n" // Adjust %eip past failing instruction
- "1:jmp *g_sig_handler_ptr\n" // Call actual signal handler
-#elif defined(__i386__)
- // TODO(markus): We currently don't guarantee that signal handlers always
- // have the correct "magic" restorer function. If we fix
- // this, we should add a test for it (both for SEGV and
- // non-SEGV).
- "cmpw $0, 0xA(%%esp)\n"
- "lea 0x40(%%esp), %%eax\n" // %eip at time of exception
- "jz 1f\n"
- "add $0x9C, %%eax\n" // %eip at time of exception
- "1:mov 0(%%eax), %%ecx\n"
- "cmpb $0xF4, 0(%%ecx)\n" // hlt
- "jnz 2f\n"
- "addl $1, 0(%%eax)\n" // Adjust %eip past failing instruction
- "2:push %%ecx\n" // Pass original %eip to signal handler
- "mov 8(%%esp), %%eax\n"
- "push %%eax\n" // Pass signal number to signal handler
- "call *g_sig_handler_ptr\n" // Call actual signal handler
- "pop %%eax\n"
- "pop %%ecx\n"
- "ret\n"
-#else
-#error Unsupported target platform
-#endif
-
-"999:\n"
- : "=r"(hdl));
- return hdl;
-}
-
-TEST(test_signal_handler) {
- sighandler_t result = signal(SIGTRAP, signal_handler);
- assert(result != SIG_ERR);
-
- StartSeccompSandbox();
-
- result = signal(SIGTRAP, signal_handler);
- assert(result != SIG_ERR);
-
- g_value = 200;
- asm("int3");
- assert(g_value == 300);
-}
-
-TEST(test_sigaction_handler) {
- struct sigaction act;
- act.sa_sigaction = sigaction_handler;
- sigemptyset(&act.sa_mask);
- act.sa_flags = SA_SIGINFO;
- int rc = sigaction(SIGTRAP, &act, NULL);
- assert(rc == 0);
-
- StartSeccompSandbox();
-
- rc = sigaction(SIGTRAP, &act, NULL);
- assert(rc == 0);
-
- g_value = 200;
- asm("int3");
- assert(g_value == 300);
-}
-
-TEST(test_blocked_signal) {
- sighandler_t result = signal(SIGTRAP, signal_handler);
- assert(result != SIG_ERR);
- StartSeccompSandbox();
-
- // Initially the signal should not be blocked.
- sigset_t sigs;
- sigfillset(&sigs);
- int rc = sigprocmask(0, NULL, &sigs);
- assert(rc == 0);
- assert(!sigismember(&sigs, SIGTRAP));
-
- sigemptyset(&sigs);
- sigaddset(&sigs, SIGTRAP);
- rc = sigprocmask(SIG_BLOCK, &sigs, NULL);
- assert(rc == 0);
-
- // Check that we can read back the blocked status.
- sigemptyset(&sigs);
- rc = sigprocmask(0, NULL, &sigs);
- assert(rc == 0);
- assert(sigismember(&sigs, SIGTRAP));
-
- // Check that the signal handler really is blocked.
- intend_exit_status(SIGTRAP, true);
- asm("int3");
-}
-
-TEST(test_sigaltstack) {
- // The sandbox does not support sigaltstack() yet. Just test that
- // it returns an error.
- StartSeccompSandbox();
- stack_t st;
- st.ss_size = 0x4000;
- st.ss_sp = malloc(st.ss_size);
- assert(st.ss_sp != NULL);
- st.ss_flags = 0;
- int rc = sigaltstack(&st, NULL);
- assert(rc == -1);
- assert(errno == ENOSYS);
-}
-
-TEST(test_sa_flags) {
- StartSeccompSandbox();
- int flags[4] = { 0, SA_NODEFER, SA_SIGINFO, SA_SIGINFO | SA_NODEFER };
- for (int i = 0; i < 4; ++i) {
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = generic_signal_handler();
- g_sig_handler_ptr = non_fatal_sig_handler;
- sa.sa_flags = flags[i];
-
- // Test SEGV handling
- g_value = 200;
- sigaction(SIGSEGV, &sa, NULL);
- asm volatile("hlt");
- assert(g_value == 300);
-
- // Test non-SEGV handling
- g_value = 200;
- sigaction(SIGTRAP, &sa, NULL);
- asm volatile("int3");
- assert(g_value == 300);
- }
-}
-
-TEST(test_segv_defer) {
- StartSeccompSandbox();
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = generic_signal_handler();
- g_sig_handler_ptr = fatal_sig_handler;
-
- // Test non-deferred SEGV (should continue execution)
- sa.sa_flags = SA_NODEFER;
- sigaction(SIGSEGV, &sa, NULL);
- g_value = 0;
- asm volatile("hlt");
-
- // Test deferred SEGV (should terminate program)
- sa.sa_flags = 0;
- sigaction(SIGSEGV, &sa, NULL);
- g_value = 0;
- intend_exit_status(SIGSEGV, true);
- asm volatile("hlt");
-}
-
-TEST(test_trap_defer) {
- StartSeccompSandbox();
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = generic_signal_handler();
- g_sig_handler_ptr = fatal_sig_handler;
-
- // Test non-deferred TRAP (should continue execution)
- sa.sa_flags = SA_NODEFER;
- sigaction(SIGTRAP, &sa, NULL);
- g_value = 0;
- asm volatile("int3");
-
- // Test deferred TRAP (should terminate program)
- sa.sa_flags = 0;
- sigaction(SIGTRAP, &sa, NULL);
- g_value = 0;
- intend_exit_status(SIGTRAP, true);
- asm volatile("int3");
-}
-
-TEST(test_segv_resethand) {
- StartSeccompSandbox();
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = generic_signal_handler();
- g_sig_handler_ptr = non_fatal_sig_handler;
- sa.sa_flags = SA_RESETHAND;
- sigaction(SIGSEGV, &sa, NULL);
-
- // Test first invocation of signal handler (should continue execution)
- asm volatile("hlt");
-
- // Test second invocation of signal handler (should terminate program)
- intend_exit_status(SIGSEGV, true);
- asm volatile("hlt");
-}
-
-TEST(test_trap_resethand) {
- StartSeccompSandbox();
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = generic_signal_handler();
- g_sig_handler_ptr = non_fatal_sig_handler;
- sa.sa_flags = SA_RESETHAND;
- sigaction(SIGTRAP, &sa, NULL);
-
- // Test first invocation of signal handler (should continue execution)
- asm volatile("int3");
-
- // Test second invocation of signal handler (should terminate program)
- intend_exit_status(SIGTRAP, true);
- asm volatile("int3");
-}
-
-struct testcase {
- const char *test_name;
- void (*test_func)();
-};
-
-struct testcase all_tests[] = {
-#include "test-list.h"
- { NULL, NULL },
-};
-
-static int run_test_forked(struct testcase *test) {
- printf("** %s\n", test->test_name);
- int pipe_fds[2];
- int rc = pipe(pipe_fds);
- assert(rc == 0);
- int pid = fork();
- if (pid == 0) {
- rc = close(pipe_fds[0]);
- assert(rc == 0);
- g_intended_status_fd = pipe_fds[1];
-
- test->test_func();
- intend_exit_status(0, false);
- _exit(0);
- }
- rc = close(pipe_fds[1]);
- assert(rc == 0);
-
- int intended_status;
- int got = read(pipe_fds[0], &intended_status, sizeof(intended_status));
- bool got_intended_status = got == sizeof(intended_status);
- if (!got_intended_status) {
- printf("Test runner: Did not receive intended status\n");
- }
-
- int status;
- int pid2 = waitpid(pid, &status, 0);
- assert(pid2 == pid);
- if (!got_intended_status) {
- printf("Test returned exit status %i\n", status);
- return 1;
- }
- else if ((status & ~WCOREFLAG) != intended_status) {
- printf("Test failed with exit status %i, expected %i\n",
- status, intended_status);
- return 1;
- }
- else {
- return 0;
- }
-}
-
-static int run_test_by_name(const char *name) {
- struct testcase *test;
- for (test = all_tests; test->test_name != NULL; test++) {
- if (strcmp(name, test->test_name) == 0) {
- printf("Running test %s...\n", name);
- test->test_func();
- printf("OK\n");
- return 0;
- }
- }
- fprintf(stderr, "Test '%s' not found\n", name);
- return 1;
-}
-
-int main(int argc, char **argv) {
- setvbuf(stdout, NULL, _IONBF, 0);
- setvbuf(stderr, NULL, _IONBF, 0);
- if (argc == 2) {
- // Run one test without forking, to aid debugging.
- return run_test_by_name(argv[1]);
- }
- else if (argc > 2) {
- // TODO: run multiple tests.
- fprintf(stderr, "Too many arguments\n");
- return 1;
- }
- else {
- // Run all tests.
- struct testcase *test;
- int failures = 0;
- for (test = all_tests; test->test_name != NULL; test++) {
- failures += run_test_forked(test);
- }
- if (failures == 0) {
- printf("OK\n");
- return 0;
- }
- else {
- printf("%i FAILURE(S)\n", failures);
- return 1;
- }
- }
-}
diff --git a/sandbox/linux/seccomp/timestats.cc b/sandbox/linux/seccomp/timestats.cc
deleted file mode 100644
index 5d9b66a..0000000
--- a/sandbox/linux/seccomp/timestats.cc
+++ /dev/null
@@ -1,191 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// Helper program to analyze the time that Chrome's renderers spend in system
-// calls. Start Chrome like this:
-//
-// SECCOMP_SANDBOX_DEBUGGING=1 chrome --enable-seccomp-sandbox 2>&1 | timestats
-//
-// The program prints CPU time (0-100%) spent within system calls. This gives
-// a general idea of where it is worthwhile to spend effort optimizing Chrome.
-//
-// Caveats:
-// - there currently is no way to estimate what the overhead is for running
-// inside of the sandbox vs. running without a sandbox.
-// - we currently use a very simple heuristic to decide whether a system call
-// is blocking or not. Blocking system calls should not be included in the
-// computations. But it is quite possible for the numbers to be somewhat
-// wrong, because the heuristic failed.
-// - in order to collect this data, we have to turn on sandbox debugging.
-// There is a measurable performance penalty to doing so. Production numbers
-// are strictly better than the numbers reported by this tool.
-#include <set>
-#include <vector>
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-#include <time.h>
-
-static const int kAvgWindowSizeMs = 500;
-static const int kPeakWindowSizeMs = 2*1000;
-
-// Class containing information on a single system call. Most notably, it
-// contains the time when the system call happened, and the time that it
-// took to complete.
-class Datum {
- friend class Data;
- public:
- Datum(const char* name, double ms)
- : name_(name),
- ms_(ms) {
- struct timeval tv;
- gettimeofday(&tv, NULL);
- timestamp_ = tv.tv_sec*1000.0 + tv.tv_usec/1000.0;
- }
- virtual ~Datum() { }
-
- double operator-(const Datum& b) {
- return timestamp_ - b.timestamp_;
- }
-
- protected:
- const char* name_;
- double ms_;
- double timestamp_;
-};
-
-// Class containing data on the most recent system calls. It maintains
-// sliding averages for total CPU time used, and it also maintains a peak
-// CPU usage. The peak usage is usually updated slower than the average
-// usage, as that makes it easier to inspect visually.
-class Data {
- public:
- Data() { }
- virtual ~Data() { }
-
- void addData(const char* name, double ms) {
- average_.push_back(Datum(name, ms));
- peak_.push_back(Datum(name, ms));
-
- // Prune entries outside of the window
- std::vector<Datum>::iterator iter;
- for (iter = average_.begin();
- *average_.rbegin() - *iter > kAvgWindowSizeMs;
- ++iter) {
- }
- average_.erase(average_.begin(), iter);
-
- for (iter = peak_.begin();
- *peak_.rbegin() - *iter > kPeakWindowSizeMs;
- ++iter){
- }
- peak_.erase(peak_.begin(), iter);
-
- // Add the total usage of all system calls inside of the window
- double total = 0;
- for (iter = average_.begin(); iter != average_.end(); ++iter) {
- total += iter->ms_;
- }
-
- // Compute the peak CPU usage during the last window
- double peak = 0;
- double max = 0;
- std::vector<Datum>::iterator tail = peak_.begin();
- for (iter = tail; iter != peak_.end(); ++iter) {
- while (*iter - *tail > kAvgWindowSizeMs) {
- peak -= tail->ms_;
- ++tail;
- }
- peak += iter->ms_;
- if (peak > max) {
- max = peak;
- }
- }
-
- // Print the average CPU usage in the last window
- char buf[80];
- total *= 100.0/kAvgWindowSizeMs;
- max *= 100.0/kAvgWindowSizeMs;
- sprintf(buf, "%6.2f%% (peak=%6.2f%%) ", total, max);
-
- // Animate the actual usage, displaying both average and peak values
- int len = strlen(buf);
- int space = sizeof(buf) - len - 1;
- int mark = (total * space + 50)/100;
- int bar = (max * space + 50)/100;
- for (int i = 0; i < mark; ++i) {
- buf[len++] = '*';
- }
- if (mark == bar) {
- if (bar) {
- len--;
- }
- } else {
- for (int i = 0; i < bar - mark - 1; ++i) {
- buf[len++] = ' ';
- }
- }
- buf[len++] = '|';
- while (len < static_cast<int>(sizeof(buf))) {
- buf[len++] = ' ';
- }
- strcpy(buf + len, "\r");
- fwrite(buf, len + 1, 1, stdout);
- fflush(stdout);
- }
-
- private:
- std::vector<Datum> average_;
- std::vector<Datum> peak_;
-};
-static Data data;
-
-
-int main(int argc, char *argv[]) {
- char buf[80];
- bool expensive = false;
- while (fgets(buf, sizeof(buf), stdin)) {
- // Allow longer delays for expensive system calls
- if (strstr(buf, "This is an expensive system call")) {
- expensive = true;
- continue;
- }
-
- // Parse the string and extract the elapsed time
- const char elapsed[] = "Elapsed time: ";
- char* ms_string = strstr(buf, elapsed);
- char* endptr;
- double ms;
- char* colon = strchr(buf, ':');
-
- // If this string doesn't match, then it must be some other type of
- // message. Just ignore it.
- // It is quite likely that we will regularly encounter debug messages
- // that either should be parsed by a completely different tool, or
- // messages that were intended for humans to read.
- if (!ms_string ||
- ((ms = strtod(ms_string + sizeof(elapsed) - 1, &endptr)),
- endptr == ms_string) ||
- !colon) {
- continue;
- }
-
- // Filter out system calls that were probably just blocking
- // TODO(markus): automatically compute the cut-off for blocking calls
- if (!expensive && ms > 0.05) {
- continue;
- }
- expensive = false;
-
- // Extract the name of the system call
- *colon = '\000';
-
- // Add the data point and update the display
- data.addData(buf, ms);
- }
- puts("");
- return 0;
-}
diff --git a/sandbox/linux/seccomp/tls.h b/sandbox/linux/seccomp/tls.h
deleted file mode 100644
index 7ec5a28..0000000
--- a/sandbox/linux/seccomp/tls.h
+++ /dev/null
@@ -1,155 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef TLS_H__
-#define TLS_H__
-
-#include <asm/ldt.h>
-#include <stdlib.h>
-#include <sys/mman.h>
-#include <sys/prctl.h>
-
-namespace playground {
-
-class TLS {
- private:
- class SysCalls {
- public:
- #define SYS_CPLUSPLUS
- #define SYS_ERRNO my_errno
- #define SYS_INLINE inline
- #define SYS_PREFIX -1
- #undef SYS_LINUX_SYSCALL_SUPPORT_H
- #include "linux_syscall_support.h"
- SysCalls() : my_errno(0) { }
- int my_errno;
- };
-
- public:
- static void *allocateTLS() {
- SysCalls sys;
- #if defined(__x86_64__)
- void *addr = sys.mmap(0, 4096, PROT_READ|PROT_WRITE,
- MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
- if (sys.arch_prctl(ARCH_SET_GS, addr) < 0) {
- return NULL;
- }
- #elif defined(__i386__)
- void *addr = sys.mmap2(0, 4096, PROT_READ|PROT_WRITE,
- MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
- struct user_desc u;
- u.entry_number = (typeof u.entry_number)-1;
- u.base_addr = (int)addr;
- u.limit = 0xfffff;
- u.seg_32bit = 1;
- u.contents = 0;
- u.read_exec_only = 0;
- u.limit_in_pages = 1;
- u.seg_not_present = 0;
- u.useable = 1;
- if (sys.set_thread_area(&u) < 0) {
- return NULL;
- }
- asm volatile(
- "movw %w0, %%fs"
- :
- : "q"(8*u.entry_number+3));
- #else
- #error Unsupported target platform
- #endif
- return addr;
- }
-
- static void freeTLS() {
- SysCalls sys;
- void *addr;
- #if defined(__x86_64__)
- sys.arch_prctl(ARCH_GET_GS, &addr);
- #elif defined(__i386__)
- struct user_desc u;
- sys.get_thread_area(&u);
- addr = (void *)u.base_addr;
- #else
- #error Unsupported target platform
- #endif
- sys.munmap(addr, 4096);
- }
-
- template<class T> static inline bool setTLSValue(int idx, T val) {
- #if defined(__x86_64__)
- if (idx < 0 || idx >= 4096/8) {
- return false;
- }
- asm volatile(
- "movq %0, %%gs:(%1)\n"
- :
- : "q"((void *)val), "q"(8ll * idx));
- #elif defined(__i386__)
- if (idx < 0 || idx >= 4096/8) {
- return false;
- }
- if (sizeof(T) == 8) {
- asm volatile(
- "movl %0, %%fs:(%1)\n"
- :
- : "r"((unsigned)val), "r"(8 * idx));
- asm volatile(
- "movl %0, %%fs:(%1)\n"
- :
- : "r"((unsigned)((unsigned long long)val >> 32)), "r"(8 * idx + 4));
- } else {
- asm volatile(
- "movl %0, %%fs:(%1)\n"
- :
- : "r"(val), "r"(8 * idx));
- }
- #else
- #error Unsupported target platform
- #endif
- return true;
- }
-
- template<class T> static inline T getTLSValue(int idx) {
- #if defined(__x86_64__)
- long long rc;
- if (idx < 0 || idx >= 4096/8) {
- return 0;
- }
- asm volatile(
- "movq %%gs:(%1), %0\n"
- : "=q"(rc)
- : "q"(8ll * idx));
- return (T)rc;
- #elif defined(__i386__)
- if (idx < 0 || idx >= 4096/8) {
- return 0;
- }
- if (sizeof(T) == 8) {
- unsigned lo, hi;
- asm volatile(
- "movl %%fs:(%1), %0\n"
- : "=r"(lo)
- : "r"(8 * idx));
- asm volatile(
- "movl %%fs:(%1), %0\n"
- : "=r"(hi)
- : "r"(8 * idx + 4));
- return (T)((unsigned long long)lo + ((unsigned long long)hi << 32));
- } else {
- long rc;
- asm volatile(
- "movl %%fs:(%1), %0\n"
- : "=r"(rc)
- : "r"(8 * idx));
- return (T)rc;
- }
- #else
- #error Unsupported target platform
- #endif
- }
-
-};
-
-} // namespace
-#endif
diff --git a/sandbox/linux/seccomp/trusted_process.cc b/sandbox/linux/seccomp/trusted_process.cc
deleted file mode 100644
index 5c62b0f..0000000
--- a/sandbox/linux/seccomp/trusted_process.cc
+++ /dev/null
@@ -1,268 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include <dirent.h>
-#include <map>
-
-#include "debug.h"
-#include "sandbox_impl.h"
-#include "syscall_table.h"
-
-namespace playground {
-
-struct SandboxPolicy g_policy;
-
- struct Thread { // Per-thread record kept by Sandbox::trustedProcess() in a map keyed by the thread's cookie.
- int fdPub, fd; // the two descriptors received via getFd() when the thread registers; both are closed when it exits
- SecureMem::Args* mem; // the secure memory entry assigned to this thread
- };
-
- SecureMem::Args* Sandbox::getNewSecureMem() { // Takes one entry from secureMemPool_; returns NULL when the pool is empty.
- if (!secureMemPool_.empty()) {
- SecureMem::Args* rc = secureMemPool_.back(); // entries are taken from the back of the pool
- secureMemPool_.pop_back();
- memset(rc->scratchPage, 0, sizeof(rc->scratchPage)); // only scratchPage is zeroed; other fields of the entry keep their values
- return rc;
- }
- return NULL;
- }
-
- void Sandbox::trustedProcess(int parentMapsFd, int processFdPub, int sandboxFd,
- int cloneFd, SecureMem::Args* secureArena) { // Registers new threads arriving on cloneFd and dispatches system call requests read from sandboxFd; exits only via die(). processFdPub is not referenced in this body.
- // The trusted process doesn't have access to TLS. Zero out the segment
- // registers so that we can later test that we are in the trusted process.
- #if defined(__x86_64__)
- asm volatile("mov %0, %%gs\n" : : "r"(0));
- #elif defined(__i386__)
- asm volatile("mov %0, %%fs\n" : : "r"(0));
- #else
- #error Unsupported target platform
- #endif
-
- std::map<long long, struct Thread> threads; // thread records, keyed by the cookie handed out below
- SysCalls sys;
- long long cookie = 0; // incremented once per registered thread, so the first thread gets cookie 1
-
- // The very first entry in the secure memory arena has been assigned to the
- // initial thread. The remaining entries are available for allocation.
- SecureMem::Args* startAddress = secureArena;
- SecureMem::Args* nextThread = startAddress; // secure memory entry expected for the next thread that registers
- for (int i = 0; i < kMaxThreads-1; i++) {
- secureMemPool_.push_back(++startAddress); // pre-increment skips entry 0, which belongs to the initial thread
- }
-
- newThreadCreated:
- // Receive information from newly created thread
- Thread *newThread = &threads[++cookie]; // operator[] creates the record under the fresh cookie
- memset(newThread, 0, sizeof(Thread));
- struct {
- SecureMem::Args* self;
- int tid;
- int fdPub;
- } __attribute__((packed)) data;
-
- size_t dataLen = sizeof(data);
- if (!getFd(cloneFd, &newThread->fdPub, &newThread->fd, &data, &dataLen) ||
- dataLen != sizeof(data)) {
- // We get here either because the sandbox got corrupted, or because our
- // parent process has terminated.
- if (newThread->fdPub || dataLen) { // something was received, so report an error rather than exiting silently
- die("Failed to receive new thread information");
- }
- die();
- }
- if (data.self != nextThread) {
- // The only potentially security critical information received from the
- // newly created thread is "self". The "tid" is for informational purposes
- // (and for use in the new thread's TLS), and "fdPub" is uncritical as all
- // file descriptors are considered untrusted.
- // Thus, we only use "self" for a sanity check, but don't actually trust
- // it beyond that.
- die("Received corrupted thread information");
- }
- newThread->mem = nextThread;
-
- // Set up TLS area and let thread know that the data is now ready
- nextThread->cookie = cookie;
- nextThread->threadId = data.tid;
- nextThread->threadFdPub = data.fdPub;
- write(sys, newThread->fd, "", 1); // sends a single byte on the thread's fd; NOTE(review): the result is ignored
-
- // Dispatch system calls that have been forwarded from the trusted thread(s).
- for (;;) {
- struct {
- unsigned int sysnum;
- long long cookie;
- } __attribute__((packed)) header;
-
- int rc;
- if ((rc = read(sys, sandboxFd, &header, sizeof(header))) !=sizeof(header)){
- if (rc) { // a short or failed read is reported; a zero-length read exits without a message
- die("Failed to read system call number and thread id");
- }
- die();
- }
- std::map<long long, struct Thread>::iterator iter =
- threads.find(header.cookie); // the cookie in the request selects the thread record
- if (iter == threads.end()) {
- die("Received request from unknown thread");
- }
- struct Thread* currentThread = &iter->second;
- if (header.sysnum > maxSyscall ||
- !syscallTable[header.sysnum].trustedProcess) { // reject numbers beyond the table and entries without a handler
- die("Trusted process encountered unexpected system call");
- }
-
- // Dispatch system call to handler function. Treat both exit() and clone()
- // specially.
- if (syscallTable[header.sysnum].trustedProcess(parentMapsFd,
- sandboxFd,
- currentThread->fdPub,
- currentThread->fd,
- currentThread->mem) &&
- header.sysnum == __NR_clone) { // clone is followed up only when its handler returned true
- nextThread = currentThread->mem->newSecureMem; // the next registration must present this entry as "self"
- goto newThreadCreated;
- } else if (header.sysnum == __NR_exit) { // reached for exit whatever the handler returned
- NOINTR_SYS(sys.close(iter->second.fdPub));
- NOINTR_SYS(sys.close(iter->second.fd));
- SecureMem::Args* secureMem = currentThread->mem; // saved before erase() invalidates currentThread
- threads.erase(iter);
- secureMemPool_.push_back(secureMem); // the entry becomes available to getNewSecureMem() again
- }
- }
- }
-
- int Sandbox::initializeProtectedMap(int fd) { // Receives a maps descriptor over fd, records each listed address range in protectedMap_, acknowledges on fd, and returns the descriptor.
- int mapsFd;
- if (!getFd(fd, &mapsFd, NULL, NULL, NULL)) { // only one descriptor and no payload is requested here
- maps_failure:
- die("Cannot access /proc/self/maps");
- }
-
- // Read the memory mappings as they were before the sandbox takes effect.
- // These mappings cannot be changed by the sandboxed process.
- char line[80];
- FILE *fp = fdopen(mapsFd, "r"); // NOTE(review): result is not checked for NULL, and fp is never fclose()d here (mapsFd is returned to the caller)
- for (bool truncated = false;;) { // truncated: the previous chunk did not end in '\n', so this chunk continues the same line
- if (fgets(line, sizeof(line), fp) == NULL) {
- if (feof(fp) || errno != EINTR) { // NOTE(review): errno is not cleared before fgets(), so this may test a stale value -- confirm
- break;
- }
- continue;
- }
- if (!truncated) { // parse only the first chunk of each line, which holds the address range
- unsigned long start, stop;
- char *ptr = line;
- errno = 0;
- start = strtoul(ptr, &ptr, 16);
- if (errno || *ptr++ != '-') { // expected form: "<hex start>-<hex stop> "
- parse_failure:
- die("Failed to parse /proc/self/maps");
- }
- stop = strtoul(ptr, &ptr, 16);
- if (errno || *ptr++ != ' ') {
- goto parse_failure;
- }
- protectedMap_[reinterpret_cast<void *>(start)] = stop - start; // key = start address, value = length in bytes
- }
- truncated = strchr(line, '\n') == NULL;
- }
-
- // Prevent low address memory allocations. Some buggy kernels allow those
- if (protectedMap_[0] < (64 << 10)) { // operator[] creates the entry for address 0 if it is absent
- protectedMap_[0] = 64 << 10;
- }
-
- // Let the sandbox know that we are done parsing the memory map.
- SysCalls sys;
- if (write(sys, fd, &mapsFd, sizeof(mapsFd)) != sizeof(mapsFd)) { // the descriptor number itself is the payload
- goto maps_failure;
- }
-
- return mapsFd;
- }
-
- SecureMem::Args* Sandbox::createTrustedProcess(int processFdPub, int sandboxFd,
- int cloneFdPub, int cloneFd) { // Maps the shared secure arena and mutex page, forks the trusted process, and returns the arena after revoking the caller's access to it. cloneFdPub is not referenced in this body.
- // Allocate memory that will be used by an arena for storing the secure
- // memory. While we allow this memory area to be empty at times (e.g. when
- // not all threads are in use), we make sure that it never gets overwritten
- // by user-allocated memory. This happens in initializeProtectedMap() and
- // snapshotMemoryMappings().
- SecureMem::Args* secureArena = reinterpret_cast<SecureMem::Args*>(
- mmap(NULL, 8192*kMaxThreads, PROT_READ|PROT_WRITE,
- MAP_SHARED|MAP_ANONYMOUS, -1, 0)); // 8192 bytes per thread; presumably sizeof(SecureMem::Args) == 8192 -- confirm
- if (secureArena == MAP_FAILED) {
- die("Failed to allocate secure memory arena");
- }
-
- // Set up the mutex to be accessible from the trusted process and from
- // children of the trusted thread(s)
- if (mmap(&syscall_mutex_, 4096, PROT_READ|PROT_WRITE,
- MAP_SHARED|MAP_ANONYMOUS|MAP_FIXED, -1, 0) != &syscall_mutex_) { // MAP_FIXED replaces the 4096 bytes at &syscall_mutex_ with a shared mapping
- die("Failed to initialize secure mutex");
- }
- syscall_mutex_ = 0x80000000;
-
-
- // Create a trusted process that can evaluate system call parameters and
- // decide whether a system call should execute. This process runs outside of
- // the seccomp sandbox. It communicates with the sandbox'd process through
- // a socketpair() and through securely shared memory.
- pid_t pid = fork();
- if (pid < 0) {
- die("Failed to create trusted process");
- }
- if (!pid) { // child: this branch becomes the trusted process and never returns
- // Close all file handles except for sandboxFd, cloneFd, and stdio
- DIR *dir = opendir("/proc/self/fd");
- if (dir == 0) {
- // If we don't know the list of our open file handles, just try closing
- // all valid ones.
- for (int fd = sysconf(_SC_OPEN_MAX); --fd > 2; ) { // counts down and stops before descriptors 0-2
- if (fd != sandboxFd && fd != cloneFd) {
- close(fd);
- }
- }
- } else {
- // If available, it is much more efficient to just close the file
- // handles that show up in /proc/self/fd/
- struct dirent de, *res;
- while (!readdir_r(dir, &de, &res) && res) {
- if (res->d_name[0] < '0') // skips names starting below '0' in ASCII, such as "." and ".."
- continue;
- int fd = atoi(res->d_name);
- if (fd > 2 &&
- fd != sandboxFd && fd != cloneFd && fd != dirfd(dir)) { // also keep the descriptor backing the directory stream in use
- close(fd);
- }
- }
- closedir(dir);
- }
-
- // Initialize secure memory used for threads
- for (int i = 0; i < kMaxThreads; i++) {
- SecureMem::Args* args = secureArena + i;
- args->self = args; // each entry records its own address; trustedProcess() compares it with what a new thread reports
- #ifndef NDEBUG
- args->allowAllSystemCalls= Debug::isEnabled();
- #endif
- }
-
- int parentMapsFd = initializeProtectedMap(sandboxFd);
- trustedProcess(parentMapsFd, processFdPub, sandboxFd,
- cloneFd, secureArena);
- die(); // reached only if trustedProcess() returns
- }
-
- // We are still in the untrusted code. Deny access to restricted resources.
- mprotect(secureArena, 8192*kMaxThreads, PROT_NONE); // NOTE(review): result not checked; on failure this process would keep access to the arena
- mprotect(&syscall_mutex_, 4096, PROT_NONE); // NOTE(review): result not checked
- close(sandboxFd);
-
- return secureArena;
- }
-
-} // namespace
diff --git a/sandbox/linux/seccomp/trusted_thread.cc b/sandbox/linux/seccomp/trusted_thread.cc
deleted file mode 100644
index 6d6a3f5..0000000
--- a/sandbox/linux/seccomp/trusted_thread.cc
+++ /dev/null
@@ -1,1483 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "sandbox_impl.h"
-#include "syscall_table.h"
-
-namespace playground {
-
-void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
- SecureMem::Args* secureMem) {
- SecureMem::Args args = { { { { { 0 } } } } };
- args.self = &args;
- args.newSecureMem = secureMem;
- args.processFdPub = processFdPub;
- args.cloneFdPub = cloneFdPub;
-#if defined(__x86_64__)
- asm volatile(
- "push %%rbx\n"
- "push %%rbp\n"
- "mov %0, %%rbp\n" // %rbp = args
- "xor %%rbx, %%rbx\n" // initial sequence number
- "lea 999f(%%rip), %%r15\n" // continue in same thread
-
- // Signal handlers are process-wide. This means that for security
- // reasons, we cannot allow that the trusted thread ever executes any
- // signal handlers.
- // We prevent the execution of signal handlers by setting a signal
- // mask that blocks all signals. In addition, we make sure that the
- // stack pointer is invalid.
- // We cannot reset the signal mask until after we have enabled
- // Seccomp mode. Our sigprocmask() wrapper would normally do this by
- // raising a signal, modifying the signal mask in the kernel-generated
- // signal frame, and then calling sigreturn(). This presents a bit of
- // a Catch-22, as all signals are masked and we can therefore not
- // raise any signal that would allow us to generate the signal stack
- // frame.
- // Instead, we have to create the signal stack frame prior to entering
- // Seccomp mode. This incidentally also helps us to restore the
- // signal mask to the same value that it had prior to entering the
- // sandbox.
- // The signal wrapper for clone() is the second entry point into this
- // code (by means of sending an IPC to its trusted thread). It goes
- // through the same steps of creating a signal stack frame on the
- // newly created thread's stacks prior to cloning. See clone.cc for
- // details.
- "mov $56+0xF000, %%eax\n" // __NR_clone + 0xF000
- "mov %%rsp, %%rcx\n"
- "int $0\n" // push a signal stack frame (see clone.cc)
- "mov %%rcx, 0xA0(%%rsp)\n" // pop stack upon call to sigreturn()
- "mov %%rsp, %%r9\n"
- "mov $2, %%rdi\n" // how = SIG_SETMASK
- "pushq $-1\n"
- "mov %%rsp, %%rsi\n" // set = full mask
- "xor %%rdx, %%rdx\n" // old_set = NULL
- "mov $8, %%r10\n" // mask all 64 signals
- "mov $14, %%eax\n" // NR_rt_sigprocmask
- "syscall\n"
- "xor %%rsp, %%rsp\n" // invalidate the stack in all trusted code
- "jmp 20f\n" // create trusted thread
-
- // TODO(markus): Coalesce the read() operations by reading into a bigger
- // buffer.
-
- // Parameters:
- // *%fs: secure memory region
- // the page following this one contains the scratch space
- // %r13: thread's side of threadFd
- // %r15: processFdPub
-
- // Local variables:
- // %rbx: sequence number for trusted calls
-
- // Temporary variables:
- // %r8: child stack
- // %r9: system call number, child stack
- // %rbp: secure memory of previous thread
-
- // Layout of secure shared memory region (c.f. securemem.h):
- // 0x00: pointer to the secure shared memory region (i.e. self)
- // 0x08: sequence number; must match %rbx
- // 0x10: call type; must match %eax, iff %eax == -1 || %eax == -2
- // 0x18: system call number; passed to syscall in %rax
- // 0x20: first argument; passed to syscall in %rdi
- // 0x28: second argument; passed to syscall in %rsi
- // 0x30: third argument; passed to syscall in %rdx
- // 0x38: fourth argument; passed to syscall in %r10
- // 0x40: fifth argument; passed to syscall in %r8
- // 0x48: sixth argument; passed to syscall in %r9
- // 0x50: stored return address for clone() system call
- // 0x58: stored %rbp value for clone() system call
- // 0x60: stored %rbx value for clone() system call
- // 0x68: stored %rcx value for clone() system call
- // 0x70: stored %rdx value for clone() system call
- // 0x78: stored %rsi value for clone() system call
- // 0x80: stored %rdi value for clone() system call
- // 0x88: stored %r8 value for clone() system call
- // 0x90: stored %r9 value for clone() system call
- // 0x98: stored %r10 value for clone() system call
- // 0xA0: stored %r11 value for clone() system call
- // 0xA8: stored %r12 value for clone() system call
- // 0xB0: stored %r13 value for clone() system call
- // 0xB8: stored %r14 value for clone() system call
- // 0xC0: stored %r15 value for clone() system call
- // 0xC8: new shared memory for clone()
- // 0xD0: processFdPub for talking to trusted process
- // 0xD4: cloneFdPub for talking to trusted process
- // 0xD8: set to non-zero, if in debugging mode
- // 0xDC: most recent SHM id returned by shmget(IPC_PRIVATE)
- // 0xE0: cookie assigned to us by the trusted process (TLS_COOKIE)
- // 0xE8: thread id (TLS_TID)
- // 0xF0: threadFdPub (TLS_THREAD_FD)
- // 0x200-0x1000: securely passed verified file name(s)
-
- // Layout of (untrusted) scratch space:
- // 0x00: syscall number; passed in %rax
- // 0x04: first argument; passed in %rdi
- // 0x0C: second argument; passed in %rsi
- // 0x14: third argument; passed in %rdx
- // 0x1C: fourth argument; passed in %r10
- // 0x24: fifth argument; passed in %r8
- // 0x2C: sixth argument; passed in %r9
- // 0x34: return value
- // 0x3C: RDTSCP result (%eax)
- // 0x40: RDTSCP result (%edx)
- // 0x44: RDTSCP result (%ecx)
- // 0x48: last system call (not used on x86-64)
- // 0x4C: number of consecutive calls to a time fnc (not used on x86-64)
- // 0x50: nesting level of system calls (for debugging purposes only)
- // 0x54: signal mask
- // 0x5C: in SEGV handler
-
- // We use the %fs register for accessing the secure read-only page, and
- // the untrusted scratch space immediately following it. The segment
- // register and the local descriptor table is set up by passing
- // appropriate arguments to clone().
-
- "0:xor %%rsp, %%rsp\n"
- "mov $2, %%ebx\n" // %rbx = initial sequence number
-
- // Read request from untrusted thread, or from trusted process. In either
- // case, the data that we read has to be considered untrusted.
- // read(threadFd, &scratch, 4)
- "1:xor %%rax, %%rax\n" // NR_read
- "mov %%r13, %%rdi\n" // fd = threadFd
- "mov %%fs:0x0, %%rsi\n" // secure_mem
- "add $0x1000, %%rsi\n" // buf = &scratch
- "mov $4, %%edx\n" // len = 4
- "2:syscall\n"
- "cmp $-4, %%rax\n" // EINTR
- "jz 2b\n"
- "cmp %%rdx, %%rax\n"
- "jnz 25f\n" // exit process
-
- // Retrieve system call number. It is crucial that we only dereference
- // %fs:0x1000 exactly once. Afterwards, memory becomes untrusted and
- // we must use the value that we have read the first time.
- "mov 0(%%rsi), %%eax\n"
-
- // If syscall number is -1, execute an unlocked system call from the
- // secure memory area
- "cmp $-1, %%eax\n"
- "jnz 5f\n"
- "3:cmp %%rbx, %%fs:0x8\n"
- "jne 25f\n" // exit process
- "cmp %%fs:0x10, %%eax\n"
- "jne 25f\n" // exit process
- "mov %%fs:0x18, %%rax\n"
- "mov %%fs:0x20, %%rdi\n"
- "mov %%fs:0x28, %%rsi\n"
- "mov %%fs:0x30, %%rdx\n"
- "mov %%fs:0x38, %%r10\n"
- "mov %%fs:0x40, %%r8\n"
- "mov %%fs:0x48, %%r9\n"
- "cmp %%rbx, %%fs:0x8\n"
- "jne 25f\n" // exit process
- "add $2, %%rbx\n"
-
- // shmget() gets some special treatment. Whenever we return from this
- // system call, we remember the most recently returned SysV shm id.
- "cmp $29, %%eax\n" // NR_shmget
- "jnz 4f\n"
- "syscall\n"
- "mov %%rax, %%r8\n"
- "mov $56, %%eax\n" // NR_clone
- "mov $17, %%edi\n" // flags = SIGCHLD
- "mov $1, %%esi\n" // stack = 1
- "syscall\n"
- "test %%rax, %%rax\n"
- "js 25f\n" // exit process
- "mov %%rax, %%rdi\n"
- "jnz 8f\n" // wait for child, then return result
- "mov %%fs:0x0, %%rdi\n" // start = secure_mem
- "mov $4096, %%esi\n" // len = 4096
- "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE
- "mov $10, %%eax\n" // NR_mprotect
- "syscall\n"
- "mov %%r8d, 0xDC(%%rdi)\n" // set most recently returned SysV shm id
- "xor %%rdi, %%rdi\n"
-
- // When debugging messages are enabled, warn about expensive system calls
- #ifndef NDEBUG
- "cmpw $0, %%fs:0xD8\n" // debug mode
- "jz 27f\n"
- "mov $1, %%eax\n" // NR_write
- "mov $2, %%edi\n" // fd = stderr
- "lea 101f(%%rip), %%rsi\n" // "This is an expensive system call"
- "mov $102f-101f, %%edx\n" // len = strlen(msg)
- "syscall\n"
- "xor %%rdi, %%rdi\n"
- #endif
-
- "jmp 27f\n" // exit program, no message
- "4:syscall\n"
- "jmp 15f\n" // return result
-
- // If syscall number is -2, execute locked system call from the
- // secure memory area
- "5:jg 12f\n"
- "cmp $-2, %%eax\n"
- "jnz 9f\n"
- "cmp %%rbx, %%fs:0x8\n"
- "jne 25f\n" // exit process
- "cmp %%eax, %%fs:0x10\n"
- "jne 25f\n" // exit process
-
- // When debugging messages are enabled, warn about expensive system calls
- #ifndef NDEBUG
- "cmpw $0, %%fs:0xD8\n" // debug mode
- "jz 6f\n"
- "mov $1, %%eax\n" // NR_write
- "mov $2, %%edi\n" // fd = stderr
- "lea 101f(%%rip), %%rsi\n" // "This is an expensive system call"
- "mov $102f-101f, %%edx\n" // len = strlen(msg)
- "syscall\n"
- "6:"
- #endif
-
- "mov %%fs:0x18, %%rax\n"
- "mov %%fs:0x20, %%rdi\n"
- "mov %%fs:0x28, %%rsi\n"
- "mov %%fs:0x30, %%rdx\n"
- "mov %%fs:0x38, %%r10\n"
- "mov %%fs:0x40, %%r8\n"
- "mov %%fs:0x48, %%r9\n"
- "cmp %%rbx, %%fs:0x8\n"
- "jne 25f\n" // exit process
-
- // clone() has unusual calling conventions and must be handled specially
- "cmp $56, %%rax\n" // NR_clone
- "jz 19f\n"
-
- // exit() terminates trusted thread
- "cmp $60, %%eax\n" // NR_exit
- "jz 18f\n"
-
- // Perform requested system call
- "syscall\n"
-
- // Unlock mutex
- "7:cmp %%rbx, %%fs:0x8\n"
- "jne 25f\n" // exit process
- "add $2, %%rbx\n"
- "mov %%rax, %%r8\n"
- "mov $56, %%eax\n" // NR_clone
- "mov $17, %%rdi\n" // flags = SIGCHLD
- "mov $1, %%rsi\n" // stack = 1
- "syscall\n"
- "test %%rax, %%rax\n"
- "js 25f\n" // exit process
- "jz 22f\n" // unlock and exit
- "mov %%rax, %%rdi\n"
- "8:xor %%rsi, %%rsi\n"
- "xor %%rdx, %%rdx\n"
- "xor %%r10, %%r10\n"
- "mov $61, %%eax\n" // NR_wait4
- "syscall\n"
- "cmp $-4, %%eax\n" // EINTR
- "jz 8b\n"
- "mov %%r8, %%rax\n"
- "jmp 15f\n" // return result
-
- // If syscall number is -3, read the time stamp counter
- "9:cmp $-3, %%eax\n"
- "jnz 10f\n"
- "rdtsc\n" // sets %edx:%eax
- "xor %%rcx, %%rcx\n"
- "jmp 11f\n"
- "10:cmp $-4, %%eax\n"
- "jnz 12f\n"
- "rdtscp\n" // sets %edx:%eax and %ecx
- "11:add $0x3C, %%rsi\n"
- "mov %%eax, 0(%%rsi)\n"
- "mov %%edx, 4(%%rsi)\n"
- "mov %%ecx, 8(%%rsi)\n"
- "mov $12, %%edx\n"
- "jmp 16f\n" // return result
-
- // Check in syscallTable whether this system call is unrestricted
- "12:mov %%rax, %%r9\n"
- #ifndef NDEBUG
- "cmpw $0, %%fs:0xD8\n" // debug mode
- "jnz 13f\n"
- #endif
- "cmp playground$maxSyscall(%%rip), %%eax\n"
- "ja 25f\n" // exit process
- "shl $4, %%rax\n"
- "lea playground$syscallTable(%%rip), %%rdi\n"
- "add %%rdi, %%rax\n"
- "mov 0(%%rax), %%rax\n"
- "cmp $1, %%rax\n"
- "jne 25f\n" // exit process
-
- // Default behavior for unrestricted system calls is to just execute
- // them. Read the remaining arguments first.
- "13:mov %%rsi, %%r8\n"
- "xor %%rax, %%rax\n" // NR_read
- "mov %%r13, %%rdi\n" // fd = threadFd
- "add $4, %%rsi\n" // buf = &scratch + 4
- "mov $48, %%edx\n" // len = 6*sizeof(void *)
- "14:syscall\n"
- "cmp $-4, %%rax\n" // EINTR
- "jz 14b\n"
- "cmp %%rdx, %%rax\n"
- "jnz 25f\n" // exit process
- "mov %%r9, %%rax\n"
- "mov 0x04(%%r8), %%rdi\n"
- "mov 0x0C(%%r8), %%rsi\n"
- "mov 0x14(%%r8), %%rdx\n"
- "mov 0x1C(%%r8), %%r10\n"
- "mov 0x2C(%%r8), %%r9\n"
- "mov 0x24(%%r8), %%r8\n"
- "cmp $231, %%rax\n" // NR_exit_group
- "jz 27f\n" // exit program, no message
- "syscall\n"
-
- // Return result of system call to sandboxed thread
- "15:mov %%fs:0x0, %%rsi\n" // secure_mem
- "add $0x1034, %%rsi\n" // buf = &scratch + 52
- "mov %%rax, (%%rsi)\n"
- "mov $8, %%edx\n" // len = 8
- "16:mov %%r13, %%rdi\n" // fd = threadFd
- "mov $1, %%eax\n" // NR_write
- "17:syscall\n"
- "cmp %%rdx, %%rax\n"
- "jz 1b\n"
- "cmp $-4, %%rax\n" // EINTR
- "jz 17b\n"
- "jmp 25f\n" // exit process
-
- // NR_exit:
- // Exit trusted thread after cleaning up resources
- "18:mov %%fs:0x0, %%rsi\n" // secure_mem
- "mov 0xF0(%%rsi), %%rdi\n" // fd = threadFdPub
- "mov $3, %%eax\n" // NR_close
- "syscall\n"
- "mov %%rsi, %%rdi\n" // start = secure_mem
- "mov $8192, %%esi\n" // length = 8192
- "xor %%rdx, %%rdx\n" // prot = PROT_NONE
- "mov $10, %%eax\n" // NR_mprotect
- "syscall\n"
- "mov %%r13, %%rdi\n" // fd = threadFd
- "mov $3, %%eax\n" // NR_close
- "syscall\n"
- "mov $56, %%eax\n" // NR_clone
- "mov $17, %%rdi\n" // flags = SIGCHLD
- "mov $1, %%rsi\n" // stack = 1
- "syscall\n"
- "mov %%rax, %%rdi\n"
- "test %%rax, %%rax\n"
- "js 27f\n" // exit process
- "jne 21f\n" // reap helper, exit thread
- "jmp 22f\n" // unlock mutex
-
- // NR_clone:
- // Original trusted thread calls clone() to create new nascent
- // thread. This thread is (typically) fully privileged and shares all
- // resources with the caller (i.e. the previous trusted thread),
- // and by extension it shares all resources with the sandbox'd
- // threads.
- "19:mov %%fs:0x0, %%rbp\n" // %rbp = old_shared_mem
- "mov %%rsi, %%r15\n" // remember child stack
- "mov $1, %%rsi\n" // stack = 1
- "syscall\n" // calls NR_clone
- "cmp $-4095, %%rax\n" // return codes -1..-4095 are errno values
- "jae 7b\n" // unlock mutex, return result
- "add $2, %%rbx\n"
- "test %%rax, %%rax\n"
- "jne 15b\n" // return result
-
- // In nascent thread, now.
- "sub $2, %%rbx\n"
-
- // We want to maintain an invalid %rsp whenever we access untrusted
- // memory. This ensures that even if an attacker can trick us into
- // triggering a SIGSEGV, we will never successfully execute a signal
- // handler.
- // Signal handlers are inherently dangerous, as an attacker could trick
- // us into returning to the wrong address by adjusting the signal stack
- // right before the handler returns.
- // N.B. While POSIX is curiously silent about this, it appears that on
- // Linux, alternate signal stacks are a per-thread property. That is
- // good. It means that this security mechanism works, even if the
- // sandboxed thread manages to set up an alternate signal stack.
- //
- // TODO(markus): We currently do not support emulating calls to
- // sys_clone() with a zero (i.e. copy) stack parameter. See clone.cc
- // for a discussion on how to fix this, if this ever becomes necessary.
- "mov %%r15, %%r9\n" // %r9 = child_stack
- "xor %%r15, %%r15\n" // Request to return from clone() when done
-
- // Get thread id of nascent thread
- "20:mov $186, %%eax\n" // NR_gettid
- "syscall\n"
- "mov %%rax, %%r14\n"
-
- // Nascent thread creates socketpair() for sending requests to
- // trusted thread.
- // We can create the filehandles on the child's stack. Filehandles are
- // always treated as untrusted.
- // socketpair(AF_UNIX, SOCK_STREAM, 0, fds)
- "sub $0x10, %%r9\n"
- "mov %%r15, 8(%%r9)\n" // preserve return address on child stack
- "mov $53, %%eax\n" // NR_socketpair
- "mov $1, %%edi\n" // domain = AF_UNIX
- "mov $1, %%esi\n" // type = SOCK_STREAM
- "xor %%rdx, %%rdx\n" // protocol = 0
- "mov %%r9, %%r10\n" // sv = child_stack
- "syscall\n"
- "test %%rax, %%rax\n"
- "jz 28f\n"
-
- // If things went wrong, we don't have an (easy) way of signaling
- // the parent. For our purposes, it is sufficient to fail with a
- // fatal error.
- "jmp 25f\n" // exit process
- "21:xor %%rsi, %%rsi\n"
- "xor %%rdx, %%rdx\n"
- "xor %%r10, %%r10\n"
- "mov $61, %%eax\n" // NR_wait4
- "syscall\n"
- "cmp $-4, %%eax\n" // EINTR
- "jz 21b\n"
- "jmp 23f\n" // exit thread (no message)
- "22:lea playground$syscall_mutex(%%rip), %%rdi\n"
- "mov $4096, %%esi\n"
- "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE
- "mov $10, %%eax\n" // NR_mprotect
- "syscall\n"
- "lock; addl $0x80000000, (%%rdi)\n"
- "jz 23f\n" // exit thread
- "mov $1, %%edx\n"
- "mov %%rdx, %%rsi\n" // FUTEX_WAKE
- "mov $202, %%eax\n" // NR_futex
- "syscall\n"
- "23:mov $60, %%eax\n" // NR_exit
- "mov $1, %%edi\n" // status = 1
- "24:syscall\n"
- "25:mov $1, %%eax\n" // NR_write
- "mov $2, %%edi\n" // fd = stderr
- "lea 100f(%%rip), %%rsi\n" // "Sandbox violation detected"
- "mov $101f-100f, %%edx\n" // len = strlen(msg)
- "syscall\n"
- "26:mov $1, %%edi\n"
- "27:mov $231, %%eax\n" // NR_exit_group
- "jmp 24b\n"
-
- // The first page is mapped read-only for use as securely shared memory
- "28:mov 0xC8(%%rbp), %%r12\n" // %r12 = secure shared memory
- "cmp %%rbx, 8(%%rbp)\n"
- "jne 25b\n" // exit process
- "mov $10, %%eax\n" // NR_mprotect
- "mov %%r12, %%rdi\n" // addr = secure_mem
- "mov $4096, %%esi\n" // len = 4096
- "mov $1, %%edx\n" // prot = PROT_READ
- "syscall\n"
-
- // The second page is used as scratch space by the trusted thread.
- // Make it writable.
- "mov $10, %%eax\n" // NR_mprotect
- "add $4096, %%rdi\n" // addr = secure_mem + 4096
- "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE
- "syscall\n"
-
- // Call clone() to create new trusted thread().
- // clone(CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
- // CLONE_SYSVSEM|CLONE_UNTRACED|CLONE_SETTLS, stack, NULL, NULL,
- // tls)
- "mov 4(%%r9), %%r13d\n" // %r13 = threadFd (on child's stack)
- "mov $56, %%eax\n" // NR_clone
- "mov $0x8D0F00, %%edi\n" // flags = VM|FS|FILES|SIGH|THR|SYSV|UTR|TLS
- "mov $1, %%rsi\n" // stack = 1
- "mov %%r12, %%r8\n" // tls = new_secure_mem
- "mov 0xD0(%%rbp), %%r15d\n" // %r15 = processFdPub
- "cmp %%rbx, 8(%%rbp)\n"
- "jne 25b\n" // exit process
- "syscall\n"
- "test %%rax, %%rax\n"
- "js 25b\n" // exit process
- "jz 0b\n" // invoke trustedThreadFnc()
-
- // Copy the caller's signal mask
- "mov 0x1054(%%rbp), %%rax\n"
- "mov %%rax, 0x1054(%%r12)\n"
-
- // Done creating trusted thread. We can now get ready to return to caller
- "mov %%r9, %%r8\n" // %r8 = child_stack
- "mov 0(%%r9), %%r9d\n" // %r9 = threadFdPub
-
- // Set up thread local storage with information on how to talk to
- // trusted thread and trusted process.
- "lea 0xE0(%%r12), %%rsi\n" // args = &secure_mem.TLS;
- "mov $158, %%eax\n" // NR_arch_prctl
- "mov $0x1001, %%edi\n" // option = ARCH_SET_GS
- "syscall\n"
- "cmp $-4095, %%rax\n" // return codes -1..-4095 are errno values
- "jae 25b\n" // exit process
-
- // Check whether this is the initial thread, or a newly created one.
- // At startup we run the same code as when we create a new thread. At
- // the very top of this function, you will find that we push 999(%rip)
- // on the stack. That is the signal that we should return on the same
- // stack rather than return to where clone was called.
- "mov 8(%%r8), %%r15\n"
- "add $0x10, %%r8\n"
- "test %%r15, %%r15\n"
- "jne 29f\n"
-
- // Returning from clone() into the newly created thread is special. We
- // cannot unroll the stack, as we just set up a new stack for this
- // thread. We have to explicitly restore CPU registers to the values
- // that they had when the program originally called clone().
- // We patch the register values in the signal stack frame so that we
- // can ask sigreturn() to restore all registers for us.
- "sub $0x8, %%r8\n"
- "mov 0x50(%%rbp), %%rax\n"
- "mov %%rax, 0x00(%%r8)\n" // return address
- "xor %%rax, %%rax\n"
- "mov %%rax, 0x98(%%r8)\n" // %rax = 0
- "mov 0x58(%%rbp), %%rax\n"
- "mov %%rax, 0x80(%%r8)\n" // %rbp
- "mov 0x60(%%rbp), %%rax\n"
- "mov %%rax, 0x88(%%r8)\n" // %rbx
- "mov 0x68(%%rbp), %%rax\n"
- "mov %%rax, 0xA0(%%r8)\n" // %rcx
- "mov 0x70(%%rbp), %%rax\n"
- "mov %%rax, 0x90(%%r8)\n" // %rdx
- "mov 0x78(%%rbp), %%rax\n"
- "mov %%rax, 0x78(%%r8)\n" // %rsi
- "mov 0x80(%%rbp), %%rax\n"
- "mov %%rax, 0x70(%%r8)\n" // %rdi
- "mov 0x88(%%rbp), %%rax\n"
- "mov %%rax, 0x30(%%r8)\n" // %r8
- "mov 0x90(%%rbp), %%rax\n"
- "mov %%rax, 0x38(%%r8)\n" // %r9
- "mov 0x98(%%rbp), %%rax\n"
- "mov %%rax, 0x40(%%r8)\n" // %r10
- "mov 0xA0(%%rbp), %%rax\n"
- "mov %%rax, 0x48(%%r8)\n" // %r11
- "mov 0xA8(%%rbp), %%rax\n"
- "mov %%rax, 0x50(%%r8)\n" // %r12
- "mov 0xB0(%%rbp), %%rax\n"
- "mov %%rax, 0x58(%%r8)\n" // %r13
- "mov 0xB8(%%rbp), %%rax\n"
- "mov %%rax, 0x60(%%r8)\n" // %r14
- "mov 0xC0(%%rbp), %%rax\n"
- "mov %%rax, 0x68(%%r8)\n" // %r15
- "cmp %%rbx, 8(%%rbp)\n"
- "jne 25b\n" // exit process
-
- // Nascent thread launches a helper that doesn't share any of our
- // resources, except for pages mapped as MAP_SHARED.
- // clone(SIGCHLD, stack=1)
- "29:mov $56, %%eax\n" // NR_clone
- "mov $17, %%rdi\n" // flags = SIGCHLD
- "mov $1, %%rsi\n" // stack = 1
- "syscall\n"
- "test %%rax, %%rax\n"
- "js 25b\n" // exit process
- "jne 31f\n"
-
- // Use sendmsg() to send to the trusted process the file handles for
- // communicating with the new trusted thread. We also send the address
- // of the secure memory area (for sanity checks) and the thread id.
- "mov 0xD4(%%rbp), %%edi\n" // transport = Sandbox::cloneFdPub()
- "cmp %%rbx, 8(%%rbp)\n"
- "jne 25b\n" // exit process
-
- // 0x00 msg:
- // 0x00 msg_name ($0)
- // 0x08 msg_namelen ($0)
- // 0x10 msg_iov (%r8 + 0x44)
- // 0x18 msg_iovlen ($1)
- // 0x20 msg_control (%r8 + 0x54)
- // 0x28 msg_controllen ($0x18)
- // 0x30 data:
- // 0x30 msg_flags/err ($0)
- // 0x34 secure_mem (%r12)
- // 0x3C threadId (%r14d)
- // 0x40 threadFdPub (%r9d)
- // 0x44 iov:
- // 0x44 iov_base (%r8 + 0x30)
- // 0x4C iov_len ($0x14)
- // 0x54 cmsg:
- // 0x54 cmsg_len ($0x18)
- // 0x5C cmsg_level ($1, SOL_SOCKET)
- // 0x60 cmsg_type ($1, SCM_RIGHTS)
- // 0x64 threadFdPub (%r9d)
- // 0x68 threadFd (%r13d)
- // 0x6C
- "sub $0x6C, %%r8\n"
- "xor %%rdx, %%rdx\n" // flags = 0
- "mov %%rdx, 0x00(%%r8)\n" // msg_name
- "mov %%edx, 0x08(%%r8)\n" // msg_namelen
- "mov %%edx, 0x30(%%r8)\n" // msg_flags
- "mov $1, %%r11d\n"
- "mov %%r11, 0x18(%%r8)\n" // msg_iovlen
- "mov %%r11d, 0x5C(%%r8)\n" // cmsg_level
- "mov %%r11d, 0x60(%%r8)\n" // cmsg_type
- "lea 0x30(%%r8), %%r11\n"
- "mov %%r11, 0x44(%%r8)\n" // iov_base
- "add $0x14, %%r11\n"
- "mov %%r11, 0x10(%%r8)\n" // msg_iov
- "add $0x10, %%r11\n"
- "mov %%r11, 0x20(%%r8)\n" // msg_control
- "mov $0x14, %%r11d\n"
- "mov %%r11, 0x4C(%%r8)\n" // iov_len
- "add $4, %%r11d\n"
- "mov %%r11, 0x28(%%r8)\n" // msg_controllen
- "mov %%r11, 0x54(%%r8)\n" // cmsg_len
- "mov %%r12, 0x34(%%r8)\n" // secure_mem
- "mov %%r14d, 0x3C(%%r8)\n" // threadId
- "mov %%r9d, 0x40(%%r8)\n" // threadFdPub
- "mov %%r9d, 0x64(%%r8)\n" // threadFdPub
- "mov %%r13d, 0x68(%%r8)\n" // threadFd
- "mov $46, %%eax\n" // NR_sendmsg
- "mov %%r8, %%rsi\n" // msg
- "syscall\n"
-
- // Release syscall_mutex_. This signals the trusted process that
- // it can write into the original thread's secure memory again.
- "mov $10, %%eax\n" // NR_mprotect
- "lea playground$syscall_mutex(%%rip), %%rdi\n"
- "mov $4096, %%esi\n"
- "mov $3, %%edx\n" // PROT_READ | PROT_WRITE
- "syscall\n"
- "cmp %%rbx, 8(%%rbp)\n"
- "jne 25b\n" // exit process
- "lock; addl $0x80000000, (%%rdi)\n"
- "jz 30f\n" // exit process (no error message)
- "mov $1, %%edx\n"
- "mov %%rdx, %%rsi\n" // FUTEX_WAKE
- "mov $202, %%eax\n" // NR_futex
- "syscall\n"
- "30:xor %%rdi, %%rdi\n"
- "jmp 27b\n" // exit process (no error message)
-
- // Reap helper
- "31:mov %%rax, %%rdi\n"
- "32:lea -4(%%r8), %%rsi\n"
- "xor %%rdx, %%rdx\n"
- "xor %%r10, %%r10\n"
- "mov $61, %%eax\n" // NR_wait4
- "syscall\n"
- "cmp $-4, %%eax\n" // EINTR
- "jz 32b\n"
- "mov -4(%%r8), %%eax\n"
- "test %%rax, %%rax\n"
- "jnz 26b\n" // exit process (no error message)
-
- // Release privileges by entering seccomp mode.
- "mov $157, %%eax\n" // NR_prctl
- "mov $22, %%edi\n" // PR_SET_SECCOMP
- "mov $1, %%esi\n"
- "syscall\n"
- "test %%rax, %%rax\n"
- "jnz 25b\n" // exit process
-
- // We can finally start using the stack. Signal handlers no longer pose
- // a threat to us.
- "mov %%r8, %%rsp\n"
-
- // Back in the newly created sandboxed thread, wait for trusted process
- // to receive request. It is possible for an attacker to make us
- // continue even before the trusted process is done. This is OK. It'll
- // result in us putting stale values into the new thread's TLS. But that
- // data is considered untrusted anyway.
- "push %%rax\n"
- "mov $1, %%edx\n" // len = 1
- "mov %%rsp, %%rsi\n" // buf = %rsp
- "mov %%r9, %%rdi\n" // fd = threadFdPub
- "33:xor %%rax, %%rax\n" // NR_read
- "syscall\n"
- "cmp $-4, %%rax\n" // EINTR
- "jz 33b\n"
- "cmp %%rdx, %%rax\n"
- "jne 25b\n" // exit process
- "pop %%rax\n"
-
- // Return to caller. We are in the new thread, now.
- "test %%r15, %%r15\n"
- "jnz 34f\n" // Returning to createTrustedThread()
-
- // Returning to the place where clone() had been called. We rely on
- // using rt_sigreturn() for restoring our registers. The caller already
- // created a signal stack frame, and we patched the register values
- // with the ones that were in effect prior to calling sandbox_clone().
- "pop %%r15\n"
- "34:mov %%r15, 0xA8(%%rsp)\n" // compute new %rip
- "mov $15, %%eax\n" // NR_rt_sigreturn
- "syscall\n"
-
- ".pushsection \".rodata\"\n"
- "100:.ascii \"Sandbox violation detected, program aborted\\n\"\n"
- "101:.ascii \"WARNING! This is an expensive system call\\n\"\n"
- "102:\n"
- ".popsection\n"
-
- "999:pop %%rbp\n"
- "pop %%rbx\n"
- :
- : "g"(&args)
- : "rax", "rcx", "rdx", "rdi", "rsi", "r8", "r9", "r10", "r11", "r12",
- "r13", "r14", "r15", "rsp", "memory"
-#elif defined(__i386__)
- struct user_desc u;
- u.entry_number = (typeof u.entry_number)-1;
- u.base_addr = 0;
- u.limit = 0xfffff;
- u.seg_32bit = 1;
- u.contents = 0;
- u.read_exec_only = 0;
- u.limit_in_pages = 1;
- u.seg_not_present = 0;
- u.useable = 1;
- SysCalls sys;
- if (sys.set_thread_area(&u) < 0) {
- die("Cannot set up thread local storage");
- }
- asm volatile("movw %w0, %%fs"
- :
- : "q"(8*u.entry_number+3));
- asm volatile(
- "push %%ebx\n"
- "push %%ebp\n"
-
- // Signal handlers are process-wide. This means that for security
- // reasons, we cannot allow that the trusted thread ever executes any
- // signal handlers.
- // We prevent the execution of signal handlers by setting a signal
- // mask that blocks all signals. In addition, we make sure that the
- // stack pointer is invalid.
- // We cannot reset the signal mask until after we have enabled
- // Seccomp mode. Our sigprocmask() wrapper would normally do this by
- // raising a signal, modifying the signal mask in the kernel-generated
- // signal frame, and then calling sigreturn(). This presents a bit of
- // a Catch-22, as all signals are masked and we can therefore not
- // raise any signal that would allow us to generate the signal stack
- // frame.
- // Instead, we have to create the signal stack frame prior to entering
- // Seccomp mode. This incidentally also helps us to restore the
- // signal mask to the same value that it had prior to entering the
- // sandbox.
- // The signal wrapper for clone() is the second entry point into this
- // code (by means of sending an IPC to its trusted thread). It goes
- // through the same steps of creating a signal stack frame on the
- // newly created thread's stacks prior to cloning. See clone.cc for
- // details.
- "mov %0, %%edi\n" // create signal stack before accessing MMX
- "mov $120+0xF000, %%eax\n" // __NR_clone + 0xF000
- "mov %%esp, %%ebp\n"
- "int $0\n" // push a signal stack frame (see clone.cc)
- "mov %%ebp, 0x1C(%%esp)\n" // pop stack upon call to sigreturn()
- "mov %%esp, %%ebp\n"
- "mov $2, %%ebx\n" // how = SIG_SETMASK
- "pushl $-1\n"
- "pushl $-1\n"
- "mov %%esp, %%ecx\n" // set = full mask
- "xor %%edx, %%edx\n" // old_set = NULL
- "mov $8, %%esi\n" // mask all 64 signals
- "mov $175, %%eax\n" // NR_rt_sigprocmask
- "int $0x80\n"
- "mov $126, %%eax\n" // NR_sigprocmask
- "int $0x80\n"
- "xor %%esp, %%esp\n" // invalidate the stack in all trusted code
- "movd %%edi, %%mm6\n" // %mm6 = args
- "lea 999f, %%edi\n" // continue in same thread
- "movd %%edi, %%mm3\n"
- "xor %%edi, %%edi\n" // initial sequence number
- "movd %%edi, %%mm2\n"
- "jmp 20f\n" // create trusted thread
-
- // TODO(markus): Coalesce the read() operations by reading into a bigger
- // buffer.
-
- // Parameters:
- // %mm0: thread's side of threadFd
- // %mm1: processFdPub
- // %mm3: return address after creation of new trusted thread
- // %mm5: secure memory region
- // the page following this one contains the scratch space
-
- // Local variables:
- // %mm2: sequence number for trusted calls
- // %mm4: thread id
-
- // Temporary variables:
- // %ebp: system call number
- // %mm6: secure memory of previous thread
- // %mm7: temporary variable for spilling data
-
- // Layout of secure shared memory region (c.f. securemem.h):
- // 0x00: pointer to the secure shared memory region (i.e. self)
- // 0x04: sequence number; must match %mm2
- // 0x08: call type; must match %eax, iff %eax == -1 || %eax == -2
- // 0x0C: system call number; passed to syscall in %eax
- // 0x10: first argument; passed to syscall in %ebx
- // 0x14: second argument; passed to syscall in %ecx
- // 0x18: third argument; passed to syscall in %edx
- // 0x1C: fourth argument; passed to syscall in %esi
- // 0x20: fifth argument; passed to syscall in %edi
- // 0x24: sixth argument; passed to syscall in %ebp
- // 0x28: stored return address for clone() system call
- // 0x2C: stored %ebp value for clone() system call
- // 0x30: stored %edi value for clone() system call
- // 0x34: stored %esi value for clone() system call
- // 0x38: stored %edx value for clone() system call
- // 0x3C: stored %ecx value for clone() system call
- // 0x40: stored %ebx value for clone() system call
- // 0x44: new shared memory for clone()
- // 0x48: processFdPub for talking to trusted process
- // 0x4C: cloneFdPub for talking to trusted process
- // 0x50: set to non-zero, if in debugging mode
- // 0x54: most recent SHM id returned by shmget(IPC_PRIVATE)
- // 0x58: cookie assigned to us by the trusted process (TLS_COOKIE)
- // 0x60: thread id (TLS_TID)
- // 0x68: threadFdPub (TLS_THREAD_FD)
- // 0x200-0x1000: securely passed verified file name(s)
-
- // Layout of (untrusted) scratch space:
- // 0x00: syscall number; passed in %eax
- // 0x04: first argument; passed in %ebx
- // 0x08: second argument; passed in %ecx
- // 0x0C: third argument; passed in %edx
- // 0x10: fourth argument; passed in %esi
- // 0x14: fifth argument; passed in %edi
- // 0x18: sixth argument; passed in %ebp
- // 0x1C: return value
- // 0x20: RDTSCP result (%eax)
- // 0x24: RDTSCP result (%edx)
- // 0x28: RDTSCP result (%ecx)
- // 0x2C: last system call (updated in syscall.cc)
- // 0x30: number of consecutive calls to a time fnc. (e.g. gettimeofday)
- // 0x34: nesting level of system calls (for debugging purposes only)
- // 0x38: signal mask
- // 0x40: in SEGV handler
-
- "0:xor %%esp, %%esp\n"
- "mov $2, %%eax\n" // %mm2 = initial sequence number
- "movd %%eax, %%mm2\n"
-
- // Read request from untrusted thread, or from trusted process. In either
- // case, the data that we read has to be considered untrusted.
- // read(threadFd, &scratch, 4)
- "1:mov $3, %%eax\n" // NR_read
- "movd %%mm0, %%ebx\n" // fd = threadFd
- "movd %%mm5, %%ecx\n" // secure_mem
- "add $0x1000, %%ecx\n" // buf = &scratch
- "mov $4, %%edx\n" // len = 4
- "2:int $0x80\n"
- "cmp $-4, %%eax\n" // EINTR
- "jz 2b\n"
- "cmp %%edx, %%eax\n"
- "jnz 25f\n" // exit process
-
- // Retrieve system call number. It is crucial that we only dereference
- // 0x1000(%mm5) exactly once. Afterwards, memory becomes untrusted and
- // we must use the value that we have read the first time.
- "mov 0(%%ecx), %%eax\n"
-
- // If syscall number is -1, execute an unlocked system call from the
- // secure memory area
- "cmp $-1, %%eax\n"
- "jnz 5f\n"
- "3:movd %%mm2, %%ebp\n"
- "cmp %%ebp, 0x4-0x1000(%%ecx)\n"
- "jne 25f\n" // exit process
- "cmp 0x08-0x1000(%%ecx), %%eax\n"
- "jne 25f\n" // exit process
- "mov 0x0C-0x1000(%%ecx), %%eax\n"
- "mov 0x10-0x1000(%%ecx), %%ebx\n"
- "mov 0x18-0x1000(%%ecx), %%edx\n"
- "mov 0x1C-0x1000(%%ecx), %%esi\n"
- "mov 0x20-0x1000(%%ecx), %%edi\n"
- "mov 0x24-0x1000(%%ecx), %%ebp\n"
- "mov 0x14-0x1000(%%ecx), %%ecx\n"
- "movd %%edi, %%mm4\n"
- "movd %%ebp, %%mm7\n"
- "movd %%mm2, %%ebp\n"
- "movd %%mm5, %%edi\n"
- "cmp %%ebp, 4(%%edi)\n"
- "jne 25f\n" // exit process
- "add $2, %%ebp\n"
- "movd %%ebp, %%mm2\n"
- "movd %%mm4, %%edi\n"
- "movd %%mm7, %%ebp\n"
-
- // shmget() gets some special treatment. Whenever we return from this
- // system call, we remember the most recently returned SysV shm id.
- "cmp $117, %%eax\n" // NR_ipc
- "jnz 4f\n"
- "cmp $23, %%ebx\n" // shmget()
- "jnz 4f\n"
- "int $0x80\n"
- "mov %%eax, %%ebp\n"
- "mov $120, %%eax\n" // NR_clone
- "mov $17, %%ebx\n" // flags = SIGCHLD
- "mov $1, %%ecx\n" // stack = 1
- "int $0x80\n"
- "test %%eax, %%eax\n"
- "js 25f\n" // exit process
- "mov %%eax, %%ebx\n"
- "jnz 8f\n" // wait for child, then return result
- "movd %%mm5, %%ebx\n" // start = secure_mem
- "mov $4096, %%ecx\n" // len = 4096
- "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE
- "mov $125, %%eax\n" // NR_mprotect
- "int $0x80\n"
- "mov %%ebp, 0x54(%%ebx)\n" // set most recently returned SysV shm id
- "xor %%ebx, %%ebx\n"
-
- // When debugging messages are enabled, warn about expensive system calls
- #ifndef NDEBUG
- "movd %%mm5, %%ecx\n"
- "cmpw $0, 0x50(%%ecx)\n" // debug mode
- "jz 27f\n"
- "mov $4, %%eax\n" // NR_write
- "mov $2, %%ebx\n" // fd = stderr
- "lea 101f, %%ecx\n" // "This is an expensive system call"
- "mov $102f-101f, %%edx\n" // len = strlen(msg)
- "int $0x80\n"
- "xor %%ebx, %%ebx\n"
- #endif
-
- "jmp 27f\n" // exit program, no message
- "4:int $0x80\n"
- "jmp 15f\n" // return result
-
- // If syscall number is -2, execute locked system call from the
- // secure memory area
- "5:jg 12f\n"
- "cmp $-2, %%eax\n"
- "jnz 9f\n"
- "movd %%mm2, %%ebp\n"
- "cmp %%ebp, 0x4-0x1000(%%ecx)\n"
- "jne 25f\n" // exit process
- "cmp %%eax, 0x8-0x1000(%%ecx)\n"
- "jne 25f\n" // exit process
-
- // When debugging messages are enabled, warn about expensive system calls
- #ifndef NDEBUG
- "cmpw $0, 0x50-0x1000(%%ecx)\n"
- "jz 6f\n" // debug mode
- "mov %%ecx, %%ebp\n"
- "mov $4, %%eax\n" // NR_write
- "mov $2, %%ebx\n" // fd = stderr
- "lea 101f, %%ecx\n" // "This is an expensive system call"
- "mov $102f-101f, %%edx\n" // len = strlen(msg)
- "int $0x80\n"
- "mov %%ebp, %%ecx\n"
- "6:"
- #endif
-
- "mov 0x0C-0x1000(%%ecx), %%eax\n"
- "mov 0x10-0x1000(%%ecx), %%ebx\n"
- "mov 0x18-0x1000(%%ecx), %%edx\n"
- "mov 0x1C-0x1000(%%ecx), %%esi\n"
- "mov 0x20-0x1000(%%ecx), %%edi\n"
- "mov 0x24-0x1000(%%ecx), %%ebp\n"
- "mov 0x14-0x1000(%%ecx), %%ecx\n"
- "movd %%edi, %%mm4\n"
- "movd %%ebp, %%mm7\n"
- "movd %%mm2, %%ebp\n"
- "movd %%mm5, %%edi\n"
- "cmp %%ebp, 4(%%edi)\n"
- "jne 25f\n" // exit process
-
- // clone() has unusual calling conventions and must be handled specially
- "cmp $120, %%eax\n" // NR_clone
- "jz 19f\n"
-
- // exit() terminates trusted thread
- "cmp $1, %%eax\n" // NR_exit
- "jz 18f\n"
-
- // Perform requested system call
- "movd %%mm4, %%edi\n"
- "movd %%mm7, %%ebp\n"
- "int $0x80\n"
-
- // Unlock mutex
- "7:movd %%mm2, %%ebp\n"
- "movd %%mm5, %%edi\n"
- "cmp %%ebp, 4(%%edi)\n"
- "jne 25f\n" // exit process
- "add $2, %%ebp\n"
- "movd %%ebp, %%mm2\n"
- "mov %%eax, %%ebp\n"
- "mov $120, %%eax\n" // NR_clone
- "mov $17, %%ebx\n" // flags = SIGCHLD
- "mov $1, %%ecx\n" // stack = 1
- "int $0x80\n"
- "test %%eax, %%eax\n"
- "js 25f\n" // exit process
- "jz 22f\n" // unlock and exit
- "mov %%eax, %%ebx\n"
- "8:xor %%ecx, %%ecx\n"
- "xor %%edx, %%edx\n"
- "mov $7, %%eax\n" // NR_waitpid
- "int $0x80\n"
- "cmp $-4, %%eax\n" // EINTR
- "jz 8b\n"
- "mov %%ebp, %%eax\n"
- "jmp 15f\n" // return result
-
- // If syscall number is -3, read the time stamp counter
- "9:cmp $-3, %%eax\n"
- "jnz 10f\n"
- "rdtsc\n" // sets %edx:%eax
- "xor %%ecx, %%ecx\n"
- "jmp 11f\n"
- "10:cmp $-4, %%eax\n"
- "jnz 12f\n"
- "rdtscp\n" // sets %edx:%eax and %ecx
- "11:movd %%mm5, %%ebx\n"
- "add $0x1020, %%ebx\n"
- "mov %%eax, 0(%%ebx)\n"
- "mov %%edx, 4(%%ebx)\n"
- "mov %%ecx, 8(%%ebx)\n"
- "mov %%ebx, %%ecx\n"
- "mov $12, %%edx\n"
- "jmp 16f\n" // return result
-
- // Check in syscallTable whether this system call is unrestricted
- "12:mov %%eax, %%ebp\n"
- #ifndef NDEBUG
- "cmpw $0, 0x50-0x1000(%%ecx)\n"
- "jnz 13f\n" // debug mode
- #endif
- "cmp playground$maxSyscall, %%eax\n"
- "ja 25f\n" // exit process
- "shl $3, %%eax\n"
- "add $playground$syscallTable, %%eax\n"
- "mov 0(%%eax), %%eax\n"
- "cmp $1, %%eax\n"
- "jne 25f\n" // exit process
-
- // Default behavior for unrestricted system calls is to just execute
- // them. Read the remaining arguments first.
- "13:mov $3, %%eax\n" // NR_read
- "movd %%mm0, %%ebx\n" // fd = threadFd
- "add $4, %%ecx\n" // buf = &scratch + 4
- "mov $24, %%edx\n" // len = 6*sizeof(void *)
- "14:int $0x80\n"
- "cmp $-4, %%eax\n" // EINTR
- "jz 14b\n"
- "cmp %%edx, %%eax\n"
- "jnz 25f\n" // exit process
- "mov %%ebp, %%eax\n"
- "mov 0x00(%%ecx), %%ebx\n"
- "mov 0x08(%%ecx), %%edx\n"
- "mov 0x0C(%%ecx), %%esi\n"
- "mov 0x10(%%ecx), %%edi\n"
- "mov 0x14(%%ecx), %%ebp\n"
- "mov 0x04(%%ecx), %%ecx\n"
- "cmp $252, %%eax\n" // NR_exit_group
- "jz 27f\n" // exit program, no message
- "int $0x80\n"
-
- // Return result of system call to sandboxed thread
- "15:movd %%mm5, %%ecx\n" // secure_mem
- "add $0x101C, %%ecx\n" // buf = &scratch + 28
- "mov %%eax, (%%ecx)\n"
- "mov $4, %%edx\n" // len = 4
- "16:movd %%mm0, %%ebx\n" // fd = threadFd
- "mov $4, %%eax\n" // NR_write
- "17:int $0x80\n"
- "cmp %%edx, %%eax\n"
- "jz 1b\n"
- "cmp $-4, %%eax\n" // EINTR
- "jz 17b\n"
- "jmp 25f\n" // exit process
-
- // NR_exit:
- // Exit trusted thread after cleaning up resources
- "18:mov %%edi, %%ecx\n" // secure_mem
- "mov 0x68(%%ecx), %%ebx\n" // fd = threadFdPub
- "mov $6, %%eax\n" // NR_close
- "int $0x80\n"
- "mov %%ecx, %%ebx\n" // start = secure_mem
- "mov $8192, %%ecx\n" // length = 8192
- "xor %%edx, %%edx\n" // prot = PROT_NONE
- "mov $125, %%eax\n" // NR_mprotect
- "int $0x80\n"
- "movd %%mm0, %%ebx\n" // fd = threadFd
- "mov $6, %%eax\n" // NR_close
- "int $0x80\n"
- "mov $120, %%eax\n" // NR_clone
- "mov $17, %%ebx\n" // flags = SIGCHLD
- "mov $1, %%ecx\n" // stack = 1
- "int $0x80\n"
- "mov %%eax, %%ebx\n"
- "test %%eax, %%eax\n"
- "js 25f\n" // exit process
- "jne 21f\n" // reap helper, exit thread
- "jmp 22f\n" // unlock mutex
-
- // NR_clone:
- // Original trusted thread calls clone() to create new nascent
- // thread. This thread is (typically) fully privileged and shares all
- // resources with the caller (i.e. the previous trusted thread),
- // and by extension it shares all resources with the sandbox'd
- // threads.
- "19:movd %%edi, %%mm6\n" // %mm6 = old_shared_mem
- "movd %%mm4, %%edi\n" // child_tidptr
- "mov %%ecx, %%ebp\n" // remember child stack
- "mov $1, %%ecx\n" // stack = 1
- "int $0x80\n" // calls NR_clone
- "cmp $-4095, %%eax\n" // return codes -1..-4095 are errno values
- "jae 7b\n" // unlock mutex, return result
- "movd %%mm2, %%edi\n"
- "add $2, %%edi\n"
- "movd %%edi, %%mm2\n"
- "test %%eax, %%eax\n"
- "jne 15b\n" // return result
-
- // In nascent thread, now.
- "sub $2, %%edi\n"
- "movd %%edi, %%mm2\n"
-
- // We want to maintain an invalid %esp whenever we access untrusted
- // memory. This ensures that even if an attacker can trick us into
- // triggering a SIGSEGV, we will never successfully execute a signal
- // handler.
- // Signal handlers are inherently dangerous, as an attacker could trick
- // us into returning to the wrong address by adjusting the signal stack
- // right before the handler returns.
- // N.B. While POSIX is curiously silent about this, it appears that on
- // Linux, alternate signal stacks are a per-thread property. That is
- // good. It means that this security mechanism works, even if the
- // sandboxed thread manages to set up an alternate signal stack.
- //
- // TODO(markus): We currently do not support emulating calls to
- // sys_clone() with a zero (i.e. copy) stack parameter. See clone.cc
- // for a discussion on how to fix this, if this ever becomes necessary.
- "movd %%eax, %%mm3\n" // Request to return from clone() when done
-
- // Get thread id of nascent thread
- "20:mov $224, %%eax\n" // NR_gettid
- "int $0x80\n"
- "movd %%eax, %%mm4\n"
-
- // Nascent thread creates socketpair() for sending requests to
- // trusted thread.
- // We can create the filehandles on the child's stack. Filehandles are
- // always treated as untrusted.
- // socketpair(AF_UNIX, SOCK_STREAM, 0, fds)
- "mov $102, %%eax\n" // NR_socketcall
- "mov $8, %%ebx\n" // socketpair
- "sub $8, %%ebp\n" // sv = child_stack
- "mov %%ebp, -0x04(%%ebp)\n"
- "movl $0, -0x08(%%ebp)\n" // protocol = 0
- "movl $1, -0x0C(%%ebp)\n" // type = SOCK_STREAM
- "movl $1, -0x10(%%ebp)\n" // domain = AF_UNIX
- "lea -0x10(%%ebp), %%ecx\n"
- "int $0x80\n"
- "test %%eax, %%eax\n"
- "jz 28f\n"
-
- // If things went wrong, we don't have an (easy) way of signaling
- // the parent. For our purposes, it is sufficient to fail with a
- // fatal error.
- "jmp 25f\n" // exit process
- "21:xor %%ecx, %%ecx\n"
- "xor %%edx, %%edx\n"
- "mov $7, %%eax\n" // NR_waitpid
- "int $0x80\n"
- "cmp $-4, %%eax\n" // EINTR
- "jz 21b\n"
- "jmp 23f\n" // exit thread (no message)
- "22:lea playground$syscall_mutex, %%ebx\n"
- "mov $4096, %%ecx\n"
- "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE
- "mov $125, %%eax\n" // NR_mprotect
- "int $0x80\n"
- "lock; addl $0x80000000, (%%ebx)\n"
- "jz 23f\n" // exit thread
- "mov $1, %%edx\n"
- "mov %%edx, %%ecx\n" // FUTEX_WAKE
- "mov $240, %%eax\n" // NR_futex
- "int $0x80\n"
- "23:mov $1, %%eax\n" // NR_exit
- "mov $1, %%ebx\n" // status = 1
- "24:int $0x80\n"
- "25:mov $4, %%eax\n" // NR_write
- "mov $2, %%ebx\n" // fd = stderr
- "lea 100f, %%ecx\n" // "Sandbox violation detected"
- "mov $101f-100f, %%edx\n" // len = strlen(msg)
- "int $0x80\n"
- "26:mov $1, %%ebx\n"
- "27:mov $252, %%eax\n" // NR_exit_group
- "jmp 24b\n"
-
- // The first page is mapped read-only for use as securely shared memory
- "28:movd %%mm6, %%edi\n" // %edi = old_shared_mem
- "mov 0x44(%%edi), %%ebx\n" // addr = secure_mem
- "movd %%ebx, %%mm5\n" // %mm5 = secure_mem
- "movd %%mm2, %%esi\n"
- "cmp %%esi, 4(%%edi)\n"
- "jne 25b\n" // exit process
- "mov $125, %%eax\n" // NR_mprotect
- "mov $4096, %%ecx\n" // len = 4096
- "mov $1, %%edx\n" // prot = PROT_READ
- "int $0x80\n"
-
- // The second page is used as scratch space by the trusted thread.
- // Make it writable.
- "mov $125, %%eax\n" // NR_mprotect
- "add $4096, %%ebx\n" // addr = secure_mem + 4096
- "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE
- "int $0x80\n"
-
- // Call clone() to create new trusted thread().
- // clone(CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
- // CLONE_SYSVSEM|CLONE_UNTRACED, stack, NULL, NULL, NULL)
- "mov 4(%%ebp), %%eax\n" // threadFd (on child's stack)
- "movd %%eax, %%mm0\n" // %mm0 = threadFd
- "mov $120, %%eax\n" // NR_clone
- "mov $0x850F00, %%ebx\n" // flags = VM|FS|FILES|SIGH|THR|SYSV|UTR
- "mov $1, %%ecx\n" // stack = 1
- "movd 0x48(%%edi), %%mm1\n" // %mm1 = processFdPub
- "cmp %%esi, 4(%%edi)\n"
- "jne 25b\n" // exit process
- "int $0x80\n"
- "test %%eax, %%eax\n"
- "js 25b\n" // exit process
- "jz 0b\n" // invoke trustedThreadFnc()
-
- // Set up thread local storage
- "mov $0x51, %%eax\n" // seg_32bit, limit_in_pages, useable
- "mov %%eax, -0x04(%%ebp)\n"
- "mov $0xFFFFF, %%eax\n" // limit
- "mov %%eax, -0x08(%%ebp)\n"
- "movd %%mm5, %%eax\n"
- "add $0x58, %%eax\n"
- "mov %%eax, -0x0C(%%ebp)\n" // base_addr = &secure_mem.TLS
- "mov %%fs, %%eax\n"
- "shr $3, %%eax\n"
- "mov %%eax, -0x10(%%ebp)\n" // entry_number
- "mov $243, %%eax\n" // NR_set_thread_area
- "lea -0x10(%%ebp), %%ebx\n"
- "int $0x80\n"
- "test %%eax, %%eax\n"
- "jnz 25b\n" // exit process
-
- // Copy the caller's signal mask
- "movd %%mm5, %%edx\n"
- "mov 0x1038(%%edi), %%eax\n"
- "mov %%eax, 0x1038(%%edx)\n"
- "mov 0x103C(%%edi), %%eax\n"
- "mov %%eax, 0x103C(%%edx)\n"
-
- // Done creating trusted thread. We can now get ready to return to caller
- "mov 0(%%ebp), %%esi\n" // %esi = threadFdPub
- "add $8, %%ebp\n"
-
- // Check whether this is the initial thread, or a newly created one.
- // At startup we run the same code as when we create a new thread. At
- // the very top of this function, you will find that we store 999f
- // in %%mm3. That is the signal that we should return on the same
- // stack rather than return to where clone was called.
- "movd %%mm3, %%eax\n"
- "movd %%mm2, %%edx\n"
- "test %%eax, %%eax\n"
- "jne 29f\n"
-
- // Returning from clone() into the newly created thread is special. We
- // cannot unroll the stack, as we just set up a new stack for this
- // thread. We have to explicitly restore CPU registers to the values
- // that they had when the program originally called clone().
- // We patch the register values in the signal stack frame so that we
- // can ask sigreturn() to restore all registers for us.
- "sub $0x4, %%ebp\n"
- "mov 0x28(%%edi), %%eax\n"
- "mov %%eax, 0x00(%%ebp)\n" // return address
- "xor %%eax, %%eax\n"
- "mov %%eax, 0x30(%%ebp)\n" // %eax = 0
- "mov 0x2C(%%edi), %%eax\n"
- "mov %%eax, 0x1C(%%ebp)\n" // %ebp
- "mov 0x30(%%edi), %%eax\n"
- "mov %%eax, 0x14(%%ebp)\n" // %edi
- "mov 0x34(%%edi), %%eax\n"
- "mov %%eax, 0x18(%%ebp)\n" // %esi
- "mov 0x38(%%edi), %%eax\n"
- "mov %%eax, 0x28(%%ebp)\n" // %edx
- "mov 0x3C(%%edi), %%eax\n"
- "mov %%eax, 0x2C(%%ebp)\n" // %ecx
- "mov 0x40(%%edi), %%eax\n"
- "mov %%eax, 0x24(%%ebp)\n" // %ebx
- "cmp %%edx, 4(%%edi)\n"
- "jne 25b\n" // exit process
-
- // Nascent thread launches a helper that doesn't share any of our
- // resources, except for pages mapped as MAP_SHARED.
- // clone(SIGCHLD, stack=1)
- "29:mov $120, %%eax\n" // NR_clone
- "mov $17, %%ebx\n" // flags = SIGCHLD
- "mov $1, %%ecx\n" // stack = 1
- "int $0x80\n"
- "test %%eax, %%eax\n"
- "js 25b\n" // exit process
- "jne 31f\n"
-
- // Use sendmsg() to send to the trusted process the file handles for
- // communicating with the new trusted thread. We also send the address
- // of the secure memory area (for sanity checks) and the thread id.
- "cmp %%edx, 4(%%edi)\n"
- "jne 25b\n" // exit process
-
- // 0x00 socketcall:
- // 0x00 socket (0x4C(%edi))
- // 0x04 msg (%ecx + 0x0C)
- // 0x08 flags ($0)
- // 0x0C msg:
- // 0x0C msg_name ($0)
- // 0x10 msg_namelen ($0)
- // 0x14 msg_iov (%ecx + 0x34)
- // 0x18 msg_iovlen ($1)
- // 0x1C msg_control (%ecx + 0x3C)
- // 0x20 msg_controllen ($0x14)
- // 0x24 data:
- // 0x24 msg_flags/err ($0)
- // 0x28 secure_mem (%mm5)
- // 0x2C threadId (%mm4)
- // 0x30 threadFdPub (%esi)
- // 0x34 iov:
- // 0x34 iov_base (%ecx + 0x24)
- // 0x38 iov_len ($0x10)
- // 0x3C cmsg:
- // 0x3C cmsg_len ($0x14)
- // 0x40 cmsg_level ($1, SOL_SOCKET)
- // 0x44 cmsg_type ($1, SCM_RIGHTS)
- // 0x48 threadFdPub (%esi)
- // 0x4C threadFd (%mm0)
- // 0x50
- "lea -0x50(%%ebp), %%ecx\n"
- "xor %%eax, %%eax\n"
- "mov %%eax, 0x08(%%ecx)\n" // flags
- "mov %%eax, 0x0C(%%ecx)\n" // msg_name
- "mov %%eax, 0x10(%%ecx)\n" // msg_namelen
- "mov %%eax, 0x24(%%ecx)\n" // msg_flags
- "inc %%eax\n"
- "mov %%eax, 0x18(%%ecx)\n" // msg_iovlen
- "mov %%eax, 0x40(%%ecx)\n" // cmsg_level
- "mov %%eax, 0x44(%%ecx)\n" // cmsg_type
- "movl $0x10, 0x38(%%ecx)\n" // iov_len
- "mov $0x14, %%eax\n"
- "mov %%eax, 0x20(%%ecx)\n" // msg_controllen
- "mov %%eax, 0x3C(%%ecx)\n" // cmsg_len
- "mov 0x4C(%%edi), %%eax\n" // cloneFdPub
- "mov %%eax, 0x00(%%ecx)\n" // socket
- "lea 0x0C(%%ecx), %%eax\n"
- "mov %%eax, 0x04(%%ecx)\n" // msg
- "add $0x18, %%eax\n"
- "mov %%eax, 0x34(%%ecx)\n" // iov_base
- "add $0x10, %%eax\n"
- "mov %%eax, 0x14(%%ecx)\n" // msg_iov
- "add $8, %%eax\n"
- "mov %%eax, 0x1C(%%ecx)\n" // msg_control
- "mov %%esi, 0x30(%%ecx)\n" // threadFdPub
- "mov %%esi, 0x48(%%ecx)\n" // threadFdPub
- "movd %%mm5, %%eax\n"
- "mov %%eax, 0x28(%%ecx)\n" // secure_mem
- "movd %%mm4, %%eax\n"
- "mov %%eax, 0x2C(%%ecx)\n" // threadId
- "movd %%mm0, %%eax\n"
- "mov %%eax, 0x4C(%%ecx)\n" // threadFd
- "mov $16, %%ebx\n" // sendmsg()
- "mov $102, %%eax\n" // NR_socketcall
- "int $0x80\n"
-
- // Release syscall_mutex_. This signals the trusted process that
- // it can write into the original thread's secure memory again.
- "mov $125, %%eax\n" // NR_mprotect
- "lea playground$syscall_mutex, %%ebx\n"
- "mov $4096, %%ecx\n"
- "mov $3, %%edx\n" // PROT_READ | PROT_WRITE
- "int $0x80\n"
- "movd %%mm2, %%edx\n"
- "cmp %%edx, 0x4(%%edi)\n"
- "jnz 25b\n" // exit process
- "lock; addl $0x80000000, (%%ebx)\n"
- "jz 30f\n" // exit process (no error message)
- "mov $1, %%edx\n"
- "mov %%edx, %%ecx\n" // FUTEX_WAKE
- "mov $240, %%eax\n" // NR_futex
- "int $0x80\n"
- "30:xor %%ebx, %%ebx\n"
- "jmp 27b\n" // exit process (no error message)
-
- // Reap helper
- "31:mov %%eax, %%ebx\n"
- "32:lea -4(%%ebp), %%ecx\n"
- "xor %%edx, %%edx\n"
- "mov $7, %%eax\n" // NR_waitpid
- "int $0x80\n"
- "cmp $-4, %%eax\n" // EINTR
- "jz 32b\n"
- "mov -4(%%ebp), %%eax\n"
- "test %%eax, %%eax\n"
- "jnz 26b\n" // exit process (no error message)
-
- // Release privileges by entering seccomp mode.
- "33:mov $172, %%eax\n" // NR_prctl
- "mov $22, %%ebx\n" // PR_SET_SECCOMP
- "mov $1, %%ecx\n"
- "int $0x80\n"
- "test %%eax, %%eax\n"
- "jnz 25b\n" // exit process
-
- // We can finally start using the stack. Signal handlers no longer pose
- // a threat to us.
- "mov %%ebp, %%esp\n"
-
- // Back in the newly created sandboxed thread, wait for trusted process
- // to receive request. It is possible for an attacker to make us
- // continue even before the trusted process is done. This is OK. It'll
- // result in us putting stale values into the new thread's TLS. But that
- // data is considered untrusted anyway.
- "push %%eax\n"
- "mov $1, %%edx\n" // len = 1
- "mov %%esp, %%ecx\n" // buf = %esp
- "mov %%esi, %%ebx\n" // fd = threadFdPub
- "34:mov $3, %%eax\n" // NR_read
- "int $0x80\n"
- "cmp $-4, %%eax\n" // EINTR
- "jz 34b\n"
- "cmp %%edx, %%eax\n"
- "jne 25b\n" // exit process
- "pop %%eax\n"
-
- // Return to caller. We are in the new thread, now.
- "movd %%mm3, %%ebx\n"
- "test %%ebx, %%ebx\n"
- "jnz 35f\n" // Returning to createTrustedThread()
-
- // Returning to the place where clone() had been called. We rely on
- // using sigreturn() for restoring our registers. The caller already
- // created a signal stack frame, and we patched the register values
- // with the ones that were in effect prior to calling sandbox_clone().
- "pop %%ebx\n"
- "35:mov %%ebx, 0x38(%%esp)\n" // compute new %eip
- "mov $119, %%eax\n" // NR_sigreturn
- "int $0x80\n"
-
- ".pushsection \".rodata\"\n"
- "100:.ascii \"Sandbox violation detected, program aborted\\n\"\n"
- "101:.ascii \"WARNING! This is an expensive system call\\n\"\n"
- "102:\n"
- ".popsection\n"
-
- "999:pop %%ebp\n"
- "pop %%ebx\n"
- :
- : "g"(&args)
- : "eax", "ecx", "edx", "edi", "esi", "esp", "memory"
-#else
-#error Unsupported target platform
-#endif
-);
-}
-
-} // namespace
diff --git a/sandbox/linux/seccomp/x86_decode.cc b/sandbox/linux/seccomp/x86_decode.cc
deleted file mode 100644
index 1b55139..0000000
--- a/sandbox/linux/seccomp/x86_decode.cc
+++ /dev/null
@@ -1,310 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "x86_decode.h"
-
-namespace playground {
-
-#if defined(__x86_64__) || defined(__i386__)
-unsigned short next_inst(const char **ip, bool is64bit, bool *has_prefix,
- char **rex_ptr, char **mod_rm_ptr, char **sib_ptr,
- bool *is_group) {
- enum {
- BYTE_OP = (1<<1), // 0x02
- IMM = (1<<2), // 0x04
- IMM_BYTE = (2<<2), // 0x08
- MEM_ABS = (3<<2), // 0x0C
- MODE_MASK = (7<<2), // 0x1C
- MOD_RM = (1<<5), // 0x20
- STACK = (1<<6), // 0x40
- GROUP = (1<<7), // 0x80
- GROUP_MASK = 0x7F,
- };
-
- static unsigned char opcode_types[512] = {
- 0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x01, // 0x00 - 0x07
- 0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x00, // 0x08 - 0x0F
- 0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x01, // 0x10 - 0x17
- 0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x01, // 0x18 - 0x1F
- 0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x20 - 0x27
- 0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x28 - 0x2F
- 0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x30 - 0x37
- 0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x38 - 0x3F
- 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x40 - 0x47
- 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x48 - 0x4F
- 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, // 0x50 - 0x57
- 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, // 0x58 - 0x5F
- 0x01, 0x01, 0x21, 0x21, 0x00, 0x00, 0x00, 0x00, // 0x60 - 0x67
- 0x45, 0x25, 0x49, 0x29, 0x03, 0x01, 0x03, 0x01, // 0x68 - 0x6F
- 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, // 0x70 - 0x77
- 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, // 0x78 - 0x7F
- 0x27, 0x25, 0x27, 0x29, 0x23, 0x21, 0x23, 0x21, // 0x80 - 0x87
- 0x23, 0x21, 0x23, 0x21, 0x21, 0x21, 0x21, 0x80, // 0x88 - 0x8F
- 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x90 - 0x97
- 0x01, 0x01, 0x05, 0x01, 0x41, 0x41, 0x01, 0x01, // 0x98 - 0x9F
- 0x0F, 0x0D, 0x0F, 0x0D, 0x03, 0x01, 0x03, 0x01, // 0xA0 - 0xA7
- 0x09, 0x05, 0x03, 0x01, 0x03, 0x01, 0x03, 0x01, // 0xA8 - 0xAF
- 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, // 0xB0 - 0xB7
- 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0xB8 - 0xBF
- 0x27, 0x29, 0x01, 0x01, 0x21, 0x21, 0x27, 0x25, // 0xC0 - 0xC7
- 0x01, 0x01, 0x01, 0x01, 0x01, 0x09, 0x01, 0x01, // 0xC8 - 0xCF
- 0x23, 0x21, 0x23, 0x21, 0x09, 0x09, 0x01, 0x01, // 0xD0 - 0xD7
- 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xD8 - 0xDF
- 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, // 0xE0 - 0xE7
- 0x05, 0x05, 0x05, 0x09, 0x03, 0x01, 0x03, 0x01, // 0xE8 - 0xEF
- 0x00, 0x01, 0x00, 0x00, 0x01, 0x01, 0x88, 0x90, // 0xF0 - 0xF7
- 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x98, 0xA0, // 0xF8 - 0xFF
- 0x00, 0xA8, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, // 0xF00 - 0xF07
- 0x01, 0x01, 0x00, 0x01, 0x00, 0x21, 0x01, 0x00, // 0xF08 - 0xF0F
- 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF10 - 0xF17
- 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF18 - 0xF1F
- 0x21, 0x21, 0x21, 0x21, 0x00, 0x00, 0x00, 0x00, // 0xF20 - 0xF27
- 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF28 - 0xF2F
- 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, // 0xF30 - 0xF37
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xF38 - 0xF3F
- 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF40 - 0xF47
- 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF48 - 0xF4F
- 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF50 - 0xF57
- 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF58 - 0xF5F
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xF60 - 0xF67
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xF68 - 0xF6F
- 0x21, 0x00, 0x00, 0x00, 0x21, 0x21, 0x21, 0x00, // 0xF70 - 0xF77
- 0x21, 0x21, 0x00, 0x00, 0x21, 0x21, 0x21, 0x21, // 0xF78 - 0xF7F
- 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xF80 - 0xF87
- 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xF88 - 0xF8F
- 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF90 - 0xF97
- 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF98 - 0xF9F
- 0x01, 0x01, 0x01, 0x21, 0x29, 0x21, 0x00, 0x00, // 0xFA0 - 0xFA7
- 0x01, 0x01, 0x01, 0x21, 0x29, 0x21, 0x21, 0x21, // 0xFA8 - 0xFAF
- 0x23, 0x21, 0x00, 0x21, 0x00, 0x00, 0x23, 0x21, // 0xFB0 - 0xFB7
- 0x21, 0x00, 0x29, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFB8 - 0xFBF
- 0x21, 0x21, 0x00, 0x21, 0x00, 0x00, 0x00, 0x21, // 0xFC0 - 0xFC7
- 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xFC8 - 0xFCF
- 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFD0 - 0xFD7
- 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFD8 - 0xFDF
- 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFE0 - 0xFE7
- 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFE8 - 0xFEF
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xFF0 - 0xFF7
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xFF8 - 0xFFF
- };
-
- static unsigned char group_table[56] = {
- 0x61, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Group 1A
- 0x27, 0x27, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, // Group 3 (Byte)
- 0x25, 0x25, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // Group 3
- 0x23, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Group 4
- 0x21, 0x21, 0x61, 0x21, 0x61, 0x21, 0x61, 0x00, // Group 5
- 0x00, 0x00, 0x21, 0x21, 0x21, 0x00, 0x21, 0x23, // Group 7
- 0x21, 0x00, 0x00, 0x21, 0x21, 0x00, 0x21, 0x00, // Group 7 (Alternate)
- };
-
- const unsigned char *insn_ptr = reinterpret_cast<const unsigned char *>(*ip);
- int operand_width = 4;
- int address_width = 4;
- if (is64bit) {
- address_width = 8;
- }
- unsigned char byte, rex = 0;
- bool found_prefix = false;
- if (rex_ptr) {
- *rex_ptr = 0;
- }
- if (mod_rm_ptr) {
- *mod_rm_ptr = 0;
- }
- if (sib_ptr) {
- *sib_ptr = 0;
- }
- for (;; ++insn_ptr) {
- switch (byte = *insn_ptr) {
- case 0x66: // Operand width prefix
- operand_width ^= 6;
- break;
- case 0x67: // Address width prefix
- address_width ^= is64bit ? 12 : 6;
- break;
- case 0x26: // Segment selector prefixes
- case 0x2e:
- case 0x36:
- case 0x3e:
- case 0x64:
- case 0x65:
- case 0xF0:
- case 0xF2:
- case 0xF3:
- break;
- case 0x40: case 0x41: case 0x42: case 0x43: // 64 bit REX prefixes
- case 0x44: case 0x45: case 0x46: case 0x47:
- case 0x48: case 0x49: case 0x4A: case 0x4B:
- case 0x4C: case 0x4D: case 0x4E: case 0x4F:
- if (is64bit) {
- if (rex_ptr) {
- *rex_ptr = (char *)insn_ptr;
- }
- rex = byte;
- found_prefix = true;
- continue;
- }
- // fall through
- default:
- ++insn_ptr;
- goto no_more_prefixes;
- }
- rex = 0;
- found_prefix = true;
- }
-no_more_prefixes:
- if (has_prefix) {
- *has_prefix = found_prefix;
- }
- if (rex & REX_W) {
- operand_width = 8;
- }
- unsigned char type;
- unsigned short insn = byte;
- unsigned int idx = 0;
- if (byte == 0x0F) {
- byte = *insn_ptr++;
- insn = (insn << 8) | byte;
- idx = 256;
- }
- type = opcode_types[idx + byte];
- bool found_mod_rm = false;
- bool found_group = false;
- bool found_sib = false;
- unsigned char mod_rm = 0;
- unsigned char sib = 0;
- if (type & GROUP) {
- found_mod_rm = true;
- found_group = true;
- mod_rm = *insn_ptr;
- if (mod_rm_ptr) {
- *mod_rm_ptr = (char *)insn_ptr;
- }
- unsigned char group = (type & GROUP_MASK) + ((mod_rm >> 3) & 0x7);
- if ((type & GROUP_MASK) == 40 && (mod_rm >> 6) == 3) {
- group += 8;
- }
- type = group_table[group];
- }
- if (!type) {
- // We know that we still don't decode some of the more obscure
- // instructions, but for all practical purposes that doesn't matter.
- // Compilers are unlikely to output them, and even if we encounter
- // hand-coded assembly, we will soon synchronize to the instruction
- // stream again.
- //
- // std::cerr << "Unsupported instruction at 0x" << std::hex <<
- // std::uppercase << reinterpret_cast<long>(*ip) << " [ ";
- // for (const unsigned char *ptr =
- // reinterpret_cast<const unsigned char *>(*ip);
- // ptr < insn_ptr; ) {
- // std::cerr << std::hex << std::uppercase << std::setw(2) <<
- // std::setfill('0') << (unsigned int)*ptr++ << ' ';
- // }
- // std::cerr << "]" << std::endl;
- } else {
- if (is64bit && (type & STACK)) {
- operand_width = 8;
- }
- if (type & MOD_RM) {
- found_mod_rm = true;
- if (mod_rm_ptr) {
- *mod_rm_ptr = (char *)insn_ptr;
- }
- mod_rm = *insn_ptr++;
- int mod = (mod_rm >> 6) & 0x3;
- int rm = 8*(rex & REX_B) + (mod_rm & 0x7);
- if (mod != 3) {
- if (address_width == 2) {
- switch (mod) {
- case 0:
- if (rm != 6 /* SI */) {
- break;
- }
- // fall through
- case 2:
- insn_ptr++;
- // fall through
- case 1:
- insn_ptr++;
- break;
- }
- } else {
- if ((rm & 0x7) == 4) {
- found_sib = true;
- if (sib_ptr) {
- *sib_ptr = (char *)insn_ptr;
- }
- sib = *insn_ptr++;
- if (!mod && (sib & 0x7) == 5 /* BP */) {
- insn_ptr += 4;
- }
- }
- switch (mod) {
- case 0:
- if (rm != 5 /* BP */) {
- break;
- }
- // fall through
- case 2:
- insn_ptr += 3;
- // fall through
- case 1:
- insn_ptr++;
- break;
- }
- }
- }
- }
- switch (insn) {
- case 0xC8: // ENTER
- insn_ptr++;
- // fall through
- case 0x9A: // CALL (far)
- case 0xC2: // RET (near)
- case 0xCA: // LRET
- case 0xEA: // JMP (far)
- insn_ptr += 2;
- break;
- case 0xF80: case 0xF81: case 0xF82: case 0xF83: // Jcc (rel)
- case 0xF84: case 0xF85: case 0xF86: case 0xF87:
- case 0xF88: case 0xF89: case 0xF8A: case 0xF8B:
- case 0xF8C: case 0xF8D: case 0xF8E: case 0xF8F:
- insn_ptr += operand_width;
- break;
- }
- switch (type & MODE_MASK) {
- case IMM:
- if (!(type & BYTE_OP)) {
- switch (insn) {
- case 0xB8: case 0xB9: case 0xBA: case 0xBB:
- case 0xBC: case 0xBD: case 0xBE: case 0xBF:
- // Allow MOV to/from 64bit addresses
- insn_ptr += operand_width;
- break;
- default:
- insn_ptr += (operand_width == 8) ? 4 : operand_width;
- break;
- }
- break;
- }
- // fall through
- case IMM_BYTE:
- insn_ptr++;
- break;
- case MEM_ABS:
- insn_ptr += address_width;
- break;
- }
- }
- if (is_group) {
- *is_group = found_group;
- }
- *ip = reinterpret_cast<const char *>(insn_ptr);
- return insn;
-}
-#endif
-
-} // namespace
diff --git a/sandbox/linux/seccomp/x86_decode.h b/sandbox/linux/seccomp/x86_decode.h
deleted file mode 100644
index 68f0ab5..0000000
--- a/sandbox/linux/seccomp/x86_decode.h
+++ /dev/null
@@ -1,19 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef X86_DECODE_H__
-#define X86_DECODE_H__
-namespace playground {
-enum {
- REX_B = 0x01,
- REX_X = 0x02,
- REX_R = 0x04,
- REX_W = 0x08
-};
-
-unsigned short next_inst(const char **ip, bool is64bit, bool *has_prefix = 0,
- char **rex_ptr = 0, char **mod_rm_ptr = 0,
- char **sib_ptr = 0, bool *is_group = 0);
-} // namespace
-#endif // X86_DECODE_H__