diff options
author | mseaborn@chromium.org <mseaborn@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-08-30 22:22:07 +0000 |
---|---|---|
committer | mseaborn@chromium.org <mseaborn@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-08-30 22:22:07 +0000 |
commit | 439764b703a5edd48aa878b86fbd07a117b6a3cc (patch) | |
tree | e2d6b02e2dd58e7dd01effc9311807318883a2a0 /sandbox | |
parent | 0429d943467575406d5fbc3ac859a6e9604d6d0d (diff) | |
download | chromium_src-439764b703a5edd48aa878b86fbd07a117b6a3cc.zip chromium_src-439764b703a5edd48aa878b86fbd07a117b6a3cc.tar.gz chromium_src-439764b703a5edd48aa878b86fbd07a117b6a3cc.tar.bz2 |
Pull seccomp-sandbox in via DEPS rather than using an in-tree copy
This means changes to the sandbox won't have to be committed twice, to
both trees.
BUG=none
TEST=smoke test of running chromium with --enable-seccomp-sandbox
Review URL: http://codereview.chromium.org/3249003
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@57921 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'sandbox')
45 files changed, 0 insertions, 14319 deletions
diff --git a/sandbox/linux/seccomp/Makefile b/sandbox/linux/seccomp/Makefile deleted file mode 100644 index 141d8c3..0000000 --- a/sandbox/linux/seccomp/Makefile +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2010 The Chromium Authors. All rights reserved. -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. - -# This Makefile temporarily has been checked into the source tree so that -# we can run the tests. It will be replaced with a proper gyp file. - -CFLAGS = -g -O0 -Wall -Werror -Wextra -Wno-missing-field-initializers \ - -Wno-unused-parameter -I. -LDFLAGS = -g -CPPFLAGS = -MODS := allocator library debug maps x86_decode securemem sandbox \ - syscall syscall_table trusted_thread trusted_process \ - access exit clone getpid gettid ioctl ipc madvise mmap mprotect \ - munmap open sigaction sigprocmask socketcall stat -OBJS64 := $(shell echo ${MODS} | xargs -n 1 | sed -e 's/$$/.o64/') -OBJS32 := $(shell echo ${MODS} | xargs -n 1 | sed -e 's/$$/.o32/') -HEADERS:= $(shell for i in ${MODS}; do [ -r "$$i" ] && echo "$$i"; done) - -.SUFFIXES: .o64 .o32 - -all: test - -clean: - -rm -f *.o *.o32 *.o64 tests/*.o32 tests/*.o.64 - -rm -f core core.* vgcore vgcore.* strace.log* - -rm -f run_tests_32 run_tests_64 - -rm -f tests/test_syscalls.o64 tests/test_syscalls.o32 - -rm -f tests/test-list.h - -test: run_tests_64 run_tests_32 - ./run_tests_64 - ./run_tests_32 - -# TODO: Track header file dependencies properly -tests/test_syscalls.o64 tests/test_syscalls.o32: tests/test-list.h - -tests/test-list.h: tests/list_tests.py tests/test_syscalls.cc - python tests/list_tests.py tests/test_syscalls.cc > $@ - -run_tests_64: $(OBJS64) tests/test_syscalls.o64 tests/test-list.h - g++ -m64 tests/test_syscalls.o64 $(OBJS64) -lpthread -lutil -o $@ -run_tests_32: $(OBJS32) tests/test_syscalls.o32 tests/test-list.h - g++ -m32 tests/test_syscalls.o32 $(OBJS32) -lpthread -lutil -o $@ - -.cc.o: ${HEADERS} - ${CXX} ${CFLAGS} ${CPPFLAGS} -c -o 
$@ $< - -.cc.o64: ${HEADERS} - ${CXX} ${CFLAGS} ${CPPFLAGS} -fPIC -c -o $@ $< - -.c.o64: ${HEADERS} - ${CC} ${CFLAGS} ${CPPFLAGS} --std=gnu99 -fPIC -c -o $@ $< - -.cc.o32: ${HEADERS} - ${CXX} ${CFLAGS} ${CPPFLAGS} -m32 -fPIC -c -o $@ $< - -.c.o32: ${HEADERS} - ${CC} ${CFLAGS} ${CPPFLAGS} -m32 --std=gnu99 -fPIC -c -o $@ $< diff --git a/sandbox/linux/seccomp/access.cc b/sandbox/linux/seccomp/access.cc deleted file mode 100644 index fbe7e53..0000000 --- a/sandbox/linux/seccomp/access.cc +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "debug.h" -#include "sandbox_impl.h" - -namespace playground { - -long Sandbox::sandbox_access(const char *pathname, int mode) { - long long tm; - Debug::syscall(&tm, __NR_access, "Executing handler"); - size_t len = strlen(pathname); - struct Request { - int sysnum; - long long cookie; - Access access_req; - char pathname[0]; - } __attribute__((packed)) *request; - char data[sizeof(struct Request) + len]; - request = reinterpret_cast<struct Request*>(data); - request->sysnum = __NR_access; - request->cookie = cookie(); - request->access_req.path_length = len; - request->access_req.mode = mode; - memcpy(request->pathname, pathname, len); - - long rc; - SysCalls sys; - if (write(sys, processFdPub(), request, sizeof(data)) != (int)sizeof(data) || - read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to forward access() request [sandbox]"); - } - Debug::elapsed(tm, __NR_access); - return rc; -} - -bool Sandbox::process_access(int parentMapsFd, int sandboxFd, int threadFdPub, - int threadFd, SecureMem::Args* mem) { - // Read request - SysCalls sys; - Access access_req; - if (read(sys, sandboxFd, &access_req, sizeof(access_req)) != - sizeof(access_req)) { - read_parm_failed: - die("Failed to read parameters for access() [process]"); - } - int rc = 
-ENAMETOOLONG; - if (access_req.path_length >= sizeof(mem->pathname)) { - char buf[32]; - while (access_req.path_length > 0) { - size_t len = access_req.path_length > sizeof(buf) ? - sizeof(buf) : access_req.path_length; - ssize_t i = read(sys, sandboxFd, buf, len); - if (i <= 0) { - goto read_parm_failed; - } - access_req.path_length -= i; - } - if (write(sys, threadFd, &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to return data from access() [process]"); - } - return false; - } - - if (!g_policy.allow_file_namespace) { - // After locking the mutex, we can no longer abandon the system call. So, - // perform checks before clobbering the securely shared memory. - char tmp[access_req.path_length]; - if (read(sys, sandboxFd, tmp, access_req.path_length) != - (ssize_t)access_req.path_length) { - goto read_parm_failed; - } - Debug::message(("Denying access to \"" + std::string(tmp) + "\"").c_str()); - SecureMem::abandonSystemCall(threadFd, -EACCES); - return false; - } - - SecureMem::lockSystemCall(parentMapsFd, mem); - if (read(sys, sandboxFd, mem->pathname, access_req.path_length) != - (ssize_t)access_req.path_length) { - goto read_parm_failed; - } - mem->pathname[access_req.path_length] = '\000'; - - // TODO(markus): Implement sandboxing policy - Debug::message(("Allowing access to \"" + std::string(mem->pathname) + - "\"").c_str()); - - // Tell trusted thread to access the file. - SecureMem::sendSystemCall(threadFdPub, true, parentMapsFd, mem, __NR_access, - mem->pathname - (char*)mem + (char*)mem->self, - access_req.mode); - return true; -} - -} // namespace diff --git a/sandbox/linux/seccomp/allocator.cc b/sandbox/linux/seccomp/allocator.cc deleted file mode 100644 index 6e11a4a..0000000 --- a/sandbox/linux/seccomp/allocator.cc +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -// The allocator is very simplistic. It requests memory pages directly from -// the system. Each page starts with a header describing the allocation. This -// makes sure that we can return the memory to the system when it is -// deallocated. -// For allocations that are smaller than a single page, we try to squeeze -// multiple of them into the same page. -// We expect to use this allocator for a moderate number of small allocations. -// In most cases, it will only need to ever make a single request to the -// operating system for the lifetime of the STL container object. -// We don't worry about memory fragmentation as the allocator is expected to -// be short-lived. - -#include <stdint.h> -#include <sys/mman.h> - -#include "allocator.h" -#include "linux_syscall_support.h" - -namespace playground { - -class SysCalls { - public: - #define SYS_CPLUSPLUS - #define SYS_ERRNO my_errno - #define SYS_INLINE inline - #define SYS_PREFIX -1 - #undef SYS_LINUX_SYSCALL_SUPPORT_H - #include "linux_syscall_support.h" - SysCalls() : my_errno(0) { } - int my_errno; -}; -#ifdef __NR_mmap2 - #define MMAP mmap2 - #define __NR_MMAP __NR_mmap2 -#else - #define MMAP mmap - #define __NR_MMAP __NR_mmap -#endif - -// We only ever keep track of the very last partial page that was used for -// allocations. This approach simplifies the code a lot. It can theoretically -// lead to more memory fragmentation, but for our use case that is unlikely -// to happen. -struct Header { - // The total amount of memory allocated for this chunk of memory. Typically, - // this would be a single page. - size_t total_len; - - // "used" keeps track of the number of bytes currently allocated in this - // page. Note that as elements are freed from this page, "used" is updated - // allowing us to track when the page is free. 
However, these holes in the - // page are never re-used, so "tail" is the only way to find out how much - // free space remains and when we need to request another chunk of memory - // from the system. - size_t used; - void *tail; -}; -static Header* last_alloc; - -void* SystemAllocatorHelper::sys_allocate(size_t size) { - // Number of bytes that need to be allocated - if (size + 3 < size) { - return NULL; - } - size_t len = (size + 3) & ~3; - - if (last_alloc) { - // Remaining space in the last chunk of memory allocated from system - size_t remainder = last_alloc->total_len - - (reinterpret_cast<char *>(last_alloc->tail) - - reinterpret_cast<char *>(last_alloc)); - - if (remainder >= len) { - void* ret = last_alloc->tail; - last_alloc->tail = reinterpret_cast<char *>(last_alloc->tail) + len; - last_alloc->used += len; - return ret; - } - } - - SysCalls sys; - if (sizeof(Header) + len + 4095 < len) { - return NULL; - } - size_t total_len = (sizeof(Header) + len + 4095) & ~4095; - Header* mem = reinterpret_cast<Header *>( - sys.MMAP(NULL, total_len, PROT_READ|PROT_WRITE, - MAP_PRIVATE|MAP_ANONYMOUS, -1, 0)); - if (mem == MAP_FAILED) { - return NULL; - } - - // If we were only asked to allocate a single page, then we will use any - // remaining space for other small allocations. - if (total_len - sizeof(Header) - len >= 4) { - last_alloc = mem; - } - mem->total_len = total_len; - mem->used = len; - char* ret = reinterpret_cast<char *>(mem) + sizeof(Header); - mem->tail = ret + len; - - return ret; -} - -void SystemAllocatorHelper::sys_deallocate(void* p, size_t size) { - // Number of bytes in this allocation - if (size + 3 < size) { - return; - } - size_t len = (size + 3) & ~3; - - // All allocations (small and large) have starting addresses in the - // first page that was allocated from the system. This page starts with - // a header that keeps track of how many bytes are currently used. The - // header can be found by truncating the last few bits of the address. 
- Header* header = reinterpret_cast<Header *>( - reinterpret_cast<uintptr_t>(p) & ~4095); - header->used -= len; - - // After the last allocation has been freed, return the page(s) to the - // system - if (!header->used) { - SysCalls sys; - sys.munmap(header, header->total_len); - if (last_alloc == header) { - last_alloc = NULL; - } - } -} - -} // namespace diff --git a/sandbox/linux/seccomp/allocator.h b/sandbox/linux/seccomp/allocator.h deleted file mode 100644 index 29e0065..0000000 --- a/sandbox/linux/seccomp/allocator.h +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Implement a very basic memory allocator that make direct system calls -// instead of relying on libc. -// This allocator is not thread-safe. - -#ifndef ALLOCATOR_H__ -#define ALLOCATOR_H__ - -#include <cstddef> - -namespace playground { - -class SystemAllocatorHelper { - protected: - static void *sys_allocate(size_t size); - static void sys_deallocate(void* p, size_t size); -}; - -template <class T> -class SystemAllocator : SystemAllocatorHelper { - public: - typedef T value_type; - typedef T* pointer; - typedef const T* const_pointer; - typedef T& reference; - typedef const T& const_reference; - typedef size_t size_type; - typedef std::ptrdiff_t difference_type; - - template <class U> - struct rebind { - typedef SystemAllocator<U> other; - }; - - pointer address(reference value) const { - return &value; - } - - const_pointer address(const_reference value) const { - return &value; - } - - SystemAllocator() throw() { } - SystemAllocator(const SystemAllocator& src) throw() { } - template <class U> SystemAllocator(const SystemAllocator<U>& src) throw() { } - ~SystemAllocator() throw() { } - - size_type max_size() const throw() { - return (1 << 30) / sizeof(T); - } - - pointer allocate(size_type num, const void* = 0) { - if (num > max_size()) 
{ - return NULL; - } - return (pointer)sys_allocate(num * sizeof(T)); - } - - void construct(pointer p, const T& value) { - new(reinterpret_cast<void *>(p))T(value); - } - - void destroy(pointer p) { - p->~T(); - } - - void deallocate(pointer p, size_type num) { - sys_deallocate(p, num * sizeof(T)); - } -}; - -template <class T1, class T2> -bool operator== (const SystemAllocator<T1>&, const SystemAllocator<T2>&) - throw() { - return true; -} -template <class T1, class T2> -bool operator!= (const SystemAllocator<T1>&, const SystemAllocator<T2>&) - throw() { - return false; -} - -} // namespace - -#endif // ALLOCATOR_H__ diff --git a/sandbox/linux/seccomp/clone.cc b/sandbox/linux/seccomp/clone.cc deleted file mode 100644 index 0d35181..0000000 --- a/sandbox/linux/seccomp/clone.cc +++ /dev/null @@ -1,179 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "debug.h" -#include "sandbox_impl.h" - -namespace playground { - -long Sandbox::sandbox_clone(int flags, char* stack, int* pid, int* ctid, - void* tls, void *wrapper_sp) { - long long tm; - Debug::syscall(&tm, __NR_clone, "Executing handler"); - struct { - int sysnum; - long long cookie; - Clone clone_req; - } __attribute__((packed)) request; - request.sysnum = __NR_clone; - request.cookie = cookie(); - request.clone_req.flags = flags; - request.clone_req.stack = stack; - request.clone_req.pid = pid; - request.clone_req.ctid = ctid; - request.clone_req.tls = tls; - - // TODO(markus): Passing stack == 0 currently does not do the same thing - // that the kernel would do without the sandbox. This is just going to - // cause a crash. We should detect this case, and replace the stack pointer - // with the correct value, instead. - // This is complicated by the fact that we will temporarily be executing - // both threads from the same stack. Some synchronization will be necessary. 
- // Fortunately, this complication also explains why hardly anybody ever - // does this. - // See trusted_thread.cc for more information. - long rc; - if (stack == 0) { - rc = -EINVAL; - } else { - // Pass along the address on the stack where syscallWrapper() stored the - // original CPU registers. These registers will be restored in the newly - // created thread prior to returning from the wrapped system call. - #if defined(__x86_64__) - memcpy(&request.clone_req.regs64, wrapper_sp, - sizeof(request.clone_req.regs64) + sizeof(void *)); - #elif defined(__i386__) - memcpy(&request.clone_req.regs32, wrapper_sp, - sizeof(request.clone_req.regs32) + sizeof(void *)); - #else - #error Unsupported target platform - #endif - - // In order to unblock the signal mask in the newly created thread and - // after entering Seccomp mode, we have to call sigreturn(). But that - // requires access to a proper stack frame describing a valid signal. - // We trigger a signal now and make sure the stack frame ends up on the - // new stack. Our segv() handler (in sandbox.cc) does that for us. - // See trusted_thread.cc for more details on how threads get created. - // - // In general we rely on the kernel for generating the signal stack - // frame, as the exact binary format has been extended several times over - // the course of the kernel's development. Fortunately, the kernel - // developers treat the initial part of the stack frame as a stable part - // of the ABI. So, we can rely on fixed, well-defined offsets for accessing - // register values and for accessing the signal mask. - #if defined(__x86_64__) - // Red zone compensation. The instrumented system call will remove 128 - // bytes from the thread's stack prior to returning to the original - // call site. 
- stack -= 128; - request.clone_req.stack = stack; - void *dummy; - asm volatile("mov %%rsp, %%rcx\n" - "mov %3, %%rsp\n" - "int $0\n" - "mov %%rcx, %%rsp\n" - : "=a"(request.clone_req.stack), "=&c"(dummy) - : "a"(__NR_clone + 0xF000), "m"(request.clone_req.stack) - : "memory"); - #elif defined(__i386__) - void *dummy; - asm volatile("mov %%esp, %%ecx\n" - "mov %3, %%esp\n" - "int $0\n" - "mov %%ecx, %%esp\n" - : "=a"(request.clone_req.stack), "=&c"(dummy) - : "a"(__NR_clone + 0xF000), "m"(request.clone_req.stack) - : "memory"); - #else - #error Unsupported target platform - #endif - - // Adjust the signal stack frame so that it contains the correct stack - // pointer upon returning from sigreturn(). - #if defined(__x86_64__) - *(char **)(request.clone_req.stack + 0xA0) = stack; - #elif defined(__i386__) - *(char **)(request.clone_req.stack + 0x1C) = stack; - #else - #error Unsupported target platform - #endif - - SysCalls sys; - if (write(sys, processFdPub(), &request, sizeof(request)) != - sizeof(request) || - read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to forward clone() request [sandbox]"); - } - } - Debug::elapsed(tm, __NR_clone); - return rc; -} - -bool Sandbox::process_clone(int parentMapsFd, int sandboxFd, int threadFdPub, - int threadFd, SecureMem::Args* mem) { - // Read request - Clone clone_req; - SysCalls sys; - if (read(sys, sandboxFd, &clone_req, sizeof(clone_req)) !=sizeof(clone_req)){ - die("Failed to read parameters for clone() [process]"); - } - - // TODO(markus): add policy restricting parameters for clone - if ((clone_req.flags & ~CLONE_DETACHED) != (CLONE_VM|CLONE_FS|CLONE_FILES| - CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS| - CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID)) { - SecureMem::abandonSystemCall(threadFd, -EPERM); - return false; - } else { - SecureMem::Args* newMem = getNewSecureMem(); - if (!newMem) { - SecureMem::abandonSystemCall(threadFd, -ENOMEM); - return false; - } else { - // clone() has 
unusual semantics. We don't want to return back into the - // trusted thread, but instead we need to continue execution at the IP - // where we got called initially. - SecureMem::lockSystemCall(parentMapsFd, mem); - mem->ret = clone_req.ret; - #if defined(__x86_64__) - mem->rbp = clone_req.regs64.rbp; - mem->rbx = clone_req.regs64.rbx; - mem->rcx = clone_req.regs64.rcx; - mem->rdx = clone_req.regs64.rdx; - mem->rsi = clone_req.regs64.rsi; - mem->rdi = clone_req.regs64.rdi; - mem->r8 = clone_req.regs64.r8; - mem->r9 = clone_req.regs64.r9; - mem->r10 = clone_req.regs64.r10; - mem->r11 = clone_req.regs64.r11; - mem->r12 = clone_req.regs64.r12; - mem->r13 = clone_req.regs64.r13; - mem->r14 = clone_req.regs64.r14; - mem->r15 = clone_req.regs64.r15; - #elif defined(__i386__) - mem->ebp = clone_req.regs32.ebp; - mem->edi = clone_req.regs32.edi; - mem->esi = clone_req.regs32.esi; - mem->edx = clone_req.regs32.edx; - mem->ecx = clone_req.regs32.ecx; - mem->ebx = clone_req.regs32.ebx; - #else - #error Unsupported target platform - #endif - newMem->sequence = 0; - newMem->shmId = -1; - mem->newSecureMem = newMem; - mem->processFdPub = processFdPub_; - mem->cloneFdPub = cloneFdPub_; - - SecureMem::sendSystemCall(threadFdPub, true, parentMapsFd, mem, - __NR_clone, clone_req.flags, clone_req.stack, - clone_req.pid, clone_req.ctid, clone_req.tls); - return true; - } - } -} - -} // namespace diff --git a/sandbox/linux/seccomp/debug.cc b/sandbox/linux/seccomp/debug.cc deleted file mode 100644 index 5d6de49..0000000 --- a/sandbox/linux/seccomp/debug.cc +++ /dev/null @@ -1,363 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#ifndef NDEBUG - -#include "debug.h" - -namespace playground { - -bool Debug::enabled_; -int Debug::numSyscallNames_; -const char **Debug::syscallNames_; -std::map<int, std::string> Debug::syscallNamesMap_; - -Debug Debug::debug_; - -Debug::Debug() { - // Logging is disabled by default, but can be turned on by setting an - // appropriate environment variable. Initialize this code from a global - // constructor, so that it runs before the sandbox is turned on. - enabled_ = !!getenv("SECCOMP_SANDBOX_DEBUGGING"); - - // Read names of system calls from header files, if available. Symbolic - // names make debugging so much nicer. - if (enabled_) { - static const char *filenames[] = { - #if __WORDSIZE == 64 - "/usr/include/asm/unistd_64.h", - #elif __WORDSIZE == 32 - "/usr/include/asm/unistd_32.h", - #endif - "/usr/include/asm/unistd.h", - NULL }; - numSyscallNames_ = 0; - for (const char **fn = filenames; *fn; ++fn) { - FILE *fp = fopen(*fn, "r"); - if (fp) { - std::string baseName; - int baseNum = -1; - char buf[80]; - while (fgets(buf, sizeof(buf), fp)) { - // Check if the line starts with "#define" - static const char* whitespace = " \t\r\n"; - char *token, *save; - token = strtok_r(buf, whitespace, &save); - if (token && !strcmp(token, "#define")) { - - // Only parse identifiers that start with "__NR_" - token = strtok_r(NULL, whitespace, &save); - if (token) { - if (strncmp(token, "__NR_", 5)) { - continue; - } - std::string syscallName(token + 5); - - // Parse the value of the symbol. Try to be forgiving in what - // we accept, as the file format might change over time. - token = strtok_r(NULL, "\r\n", &save); - if (token) { - // Some values are defined relative to previous values, we - // detect these examples by finding an earlier symbol name - // followed by a '+' plus character. 
- bool isRelative = false; - char *base = strstr(token, baseName.c_str()); - if (baseNum >= 0 && base) { - base += baseName.length(); - while (*base == ' ' || *base == '\t') { - ++base; - } - if (*base == '+') { - isRelative = true; - token = base; - } - } - - // Skip any characters that are not part of the syscall number. - while (*token < '0' || *token > '9') { - token++; - } - - // If we now have a valid datum, enter it into our map. - if (*token) { - int sysnum = atoi(token); - - // Deal with symbols that are defined relative to earlier - // ones. - if (isRelative) { - sysnum += baseNum; - } else { - baseNum = sysnum; - baseName = syscallName; - } - - // Keep track of the highest syscall number that we know - // about. - if (sysnum >= numSyscallNames_) { - numSyscallNames_ = sysnum + 1; - } - - syscallNamesMap_[sysnum] = syscallName; - } - } - } - } - } - fclose(fp); - break; - } - } - if (numSyscallNames_) { - // We cannot make system calls at the time, when we are looking up - // the names. So, copy them into a data structure that can be - // accessed without having to allocated memory (i.e. no more STL). - syscallNames_ = reinterpret_cast<const char **>( - calloc(sizeof(char *), numSyscallNames_)); - for (std::map<int, std::string>::const_iterator iter = - syscallNamesMap_.begin(); - iter != syscallNamesMap_.end(); - ++iter) { - syscallNames_[iter->first] = iter->second.c_str(); - } - } - } -} - -bool Debug::enter() { - // Increment the recursion level in TLS storage. This allows us to - // make system calls from within our debugging functions, without triggering - // additional debugging output. - // - // This function can be called from both the sandboxed process and from the - // trusted process. Only the sandboxed process needs to worry about - // recursively calling system calls. The trusted process doesn't intercept - // system calls and thus doesn't have this problem. It also doesn't have - // a TLS. 
We explicitly set the segment selector to zero, when in the - // trusted process, so that we can avoid tracking recursion levels. - int level; - #if defined(__x86_64__) - asm volatile("mov %%gs, %0\n" - "test %0, %0\n" - "jz 1f\n" - "movl %%gs:0x1050-0xE0, %0\n" - "incl %%gs:0x1050-0xE0\n" - "1:\n" - : "=r"(level) - : - : "memory"); - #elif defined(__i386__) - asm volatile("mov %%fs, %0\n" - "test %0, %0\n" - "jz 1f\n" - "movl %%fs:0x1034-0x58, %0\n" - "incl %%fs:0x1034-0x58\n" - "1:\n" - : "=r"(level) - : - : "memory"); - #else - #error "Unsupported target platform" - #endif - return !level; -} - -bool Debug::leave() { - // Decrement the recursion level in TLS storage. This allows us to - // make system calls from within our debugging functions, without triggering - // additional debugging output. - // - // This function can be called from both the sandboxed process and from the - // trusted process. Only the sandboxed process needs to worry about - // recursively calling system calls. The trusted process doesn't intercept - // system calls and thus doesn't have this problem. It also doesn't have - // a TLS. We explicitly set the segment selector to zero, when in the - // trusted process, so that we can avoid tracking recursion levels. - int level; - #if defined(__x86_64__) - asm volatile("mov %%gs, %0\n" - "test %0, %0\n" - "jz 1f\n" - "decl %%gs:0x1050-0xE0\n" - "movl %%gs:0x1050-0xE0, %0\n" - "1:\n" - : "=r"(level) - : - : "memory"); - #elif defined(__i386__) - asm volatile("mov %%fs, %0\n" - "test %0, %0\n" - "jz 1f\n" - "decl %%fs:0x1034-0x58\n" - "movl %%fs:0x1034-0x58, %0\n" - "1:\n" - : "=r"(level) - : - : "memory"); - #else - #error Unsupported target platform - #endif - return !level; -} - -void Debug::_message(const char* msg) { - if (enabled_) { - Sandbox::SysCalls sys; - size_t len = strlen(msg); - if (len && msg[len-1] != '\n') { - // Write operations should be atomic, so that we don't interleave - // messages from multiple threads. 
Append a newline, if it is not - // already there. - char copy[len + 1]; - memcpy(copy, msg, len); - copy[len] = '\n'; - Sandbox::write(sys, 2, copy, len + 1); - } else { - Sandbox::write(sys, 2, msg, len); - } - } -} - -void Debug::message(const char* msg) { - if (enabled_) { - if (enter()) { - _message(msg); - } - leave(); - } -} - -void Debug::gettimeofday(long long* tm) { - if (tm) { - struct timeval tv; - #if defined(__i386__) - // Zero out the lastSyscallNum, so that we don't try to coalesce - // calls to gettimeofday(). For debugging purposes, we need the - // exact time. - asm volatile("movl $0, %fs:0x102C-0x58"); - #elif !defined(__x86_64__) - #error Unsupported target platform - #endif - ::gettimeofday(&tv, NULL); - *tm = 1000ULL*1000ULL*static_cast<unsigned>(tv.tv_sec) + - static_cast<unsigned>(tv.tv_usec); - } -} - -void Debug::syscall(long long* tm, int sysnum, const char* msg, int call) { - // This function gets called from the system call wrapper. Avoid calling - // any library functions that themselves need system calls. 
- if (enabled_) { - if (enter() || !tm) { - gettimeofday(tm); - - const char *sysname = NULL; - if (sysnum >= 0 && sysnum < numSyscallNames_) { - sysname = syscallNames_[sysnum]; - } - static const char kUnnamedMessage[] = "Unnamed syscall #"; - char unnamed[40]; - if (!sysname) { - memcpy(unnamed, kUnnamedMessage, sizeof(kUnnamedMessage) - 1); - itoa(unnamed + sizeof(kUnnamedMessage) - 1, sysnum); - sysname = unnamed; - } - #if defined(__NR_socketcall) || defined(__NR_ipc) - char extra[40]; - *extra = '\000'; - #if defined(__NR_socketcall) - if (sysnum == __NR_socketcall) { - static const char* socketcall_name[] = { - 0, "socket", "bind", "connect", "listen", "accept", "getsockname", - "getpeername", "socketpair", "send", "recv", "sendto","recvfrom", - "shutdown", "setsockopt", "getsockopt", "sendmsg", "recvmsg", - "accept4" - }; - if (call >= 1 && - call < (int)(sizeof(socketcall_name)/sizeof(char *))) { - strcat(strcpy(extra, " "), socketcall_name[call]); - } else { - itoa(strcpy(extra, " #") + 2, call); - } - } - #endif - #if defined(__NR_ipc) - if (sysnum == __NR_ipc) { - static const char* ipc_name[] = { - 0, "semop", "semget", "semctl", "semtimedop", 0, 0, 0, 0, 0, 0, - "msgsnd", "msgrcv", "msgget", "msgctl", 0, 0, 0, 0, 0, 0, - "shmat", "shmdt", "shmget", "shmctl" }; - if (call >= 1 && call < (int)(sizeof(ipc_name)/sizeof(char *)) && - ipc_name[call]) { - strcat(strcpy(extra, " "), ipc_name[call]); - } else { - itoa(strcpy(extra, " #") + 2, call); - } - } - #endif - #else - static const char extra[1] = { 0 }; - #endif - char buf[strlen(sysname) + strlen(extra) + (msg ? strlen(msg) : 0) + 4]; - strcat(strcat(strcat(strcat(strcpy(buf, sysname), extra), ": "), - msg ? 
msg : ""), "\n"); - _message(buf); - } - leave(); - } -} - -char* Debug::itoa(char* s, int n) { - // Remember return value - char *ret = s; - - // Insert sign for negative numbers - if (n < 0) { - *s++ = '-'; - n = -n; - } - - // Convert to decimal (in reverse order) - char *start = s; - do { - *s++ = '0' + (n % 10); - n /= 10; - } while (n); - *s-- = '\000'; - - // Reverse order of digits - while (start < s) { - char ch = *s; - *s-- = *start; - *start++ = ch; - } - - return ret; -} - -void Debug::elapsed(long long tm, int sysnum, int call) { - if (enabled_) { - if (enter()) { - // Compute the time that has passed since the system call started. - long long delta; - gettimeofday(&delta); - delta -= tm; - - // Format "Elapsed time: %d.%03dms" without using sprintf(). - char buf[80]; - itoa(strrchr(strcpy(buf, "Elapsed time: "), '\000'), delta/1000); - delta %= 1000; - strcat(buf, delta < 100 ? delta < 10 ? ".00" : ".0" : "."); - itoa(strrchr(buf, '\000'), delta); - strcat(buf, "ms"); - - // Print system call name and elapsed time. - syscall(NULL, sysnum, buf, call); - } - leave(); - } -} - -} // namespace - -#endif // NDEBUG diff --git a/sandbox/linux/seccomp/debug.h b/sandbox/linux/seccomp/debug.h deleted file mode 100644 index eb5a194..0000000 --- a/sandbox/linux/seccomp/debug.h +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef DEBUG_H__ -#define DEBUG_H__ - -#include <map> -#include <stdio.h> -#include <stdlib.h> -#include <string> -#include <string.h> - -#include "sandbox_impl.h" - -namespace playground { - -class Debug { - public: - // If debugging is enabled, write a message to stderr. 
- static void message(const char* msg) - #ifndef NDEBUG - asm("playground$debugMessage") - #if defined(__x86_64__) - __attribute__((visibility("internal"))) - #endif - ; - #else - { } - #endif - - // If debugging is enabled, write the name of the syscall and an optional - // message to stderr. - static void syscall(long long* tm, int sysnum, - const char* msg, int call = -1) - #ifndef NDEBUG - ; - #else - { } - #endif - - // Print how much wall-time has elapsed since the last call to syscall() - static void elapsed(long long tm, int sysnum, int call = -1) - #ifndef NDEBUG - ; - #else - { - } - #endif - - // Check whether debugging is enabled. - static bool isEnabled() { - #ifndef NDEBUG - return enabled_; - #else - return false; - #endif - } - - private: - #ifndef NDEBUG - Debug(); - static bool enter(); - static bool leave(); - static void _message(const char* msg); - static void gettimeofday(long long* tm); - static char* itoa(char* s, int n); - - static Debug debug_; - - static bool enabled_; - static int numSyscallNames_; - static const char **syscallNames_; - static std::map<int, std::string> syscallNamesMap_; - #endif -}; - -} // namespace - -#endif // DEBUG_H__ diff --git a/sandbox/linux/seccomp/exit.cc b/sandbox/linux/seccomp/exit.cc deleted file mode 100644 index f4db643..0000000 --- a/sandbox/linux/seccomp/exit.cc +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include "debug.h" -#include "sandbox_impl.h" - -namespace playground { - -long Sandbox::sandbox_exit(int status) { - long long tm; - Debug::syscall(&tm, __NR_exit, "Executing handler"); - struct { - int sysnum; - long long cookie; - } __attribute__((packed)) request; - request.sysnum = __NR_exit; - request.cookie = cookie(); - - SysCalls sys; - if (write(sys, processFdPub(), &request, sizeof(request)) != - sizeof(request)) { - die("Failed to forward exit() request [sandbox]"); - } - for (;;) { - sys._exit(status); - } -} - -bool Sandbox::process_exit(int parentMapsFd, int sandboxFd, int threadFdPub, - int threadFd, SecureMem::Args* mem) { - SecureMem::lockSystemCall(parentMapsFd, mem); - SecureMem::sendSystemCall(threadFdPub, true, parentMapsFd, mem, - __NR_exit, 0); - return true; -} - -} // namespace diff --git a/sandbox/linux/seccomp/getpid.cc b/sandbox/linux/seccomp/getpid.cc deleted file mode 100644 index be5449b..0000000 --- a/sandbox/linux/seccomp/getpid.cc +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "debug.h" -#include "sandbox_impl.h" - -namespace playground { - -long Sandbox::sandbox_getpid() { - long long tm; - Debug::syscall(&tm, __NR_getpid, "Executing handler"); - Debug::elapsed(tm, __NR_getpid); - return pid_; -} - -} // namespace diff --git a/sandbox/linux/seccomp/gettid.cc b/sandbox/linux/seccomp/gettid.cc deleted file mode 100644 index 699774a..0000000 --- a/sandbox/linux/seccomp/gettid.cc +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include "debug.h" -#include "sandbox_impl.h" - -namespace playground { - -long Sandbox::sandbox_gettid() { - long long tm; - Debug::syscall(&tm, __NR_gettid, "Executing handler"); - pid_t t = tid(); - Debug::elapsed(tm, __NR_gettid); - return t; -} - -} // namespace diff --git a/sandbox/linux/seccomp/ioctl.cc b/sandbox/linux/seccomp/ioctl.cc deleted file mode 100644 index 4d2b3c5c5..0000000 --- a/sandbox/linux/seccomp/ioctl.cc +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "debug.h" -#include "sandbox_impl.h" - -namespace playground { - -long Sandbox::sandbox_ioctl(int d, int req, void *arg) { - long long tm; - Debug::syscall(&tm, __NR_ioctl, "Executing handler"); - struct { - int sysnum; - long long cookie; - IOCtl ioctl_req; - } __attribute__((packed)) request; - request.sysnum = __NR_ioctl; - request.cookie = cookie(); - request.ioctl_req.d = d; - request.ioctl_req.req = req; - request.ioctl_req.arg = arg; - - long rc; - SysCalls sys; - if (write(sys, processFdPub(), &request, sizeof(request)) != - sizeof(request) || - read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to forward ioctl() request [sandbox]"); - } - Debug::elapsed(tm, __NR_ioctl); - return rc; -} - -bool Sandbox::process_ioctl(int parentMapsFd, int sandboxFd, int threadFdPub, - int threadFd, SecureMem::Args* mem) { - // Read request - IOCtl ioctl_req; - SysCalls sys; - if (read(sys, sandboxFd, &ioctl_req, sizeof(ioctl_req)) !=sizeof(ioctl_req)){ - die("Failed to read parameters for ioctl() [process]"); - } - int rc = -EINVAL; - switch (ioctl_req.req) { - case TCGETS: - case TIOCGWINSZ: - SecureMem::sendSystemCall(threadFdPub, false, -1, mem, __NR_ioctl, - ioctl_req.d, ioctl_req.req, ioctl_req.arg); - return true; - default: - if (Debug::isEnabled()) { - char buf[80]; - sprintf(buf, "Unsupported ioctl: 
0x%04X\n", ioctl_req.req); - Debug::message(buf); - } - SecureMem::abandonSystemCall(threadFd, rc); - return false; - } -} - -} // namespace diff --git a/sandbox/linux/seccomp/ipc.cc b/sandbox/linux/seccomp/ipc.cc deleted file mode 100644 index 67a4e34..0000000 --- a/sandbox/linux/seccomp/ipc.cc +++ /dev/null @@ -1,351 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "debug.h" -#include "sandbox_impl.h" - -namespace playground { - -#ifndef IPC_PRIVATE -#define IPC_PRIVATE 0 -#endif -#ifndef IPC_RMID -#define IPC_RMID 0 -#endif -#ifndef IPC_64 -#define IPC_64 256 -#endif - -#if defined(__NR_shmget) -void* Sandbox::sandbox_shmat(int shmid, const void* shmaddr, int shmflg) { - long long tm; - Debug::syscall(&tm, __NR_shmat, "Executing handler"); - - struct { - int sysnum; - long long cookie; - ShmAt shmat_req; - } __attribute__((packed)) request; - request.sysnum = __NR_shmat; - request.cookie = cookie(); - request.shmat_req.shmid = shmid; - request.shmat_req.shmaddr = shmaddr; - request.shmat_req.shmflg = shmflg; - - long rc; - SysCalls sys; - if (write(sys, processFdPub(), &request, sizeof(request)) != - sizeof(request) || - read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to forward shmat() request [sandbox]"); - } - Debug::elapsed(tm, __NR_shmat); - return reinterpret_cast<void *>(rc); -} - -long Sandbox::sandbox_shmctl(int shmid, int cmd, void* buf) { - long long tm; - Debug::syscall(&tm, __NR_shmctl, "Executing handler"); - - struct { - int sysnum; - long long cookie; - ShmCtl shmctl_req; - } __attribute__((packed)) request; - request.sysnum = __NR_shmctl; - request.cookie = cookie(); - request.shmctl_req.shmid = shmid; - request.shmctl_req.cmd = cmd; - request.shmctl_req.buf = buf; - - long rc; - SysCalls sys; - if (write(sys, processFdPub(), &request, sizeof(request)) != - sizeof(request) || - 
read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to forward shmctl() request [sandbox]"); - } - Debug::elapsed(tm, __NR_shmctl); - return rc; -} - -long Sandbox::sandbox_shmdt(const void* shmaddr) { - long long tm; - Debug::syscall(&tm, __NR_shmdt, "Executing handler"); - - struct { - int sysnum; - long long cookie; - ShmDt shmdt_req; - } __attribute__((packed)) request; - request.sysnum = __NR_shmdt; - request.cookie = cookie(); - request.shmdt_req.shmaddr = shmaddr; - - long rc; - SysCalls sys; - if (write(sys, processFdPub(), &request, sizeof(request)) != - sizeof(request) || - read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to forward shmdt() request [sandbox]"); - } - Debug::elapsed(tm, __NR_shmdt); - return rc; -} - -long Sandbox::sandbox_shmget(int key, size_t size, int shmflg) { - long long tm; - Debug::syscall(&tm, __NR_shmget, "Executing handler"); - - struct { - int sysnum; - long long cookie; - ShmGet shmget_req; - } __attribute__((packed)) request; - request.sysnum = __NR_shmget; - request.cookie = cookie(); - request.shmget_req.key = key; - request.shmget_req.size = size; - request.shmget_req.shmflg = shmflg; - - long rc; - SysCalls sys; - if (write(sys, processFdPub(), &request, sizeof(request)) != - sizeof(request) || - read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to forward shmget() request [sandbox]"); - } - Debug::elapsed(tm, __NR_shmget); - return rc; -} - -bool Sandbox::process_shmat(int parentMapsFd, int sandboxFd, int threadFdPub, - int threadFd, SecureMem::Args* mem) { - // Read request - ShmAt shmat_req; - SysCalls sys; - if (read(sys, sandboxFd, &shmat_req, sizeof(shmat_req)) != - sizeof(shmat_req)) { - die("Failed to read parameters for shmat() [process]"); - } - - // We only allow attaching to the shm identifier that was returned by - // the most recent call to shmget(IPC_PRIVATE) - if (shmat_req.shmaddr || shmat_req.shmflg || shmat_req.shmid != mem->shmId) { - 
mem->shmId = -1; - SecureMem::abandonSystemCall(threadFd, -EINVAL); - return false; - } - - mem->shmId = -1; - SecureMem::sendSystemCall(threadFdPub, false, -1, mem, - __NR_shmat, shmat_req.shmid, shmat_req.shmaddr, - shmat_req.shmflg); - return true; -} - -bool Sandbox::process_shmctl(int parentMapsFd, int sandboxFd, int threadFdPub, - int threadFd, SecureMem::Args* mem) { - // Read request - ShmCtl shmctl_req; - SysCalls sys; - if (read(sys, sandboxFd, &shmctl_req, sizeof(shmctl_req)) != - sizeof(shmctl_req)) { - die("Failed to read parameters for shmctl() [process]"); - } - - // The only shmctl() operation that we need to support is removal. This - // operation is generally safe. - if ((shmctl_req.cmd & ~(IPC_64 | IPC_RMID)) || shmctl_req.buf) { - mem->shmId = -1; - SecureMem::abandonSystemCall(threadFd, -EINVAL); - return false; - } - - mem->shmId = -1; - SecureMem::sendSystemCall(threadFdPub, false, -1, mem, - __NR_shmctl, shmctl_req.shmid, shmctl_req.cmd, - shmctl_req.buf); - return true; -} - -bool Sandbox::process_shmdt(int parentMapsFd, int sandboxFd, int threadFdPub, - int threadFd, SecureMem::Args* mem) { - // Read request - ShmDt shmdt_req; - SysCalls sys; - if (read(sys, sandboxFd, &shmdt_req, sizeof(shmdt_req)) != - sizeof(shmdt_req)) { - die("Failed to read parameters for shmdt() [process]"); - } - - // Detaching shared memory segments it generally safe, but just in case - // of a kernel bug, we make sure that the address does not fall into any - // of the reserved memory regions. 
- ProtectedMap::const_iterator iter = protectedMap_.lower_bound( - (void *)shmdt_req.shmaddr); - if (iter != protectedMap_.begin()) { - --iter; - } - for (; iter != protectedMap_.end() && iter->first <= shmdt_req.shmaddr; - ++iter){ - if (shmdt_req.shmaddr < reinterpret_cast<void *>( - reinterpret_cast<char *>(iter->first) + iter->second) && - shmdt_req.shmaddr >= iter->first) { - mem->shmId = -1; - SecureMem::abandonSystemCall(threadFd, -EINVAL); - return false; - } - } - - mem->shmId = -1; - SecureMem::sendSystemCall(threadFdPub, false, -1, mem, - __NR_shmdt, shmdt_req.shmaddr); - return true; -} - -bool Sandbox::process_shmget(int parentMapsFd, int sandboxFd, int threadFdPub, - int threadFd, SecureMem::Args* mem) { - // Read request - ShmGet shmget_req; - SysCalls sys; - if (read(sys, sandboxFd, &shmget_req, sizeof(shmget_req)) != - sizeof(shmget_req)) { - die("Failed to read parameters for shmget() [process]"); - } - - // We do not want to allow the sandboxed application to access arbitrary - // shared memory regions. We only allow it to access regions that it - // created itself. 
- if (shmget_req.key != IPC_PRIVATE || shmget_req.shmflg & ~0777) { - mem->shmId = -1; - SecureMem::abandonSystemCall(threadFd, -EINVAL); - return false; - } - - mem->shmId = -1; - SecureMem::sendSystemCall(threadFdPub, false, -1, mem, - __NR_shmget, shmget_req.key, shmget_req.size, - shmget_req.shmflg); - return true; -} -#endif - -#if defined(__NR_ipc) -#ifndef SHMAT -#define SHMAT 21 -#endif -#ifndef SHMDT -#define SHMDT 22 -#endif -#ifndef SHMGET -#define SHMGET 23 -#endif -#ifndef SHMCTL -#define SHMCTL 24 -#endif - -long Sandbox::sandbox_ipc(unsigned call, int first, int second, int third, - void* ptr, long fifth) { - long long tm; - Debug::syscall(&tm, __NR_ipc, "Executing handler", call); - struct { - int sysnum; - long long cookie; - IPC ipc_req; - } __attribute__((packed)) request; - request.sysnum = __NR_ipc; - request.cookie = cookie(); - request.ipc_req.call = call; - request.ipc_req.first = first; - request.ipc_req.second = second; - request.ipc_req.third = third; - request.ipc_req.ptr = ptr; - request.ipc_req.fifth = fifth; - - long rc; - SysCalls sys; - if (write(sys, processFdPub(), &request, sizeof(request)) != - sizeof(request) || - read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to forward ipc() request [sandbox]"); - } - Debug::elapsed(tm, __NR_ipc, call); - return rc; -} - -bool Sandbox::process_ipc(int parentMapsFd, int sandboxFd, int threadFdPub, - int threadFd, SecureMem::Args* mem) { - // Read request - IPC ipc_req; - SysCalls sys; - if (read(sys, sandboxFd, &ipc_req, sizeof(ipc_req)) != sizeof(ipc_req)) { - die("Failed to read parameters for ipc() [process]"); - } - - // We do not support all of the SysV IPC calls. In fact, we only support - // the minimum feature set necessary for Chrome's renderers to share memory - // with the X server. 
- switch (ipc_req.call) { - case SHMAT: { - // We only allow attaching to the shm identifier that was returned by - // the most recent call to shmget(IPC_PRIVATE) - if (ipc_req.ptr || ipc_req.second || ipc_req.first != mem->shmId) { - goto deny; - } - accept: - mem->shmId = -1; - SecureMem::sendSystemCall(threadFdPub, false, -1, mem, - __NR_ipc, ipc_req.call, ipc_req.first, - ipc_req.second, ipc_req.third, ipc_req.ptr, - ipc_req.fifth); - return true; - } - case SHMCTL: - // The only shmctl() operation that we need to support is removal. This - // operation is generally safe. - if ((ipc_req.second & ~(IPC_64 | IPC_RMID)) || ipc_req.ptr) { - goto deny; - } else { - goto accept; - } - case SHMDT: { - // Detaching shared memory segments it generally safe, but just in case - // of a kernel bug, we make sure that the address does not fall into any - // of the reserved memory regions. - ProtectedMap::const_iterator iter = protectedMap_.lower_bound( - (void *)ipc_req.ptr); - if (iter != protectedMap_.begin()) { - --iter; - } - for (; iter != protectedMap_.end() && iter->first <=ipc_req.ptr; ++iter){ - if (ipc_req.ptr < reinterpret_cast<void *>( - reinterpret_cast<char *>(iter->first) + iter->second) && - ipc_req.ptr >= iter->first) { - goto deny; - } - } - goto accept; - } - case SHMGET: - // We do not want to allow the sandboxed application to access arbitrary - // shared memory regions. We only allow it to access regions that it - // created itself. - if (ipc_req.first != IPC_PRIVATE || ipc_req.third & ~0777) { - goto deny; - } else { - goto accept; - } - default: - // Other than SysV shared memory, we do not actually need to support any - // other SysV IPC calls. 
- deny: - mem->shmId = -1; - SecureMem::abandonSystemCall(threadFd, -EINVAL); - return false; - } -} -#endif - -} // namespace diff --git a/sandbox/linux/seccomp/library.cc b/sandbox/linux/seccomp/library.cc deleted file mode 100644 index 8dd9b93..0000000 --- a/sandbox/linux/seccomp/library.cc +++ /dev/null @@ -1,1208 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#define XOPEN_SOURCE 500 -#include <algorithm> -#include <elf.h> -#include <errno.h> -#include <errno.h> -#include <fcntl.h> -#include <linux/unistd.h> -#include <set> -#include <signal.h> -#include <stdarg.h> -#include <stdio.h> -#include <stdlib.h> -#include <sys/ptrace.h> -#include <sys/resource.h> -#include <sys/stat.h> -#include <sys/types.h> - -#include "allocator.h" -#include "debug.h" -#include "library.h" -#include "sandbox_impl.h" -#include "syscall.h" -#include "syscall_table.h" -#include "x86_decode.h" - -#if defined(__x86_64__) -typedef Elf64_Phdr Elf_Phdr; -typedef Elf64_Rela Elf_Rel; - -typedef Elf64_Half Elf_Half; -typedef Elf64_Word Elf_Word; -typedef Elf64_Sword Elf_Sword; -typedef Elf64_Xword Elf_Xword; -typedef Elf64_Sxword Elf_Sxword; -typedef Elf64_Off Elf_Off; -typedef Elf64_Section Elf_Section; -typedef Elf64_Versym Elf_Versym; - -#define ELF_ST_BIND ELF64_ST_BIND -#define ELF_ST_TYPE ELF64_ST_TYPE -#define ELF_ST_INFO ELF64_ST_INFO -#define ELF_R_SYM ELF64_R_SYM -#define ELF_R_TYPE ELF64_R_TYPE -#define ELF_R_INFO ELF64_R_INFO - -#define ELF_REL_PLT ".rela.plt" -#define ELF_JUMP_SLOT R_X86_64_JUMP_SLOT -#elif defined(__i386__) -typedef Elf32_Phdr Elf_Phdr; -typedef Elf32_Rel Elf_Rel; - -typedef Elf32_Half Elf_Half; -typedef Elf32_Word Elf_Word; -typedef Elf32_Sword Elf_Sword; -typedef Elf32_Xword Elf_Xword; -typedef Elf32_Sxword Elf_Sxword; -typedef Elf32_Off Elf_Off; -typedef Elf32_Section Elf_Section; -typedef Elf32_Versym Elf_Versym; - -#define 
ELF_ST_BIND ELF32_ST_BIND -#define ELF_ST_TYPE ELF32_ST_TYPE -#define ELF_ST_INFO ELF32_ST_INFO -#define ELF_R_SYM ELF32_R_SYM -#define ELF_R_TYPE ELF32_R_TYPE -#define ELF_R_INFO ELF32_R_INFO - -#define ELF_REL_PLT ".rel.plt" -#define ELF_JUMP_SLOT R_386_JMP_SLOT -#else -#error Unsupported target platform -#endif - -namespace playground { - -char* Library::__kernel_vsyscall; -char* Library::__kernel_sigreturn; -char* Library::__kernel_rt_sigreturn; - -Library::~Library() { - if (image_size_) { - // We no longer need access to a full mapping of the underlying library - // file. Move the temporarily extended mapping back to where we originally - // found. Make sure to preserve any changes that we might have made since. - Sandbox::SysCalls sys; - sys.mprotect(image_, 4096, PROT_READ | PROT_WRITE | PROT_EXEC); - if (memcmp(image_, memory_ranges_.rbegin()->second.start, 4096)) { - // Only copy data, if we made any changes in this data. Otherwise there - // is no need to create another modified COW mapping. - memcpy(image_, memory_ranges_.rbegin()->second.start, 4096); - } - sys.mprotect(image_, 4096, PROT_READ | PROT_EXEC); - sys.mremap(image_, image_size_, 4096, MREMAP_MAYMOVE | MREMAP_FIXED, - memory_ranges_.rbegin()->second.start); - } -} - -char* Library::getBytes(char* dst, const char* src, ssize_t len) { - // Some kernels don't allow accessing the VDSO from write() - if (isVDSO_ && - src >= memory_ranges_.begin()->second.start && - src <= memory_ranges_.begin()->second.stop) { - ssize_t max = - reinterpret_cast<char *>(memory_ranges_.begin()->second.stop) - src; - if (len > max) { - len = max; - } - memcpy(dst, src, len); - return dst; - } - - // Read up to "len" bytes from "src" and copy them to "dst". Short - // copies are possible, if we are at the end of a mapping. Returns - // NULL, if the operation failed completely. 
- static int helper_socket[2]; - Sandbox::SysCalls sys; - if (!helper_socket[0] && !helper_socket[1]) { - // Copy data through a socketpair, as this allows us to access it - // without incurring a segmentation fault. - sys.socketpair(AF_UNIX, SOCK_STREAM, 0, helper_socket); - } - char* ptr = dst; - int inc = 4096; - while (len > 0) { - ssize_t l = inc == 1 ? inc : 4096 - (reinterpret_cast<long>(src) & 0xFFF); - if (l > len) { - l = len; - } - l = NOINTR_SYS(sys.write(helper_socket[0], src, l)); - if (l == -1) { - if (sys.my_errno == EFAULT) { - if (inc == 1) { - if (ptr == dst) { - return NULL; - } - break; - } - inc = 1; - continue; - } else { - return NULL; - } - } - l = sys.read(helper_socket[1], ptr, l); - if (l <= 0) { - return NULL; - } - ptr += l; - src += l; - len -= l; - } - return dst; -} - -char *Library::get(Elf_Addr offset, char *buf, size_t len) { - if (!valid_) { - memset(buf, 0, len); - return NULL; - } - RangeMap::const_iterator iter = memory_ranges_.lower_bound(offset); - if (iter == memory_ranges_.end()) { - memset(buf, 0, len); - return NULL; - } - offset -= iter->first; - long size = reinterpret_cast<char *>(iter->second.stop) - - reinterpret_cast<char *>(iter->second.start); - if (offset > size - len) { - memset(buf, 0, len); - return NULL; - } - char *src = reinterpret_cast<char *>(iter->second.start) + offset; - memset(buf, 0, len); - if (!getBytes(buf, src, len)) { - return NULL; - } - return buf; -} - -Library::string Library::get(Elf_Addr offset) { - if (!valid_) { - return ""; - } - RangeMap::const_iterator iter = memory_ranges_.lower_bound(offset); - if (iter == memory_ranges_.end()) { - return ""; - } - offset -= iter->first; - const char *start = reinterpret_cast<char *>(iter->second.start) + offset; - const char *stop = reinterpret_cast<char *>(iter->second.stop) + offset; - char buf[4096] = { 0 }; - getBytes(buf, start, stop - start >= (int)sizeof(buf) ? 
- sizeof(buf) - 1 : stop - start); - start = buf; - stop = buf; - while (*stop) { - ++stop; - } - string s = stop > start ? string(start, stop - start) : ""; - return s; -} - -char *Library::getOriginal(Elf_Addr offset, char *buf, size_t len) { - if (!valid_) { - memset(buf, 0, len); - return NULL; - } - Sandbox::SysCalls sys; - if (!image_ && !isVDSO_ && !memory_ranges_.empty() && - memory_ranges_.rbegin()->first == 0) { - // Extend the mapping of the very first page of the underlying library - // file. This way, we can read the original file contents of the entire - // library. - // We have to be careful, because doing so temporarily removes the first - // 4096 bytes of the library from memory. And we don't want to accidentally - // unmap code that we are executing. So, only use functions that can be - // inlined. - void* start = memory_ranges_.rbegin()->second.start; - image_size_ = memory_ranges_.begin()->first + - (reinterpret_cast<char *>(memory_ranges_.begin()->second.stop) - - reinterpret_cast<char *>(memory_ranges_.begin()->second.start)); - if (image_size_ < 8192) { - // It is possible to create a library that is only a single page in - // size. In that case, we have to make sure that we artificially map - // one extra page past the end of it, as our code relies on mremap() - // actually moving the mapping. - image_size_ = 8192; - } - image_ = reinterpret_cast<char *>(sys.mremap(start, 4096, image_size_, - MREMAP_MAYMOVE)); - if (image_size_ == 8192 && image_ == start) { - // We really mean it, when we say we want the memory to be moved. 
- image_ = reinterpret_cast<char *>(sys.mremap(start, 4096, image_size_, - MREMAP_MAYMOVE)); - sys.munmap(reinterpret_cast<char *>(start) + 4096, 4096); - } - if (image_ == MAP_FAILED) { - image_ = NULL; - } else { - sys.MMAP(start, 4096, PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); - for (int i = 4096 / sizeof(long); --i; - reinterpret_cast<long *>(start)[i] = - reinterpret_cast<long *>(image_)[i]); - } - } - - if (image_) { - if (offset + len > image_size_) { - // It is quite likely that we initially did not map the entire file as - // we did not know how large it is. So, if necessary, try to extend the - // mapping. - size_t new_size = (offset + len + 4095) & ~4095; - char* tmp = - reinterpret_cast<char *>(sys.mremap(image_, image_size_, new_size, - MREMAP_MAYMOVE)); - if (tmp != MAP_FAILED) { - image_ = tmp; - image_size_ = new_size; - } - } - if (buf && offset + len <= image_size_) { - return reinterpret_cast<char *>(memcpy(buf, image_ + offset, len)); - } - return NULL; - } - return buf ? get(offset, buf, len) : NULL; -} - -Library::string Library::getOriginal(Elf_Addr offset) { - if (!valid_) { - return ""; - } - // Make sure we actually have a mapping that we can access. If the string - // is located at the end of the image, we might not yet have extended the - // mapping sufficiently. 
- if (!image_ || image_size_ <= offset) { - getOriginal(offset, NULL, 1); - } - - if (image_) { - if (offset < image_size_) { - char* start = image_ + offset; - char* stop = start; - while (stop < image_ + image_size_ && *stop) { - ++stop; - if (stop >= image_ + image_size_) { - getOriginal(stop - image_, NULL, 1); - } - } - return string(start, stop - start); - } - return ""; - } - return get(offset); -} - -const Elf_Ehdr* Library::getEhdr() { - if (!valid_) { - return NULL; - } - return &ehdr_; -} - -const Elf_Shdr* Library::getSection(const string& section) { - if (!valid_) { - return NULL; - } - SectionTable::const_iterator iter = section_table_.find(section); - if (iter == section_table_.end()) { - return NULL; - } - return &iter->second.second; -} - -int Library::getSectionIndex(const string& section) { - if (!valid_) { - return -1; - } - SectionTable::const_iterator iter = section_table_.find(section); - if (iter == section_table_.end()) { - return -1; - } - return iter->second.first; -} - -void Library::makeWritable(bool state) const { - for (RangeMap::const_iterator iter = memory_ranges_.begin(); - iter != memory_ranges_.end(); ++iter) { - const Range& range = iter->second; - long length = reinterpret_cast<char *>(range.stop) - - reinterpret_cast<char *>(range.start); - Sandbox::SysCalls sys; - sys.mprotect(range.start, length, - range.prot | (state ? PROT_WRITE : 0)); - } -} - -bool Library::isSafeInsn(unsigned short insn) { - // Check if the instruction has no unexpected side-effects. If so, it can - // be safely relocated from the function that we are patching into the - // out-of-line scratch space that we are setting up. This is often necessary - // to make room for the JMP into the scratch space. 
- return ((insn & 0x7) < 0x6 && (insn & 0xF0) < 0x40 - /* ADD, OR, ADC, SBB, AND, SUB, XOR, CMP */) || - #if defined(__x86_64__) - insn == 0x63 /* MOVSXD */ || - #endif - (insn >= 0x80 && insn <= 0x8E /* ADD, OR, ADC, - SBB, AND, SUB, XOR, CMP, TEST, XCHG, MOV, LEA */) || - (insn == 0x90) || /* NOP */ - (insn >= 0xA0 && insn <= 0xA9) /* MOV, TEST */ || - (insn >= 0xB0 && insn <= 0xBF /* MOV */) || - (insn >= 0xC0 && insn <= 0xC1) || /* Bit Shift */ - (insn >= 0xD0 && insn <= 0xD3) || /* Bit Shift */ - (insn >= 0xC6 && insn <= 0xC7 /* MOV */) || - (insn == 0xF7) /* TEST, NOT, NEG, MUL, IMUL, DIV, IDIV */; -} - -char* Library::getScratchSpace(const Maps* maps, char* near, int needed, - char** extraSpace, int* extraLength) { - if (needed > *extraLength || - labs(*extraSpace - reinterpret_cast<char *>(near)) > (1536 << 20)) { - if (*extraSpace) { - // Start a new scratch page and mark any previous page as write-protected - Sandbox::SysCalls sys; - sys.mprotect(*extraSpace, 4096, PROT_READ|PROT_EXEC); - } - // Our new scratch space is initially executable and writable. 
- *extraLength = 4096; - *extraSpace = maps->allocNearAddr(near, *extraLength, - PROT_READ|PROT_WRITE|PROT_EXEC); - } - if (*extraSpace) { - *extraLength -= needed; - return *extraSpace + *extraLength; - } - Sandbox::die("Insufficient space to intercept system call"); -} - -void Library::patchSystemCallsInFunction(const Maps* maps, char *start, - char *end, char** extraSpace, - int* extraLength) { - std::set<char *, std::less<char *>, SystemAllocator<char *> > branch_targets; - for (char *ptr = start; ptr < end; ) { - unsigned short insn = next_inst((const char **)&ptr, __WORDSIZE == 64); - char *target; - if ((insn >= 0x70 && insn <= 0x7F) /* Jcc */ || insn == 0xEB /* JMP */) { - target = ptr + (reinterpret_cast<signed char *>(ptr))[-1]; - } else if (insn == 0xE8 /* CALL */ || insn == 0xE9 /* JMP */ || - (insn >= 0x0F80 && insn <= 0x0F8F) /* Jcc */) { - target = ptr + (reinterpret_cast<int *>(ptr))[-1]; - } else { - continue; - } - branch_targets.insert(target); - } - struct Code { - char* addr; - int len; - unsigned short insn; - bool is_ip_relative; - } code[5] = { { 0 } }; - int codeIdx = 0; - char* ptr = start; - while (ptr < end) { - // Keep a ring-buffer of the last few instruction in order to find the - // correct place to patch the code. - char *mod_rm; - code[codeIdx].addr = ptr; - code[codeIdx].insn = next_inst((const char **)&ptr, __WORDSIZE == 64, - 0, 0, &mod_rm, 0, 0); - code[codeIdx].len = ptr - code[codeIdx].addr; - code[codeIdx].is_ip_relative = - #if defined(__x86_64__) - mod_rm && (*mod_rm & 0xC7) == 0x5; - #else - false; - #endif - - // Whenever we find a system call, we patch it with a jump to out-of-line - // code that redirects to our system call wrapper. - bool is_syscall = true; - #if defined(__x86_64__) - bool is_indirect_call = false; - if (code[codeIdx].insn == 0x0F05 /* SYSCALL */ || - // In addition, on x86-64, we need to redirect all CALLs between the - // VDSO and the VSyscalls page. 
We want these to jump to our own - // modified copy of the VSyscalls. As we know that the VSyscalls are - // always more than 2GB away from the VDSO, the compiler has to - // generate some form of indirect jumps. We can find all indirect - // CALLs and redirect them to a separate scratch area, where we can - // inspect the destination address. If it indeed points to the - // VSyscall area, we then adjust the destination address accordingly. - (is_indirect_call = - (isVDSO_ && vsys_offset_ && code[codeIdx].insn == 0xFF && - !code[codeIdx].is_ip_relative && - mod_rm && (*mod_rm & 0x38) == 0x10 /* CALL (indirect) */))) { - is_syscall = !is_indirect_call; - #elif defined(__i386__) - bool is_gs_call = false; - if (code[codeIdx].len == 7 && - code[codeIdx].insn == 0xFF && - code[codeIdx].addr[2] == '\x15' /* CALL (indirect) */ && - code[codeIdx].addr[0] == '\x65' /* %gs prefix */) { - char* target; - asm volatile("mov %%gs:(%1), %0\n" - : "=a"(target) - : "c"(*reinterpret_cast<int *>(code[codeIdx].addr+3))); - if (target == __kernel_vsyscall) { - is_gs_call = true; - // TODO(markus): also handle the other vsyscalls - } - } - if (is_gs_call || - (code[codeIdx].insn == 0xCD && - code[codeIdx].addr[1] == '\x80' /* INT $0x80 */)) { - #else - #error Unsupported target platform - #endif - // Found a system call. Search backwards to figure out how to redirect - // the code. We will need to overwrite a couple of instructions and, - // of course, move these instructions somewhere else. - int startIdx = codeIdx; - int endIdx = codeIdx; - int length = code[codeIdx].len; - for (int idx = codeIdx; - (idx = (idx + (sizeof(code) / sizeof(struct Code)) - 1) % - (sizeof(code) / sizeof(struct Code))) != codeIdx; ) { - std::set<char *>::const_iterator iter = - std::upper_bound(branch_targets.begin(), branch_targets.end(), - code[idx].addr); - if (iter != branch_targets.end() && *iter < ptr) { - // Found a branch pointing to somewhere past our instruction. 
This - // instruction cannot be moved safely. Leave it in place. - break; - } - if (code[idx].addr && !code[idx].is_ip_relative && - isSafeInsn(code[idx].insn)) { - // These are all benign instructions with no side-effects and no - // dependency on the program counter. We should be able to safely - // relocate them. - startIdx = idx; - length = ptr - code[startIdx].addr; - } else { - break; - } - } - // Search forward past the system call, too. Sometimes, we can only - // find relocatable instructions following the system call. - #if defined(__i386__) - findEndIdx: - #endif - char *next = ptr; - for (int i = codeIdx; - next < end && - (i = (i + 1) % (sizeof(code) / sizeof(struct Code))) != startIdx; - ) { - std::set<char *>::const_iterator iter = - std::lower_bound(branch_targets.begin(), branch_targets.end(), - next); - if (iter != branch_targets.end() && *iter == next) { - // Found branch target pointing to our instruction - break; - } - char *tmp_rm; - code[i].addr = next; - code[i].insn = next_inst((const char **)&next, __WORDSIZE == 64, - 0, 0, &tmp_rm, 0, 0); - code[i].len = next - code[i].addr; - code[i].is_ip_relative = tmp_rm && (*tmp_rm & 0xC7) == 0x5; - if (!code[i].is_ip_relative && isSafeInsn(code[i].insn)) { - endIdx = i; - length = next - code[startIdx].addr; - } else { - break; - } - } - // We now know, how many instructions neighboring the system call we - // can safely overwrite. On x86-32 we need six bytes, and on x86-64 - // We need five bytes to insert a JMPQ and a 32bit address. We then - // jump to a code fragment that safely forwards to our system call - // wrapper. - // On x86-64, this is complicated by the fact that the API allows up - // to 128 bytes of red-zones below the current stack pointer. So, we - // cannot write to the stack until we have adjusted the stack - // pointer. - // On both x86-32 and x86-64 we take care to leave the stack unchanged - // while we are executing the preamble and postamble. 
This allows us - // to treat instructions that reference %esp/%rsp as safe for - // relocation. - // In particular, this means that on x86-32 we cannot use CALL, but - // have to use a PUSH/RET combination to change the instruction pointer. - // On x86-64, we can instead use a 32bit JMPQ. - // - // .. .. .. .. ; any leading instructions copied from original code - // 48 81 EC 80 00 00 00 SUB $0x80, %rsp - // 50 PUSH %rax - // 48 8D 05 .. .. .. .. LEA ...(%rip), %rax - // 50 PUSH %rax - // 48 B8 .. .. .. .. MOV $syscallWrapper, %rax - // .. .. .. .. - // 50 PUSH %rax - // 48 8D 05 06 00 00 00 LEA 6(%rip), %rax - // 48 87 44 24 10 XCHG %rax, 16(%rsp) - // C3 RETQ - // 48 81 C4 80 00 00 00 ADD $0x80, %rsp - // .. .. .. .. ; any trailing instructions copied from original code - // E9 .. .. .. .. JMPQ ... - // - // Total: 52 bytes + any bytes that were copied - // - // On x86-32, the stack is available and we can do: - // - // TODO(markus): Try to maintain frame pointers on x86-32 - // - // .. .. .. .. ; any leading instructions copied from original code - // 68 .. .. .. .. PUSH return_addr - // 68 .. .. .. .. PUSH $syscallWrapper - // C3 RET - // .. .. .. .. ; any trailing instructions copied from original code - // 68 .. .. .. .. PUSH return_addr - // C3 RET - // - // Total: 17 bytes + any bytes that were copied - // - // For indirect jumps from the VDSO to the VSyscall page, we instead - // replace the following code (this is only necessary on x86-64). This - // time, we don't have to worry about red zones: - // - // .. .. .. .. ; any leading instructions copied from original code - // E8 00 00 00 00 CALL . - // 48 83 04 24 .. ADDQ $.., (%rsp) - // FF .. .. .. .. .. PUSH .. ; from original CALL instruction - // 48 81 3C 24 00 00 00 FF CMPQ $0xFFFFFFFFFF000000, 0(%rsp) - // 72 10 JB . + 16 - // 81 2C 24 .. .. .. .. 
SUBL ..., 0(%rsp) - // C7 44 24 04 00 00 00 00 MOVL $0, 4(%rsp) - // C3 RETQ - // 48 87 04 24 XCHG %rax,(%rsp) - // 48 89 44 24 08 MOV %rax,0x8(%rsp) - // 58 POP %rax - // C3 RETQ - // .. .. .. .. ; any trailing instructions copied from original code - // E9 .. .. .. .. JMPQ ... - // - // Total: 52 bytes + any bytes that were copied - - if (length < (__WORDSIZE == 32 ? 6 : 5)) { - // There are a very small number of instruction sequences that we - // cannot easily intercept, and that have been observed in real world - // examples. Handle them here: - #if defined(__i386__) - int diff; - if (!memcmp(code[codeIdx].addr, "\xCD\x80\xEB", 3) && - (diff = *reinterpret_cast<signed char *>( - code[codeIdx].addr + 3)) < 0 && diff >= -6) { - // We have seen... - // for (;;) { - // _exit(0); - // } - // ..get compiled to: - // B8 01 00 00 00 MOV $__NR_exit, %eax - // 66 90 XCHG %ax, %ax - // 31 DB 0:XOR %ebx, %ebx - // CD 80 INT $0x80 - // EB FA JMP 0b - // The JMP is really superfluous as the system call never returns. - // And there are in fact no returning system calls that need to be - // unconditionally repeated in an infinite loop. - // If we replace the JMP with NOPs, the system call can successfully - // be intercepted. - *reinterpret_cast<unsigned short *>(code[codeIdx].addr + 2) = 0x9090; - goto findEndIdx; - } - #elif defined(__x86_64__) - std::set<char *>::const_iterator iter; - #endif - // If we cannot figure out any other way to intercept this system call, - // we replace it with a call to INT0. This causes a SEGV which we then - // handle in the signal handler. That's a lot slower than rewriting the - // instruction with a jump, but it should only happen very rarely. 
- if (is_syscall) { - memcpy(code[codeIdx].addr, "\xCD", 2); - if (code[codeIdx].len > 2) { - memset(code[codeIdx].addr + 2, 0x90, code[codeIdx].len - 2); - } - goto replaced; - } - #if defined(__x86_64__) - // On x86-64, we occasionally see code like this in the VDSO: - // 48 8B 05 CF FE FF FF MOV -0x131(%rip),%rax - // FF 50 20 CALLQ *0x20(%rax) - // By default, we would not replace the MOV instruction, as it is - // IP relative. But if the following instruction is also IP relative, - // we are left with only three bytes which is not enough to insert a - // jump. - // We recognize this particular situation, and as long as the CALLQ - // is not a branch target, we decide to still relocate the entire - // sequence. We just have to make sure that we then patch up the - // IP relative addressing. - else if (is_indirect_call && startIdx == codeIdx && - code[startIdx = (startIdx + (sizeof(code) / - sizeof(struct Code)) - 1) % - (sizeof(code) / sizeof(struct Code))].addr && - ptr - code[startIdx].addr >= 5 && - code[startIdx].is_ip_relative && - isSafeInsn(code[startIdx].insn) && - ((iter = std::upper_bound(branch_targets.begin(), - branch_targets.end(), - code[startIdx].addr)) == - branch_targets.end() || *iter >= ptr)) { - // We changed startIdx to include the IP relative instruction. - // When copying this preamble, we make sure to patch up the - // offset. - } - #endif - else { - Sandbox::die("Cannot intercept system call"); - } - } - int needed = (__WORDSIZE == 32 ? 
6 : 5) - code[codeIdx].len; - int first = codeIdx; - while (needed > 0 && first != startIdx) { - first = (first + (sizeof(code) / sizeof(struct Code)) - 1) % - (sizeof(code) / sizeof(struct Code)); - needed -= code[first].len; - } - int second = codeIdx; - while (needed > 0) { - second = (second + 1) % (sizeof(code) / sizeof(struct Code)); - needed -= code[second].len; - } - int preamble = code[codeIdx].addr - code[first].addr; - int postamble = code[second].addr + code[second].len - - code[codeIdx].addr - code[codeIdx].len; - - // The following is all the code that construct the various bits of - // assembly code. - #if defined(__x86_64__) - if (is_indirect_call) { - needed = 52 + preamble + code[codeIdx].len + postamble; - } else { - needed = 52 + preamble + postamble; - } - #elif defined(__i386__) - needed = 17 + preamble + postamble; - #else - #error Unsupported target platform - #endif - - // Allocate scratch space and copy the preamble of code that was moved - // from the function that we are patching. - char* dest = getScratchSpace(maps, code[first].addr, needed, - extraSpace, extraLength); - memcpy(dest, code[first].addr, preamble); - - // For jumps from the VDSO to the VSyscalls we sometimes allow exactly - // one IP relative instruction in the preamble. - if (code[first].is_ip_relative) { - *reinterpret_cast<int *>(dest + (code[codeIdx].addr - - code[first].addr) - 4) - -= dest - code[first].addr; - } - - // For indirect calls, we need to copy the actual CALL instruction and - // turn it into a PUSH instruction. - #if defined(__x86_64__) - if (is_indirect_call) { - memcpy(dest + preamble, "\xE8\x00\x00\x00\x00\x48\x83\x04\x24", 9); - dest[preamble + 9] = code[codeIdx].len + 42; - memcpy(dest + preamble + 10, code[codeIdx].addr, code[codeIdx].len); - - // Convert CALL -> PUSH - dest[preamble + 10 + (mod_rm - code[codeIdx].addr)] |= 0x20; - preamble += 10 + code[codeIdx].len; - } - #endif - - // Copy the static body of the assembly code. 
- memcpy(dest + preamble, - #if defined(__x86_64__) - is_indirect_call ? - "\x48\x81\x3C\x24\x00\x00\x00\xFF\x72\x10\x81\x2C\x24\x00\x00\x00" - "\x00\xC7\x44\x24\x04\x00\x00\x00\x00\xC3\x48\x87\x04\x24\x48\x89" - "\x44\x24\x08\x58\xC3" : - "\x48\x81\xEC\x80\x00\x00\x00\x50\x48\x8D\x05\x00\x00\x00\x00\x50" - "\x48\xB8\x00\x00\x00\x00\x00\x00\x00\x00\x50\x48\x8D\x05\x06\x00" - "\x00\x00\x48\x87\x44\x24\x10\xC3\x48\x81\xC4\x80\x00\x00", - is_indirect_call ? 37 : 47 - #elif defined(__i386__) - "\x68\x00\x00\x00\x00\x68\x00\x00\x00\x00\xC3", 11 - #else - #error Unsupported target platform - #endif - ); - - // Copy the postamble that was moved from the function that we are - // patching. - memcpy(dest + preamble + - #if defined(__x86_64__) - (is_indirect_call ? 37 : 47), - #elif defined(__i386__) - 11, - #else - #error Unsupported target platform - #endif - code[codeIdx].addr + code[codeIdx].len, - postamble); - - // Patch up the various computed values - #if defined(__x86_64__) - int post = preamble + (is_indirect_call ? 
37 : 47) + postamble; - dest[post] = '\xE9'; - *reinterpret_cast<int *>(dest + post + 1) = - (code[second].addr + code[second].len) - (dest + post + 5); - if (is_indirect_call) { - *reinterpret_cast<int *>(dest + preamble + 13) = vsys_offset_; - } else { - *reinterpret_cast<int *>(dest + preamble + 11) = - (code[second].addr + code[second].len) - (dest + preamble + 15); - *reinterpret_cast<void **>(dest + preamble + 18) = - reinterpret_cast<void *>(&syscallWrapper); - } - #elif defined(__i386__) - *(dest + preamble + 11 + postamble) = '\x68'; // PUSH - *reinterpret_cast<char **>(dest + preamble + 12 + postamble) = - code[second].addr + code[second].len; - *(dest + preamble + 16 + postamble) = '\xC3'; // RET - *reinterpret_cast<char **>(dest + preamble + 1) = - dest + preamble + 11; - *reinterpret_cast<void (**)()>(dest + preamble + 6) = syscallWrapper; - #else - #error Unsupported target platform - #endif - - // Pad unused space in the original function with NOPs - memset(code[first].addr, 0x90 /* NOP */, - code[second].addr + code[second].len - code[first].addr); - - // Replace the system call with an unconditional jump to our new code. - #if defined(__x86_64__) - *code[first].addr = '\xE9'; // JMPQ - *reinterpret_cast<int *>(code[first].addr + 1) = - dest - (code[first].addr + 5); - #elif defined(__i386__) - code[first].addr[0] = '\x68'; // PUSH - *reinterpret_cast<char **>(code[first].addr + 1) = dest; - code[first].addr[5] = '\xC3'; // RET - #else - #error Unsupported target platform - #endif - } - replaced: - codeIdx = (codeIdx + 1) % (sizeof(code) / sizeof(struct Code)); - } -} - -void Library::patchVDSO(char** extraSpace, int* extraLength){ - #if defined(__i386__) - Sandbox::SysCalls sys; - if (!__kernel_vsyscall || - sys.mprotect(reinterpret_cast<void *>( - reinterpret_cast<long>(__kernel_vsyscall) & ~0xFFF), - 4096, PROT_READ|PROT_WRITE|PROT_EXEC)) { - return; - } - - // x86-32 has a small number of well-defined functions in the VDSO library. 
- // These functions do not easily lend themselves to be rewritten by the - // automatic code. Instead, we explicitly find new definitions for them. - // - // We don't bother with optimizing the syscall instruction instead always - // use INT $0x80, no matter whether the hardware supports more modern - // calling conventions. - // - // TODO(markus): Investigate whether it is worthwhile to optimize this - // code path and use the platform-specific entry code. - if (__kernel_vsyscall) { - // Replace the kernel entry point with: - // - // E9 .. .. .. .. JMP syscallWrapper - *__kernel_vsyscall = '\xE9'; - *reinterpret_cast<long *>(__kernel_vsyscall + 1) = - reinterpret_cast<char *>(&syscallWrapper) - - reinterpret_cast<char *>(__kernel_vsyscall + 5); - } - if (__kernel_sigreturn) { - // Replace the sigreturn() system call with a jump to code that does: - // - // 58 POP %eax - // B8 77 00 00 00 MOV $0x77, %eax - // E8 .. .. .. .. CALL syscallWrapper - char* dest = getScratchSpace(maps_, __kernel_sigreturn, 11, extraSpace, - extraLength); - memcpy(dest, "\x58\xB8\x77\x00\x00\x00\xE8", 7); - *reinterpret_cast<long *>(dest + 7) = - reinterpret_cast<char *>(&syscallWrapper) - dest - 11;; - *__kernel_sigreturn = '\xE9'; - *reinterpret_cast<long *>(__kernel_sigreturn + 1) = - dest - reinterpret_cast<char *>(__kernel_sigreturn) - 5; - } - if (__kernel_rt_sigreturn) { - // Replace the rt_sigreturn() system call with a jump to code that does: - // - // B8 AD 00 00 00 MOV $0xAD, %eax - // E8 .. .. .. .. 
CALL syscallWrapper - char* dest = getScratchSpace(maps_, __kernel_rt_sigreturn, 10, extraSpace, - extraLength); - memcpy(dest, "\xB8\xAD\x00\x00\x00\xE8", 6); - *reinterpret_cast<long *>(dest + 6) = - reinterpret_cast<char *>(&syscallWrapper) - dest - 10; - *__kernel_rt_sigreturn = '\xE9'; - *reinterpret_cast<long *>(__kernel_rt_sigreturn + 1) = - dest - reinterpret_cast<char *>(__kernel_rt_sigreturn) - 5; - } - #endif -} - -int Library::patchVSystemCalls() { - #if defined(__x86_64__) - // VSyscalls live in a shared 4kB page at the top of the address space. This - // page cannot be unmapped nor remapped. We have to create a copy within - // 2GB of the page, and rewrite all IP-relative accesses to shared variables. - // As the top of the address space is not accessible by mmap(), this means - // that we need to wrap around addresses to the bottom 2GB of the address - // space. - // Only x86-64 has VSyscalls. - if (maps_->vsyscall()) { - char* copy = maps_->allocNearAddr(maps_->vsyscall(), 0x1000, - PROT_READ|PROT_WRITE|PROT_EXEC); - char* extraSpace = copy; - int extraLength = 0x1000; - memcpy(copy, maps_->vsyscall(), 0x1000); - long adjust = (long)maps_->vsyscall() - (long)copy; - for (int vsys = 0; vsys < 0x1000; vsys += 0x400) { - char* start = copy + vsys; - char* end = start + 0x400; - - // There can only be up to four VSyscalls starting at an offset of - // n*0x1000, each. VSyscalls are invoked by functions in the VDSO - // and provide fast implementations of a time source. We don't exactly - // know where the code and where the data is in the VSyscalls page. - // So, we disassemble the code for each function and find all branch - // targets within the function in order to find the last address of - // function. 
- for (char *last = start, *vars = end, *ptr = start; ptr < end; ) { - new_function: - char* mod_rm; - unsigned short insn = next_inst((const char **)&ptr, true, 0, 0, - &mod_rm, 0, 0); - if (mod_rm && (*mod_rm & 0xC7) == 0x5) { - // Instruction has IP relative addressing mode. Adjust to reference - // the variables in the original VSyscall segment. - long offset = *reinterpret_cast<int *>(mod_rm + 1); - char* var = ptr + offset; - if (var >= ptr && var < vars) { - // Variables are stored somewhere past all the functions. Remember - // the first variable in the VSyscall slot, so that we stop - // scanning for instructions once we reach that address. - vars = var; - } - offset += adjust; - if ((offset >> 32) && (offset >> 32) != -1) { - Sandbox::die("Cannot patch [vsystemcall]"); - } - *reinterpret_cast<int *>(mod_rm + 1) = offset; - } - - // Check for jump targets to higher addresses (but within our own - // VSyscall slot). They extend the possible end-address of this - // function. - char *target = 0; - if ((insn >= 0x70 && insn <= 0x7F) /* Jcc */ || - insn == 0xEB /* JMP */) { - target = ptr + (reinterpret_cast<signed char *>(ptr))[-1]; - } else if (insn == 0xE8 /* CALL */ || insn == 0xE9 /* JMP */ || - (insn >= 0x0F80 && insn <= 0x0F8F) /* Jcc */) { - target = ptr + (reinterpret_cast<int *>(ptr))[-1]; - } - - // The function end is found, once the loop reaches the last valid - // address in the VSyscall slot, or once it finds a RET instruction - // that is not followed by any jump targets. Unconditional jumps that - // point backwards are treated the same as a RET instruction. - if (insn == 0xC3 /* RET */ || - (target < ptr && - (insn == 0xEB /* JMP */ || insn == 0xE9 /* JMP */))) { - if (last >= ptr) { - continue; - } else { - // The function can optionally be followed by more functions in - // the same VSyscall slot. Allow for alignment to a 16 byte - // boundary. 
If we then find more non-zero bytes, and if this is - // not the known start of the variables, assume a new function - // started. - for (; ptr < vars; ++ptr) { - if ((long)ptr & 0xF) { - if (*ptr && *ptr != '\x90' /* NOP */) { - goto new_function; - } - *ptr = '\x90'; // NOP - } else { - if (*ptr && *ptr != '\x90' /* NOP */) { - goto new_function; - } - break; - } - } - - // Translate all SYSCALLs to jumps into our system call handler. - patchSystemCallsInFunction(NULL, start, ptr, - &extraSpace, &extraLength); - break; - } - } - - // Adjust assumed end address for this function, if a valid jump - // target has been found that originates from the current instruction. - if (target > last && target < start + 0x100) { - last = target; - } - } - } - - // We are done. Write-protect our code and make it executable. - Sandbox::SysCalls sys; - sys.mprotect(copy, 0x1000, PROT_READ|PROT_EXEC); - return maps_->vsyscall() - copy; - } - #endif - return 0; -} - -void Library::patchSystemCalls() { - if (!valid_) { - return; - } - int extraLength = 0; - char* extraSpace = NULL; - if (isVDSO_) { - // patchVDSO() calls patchSystemCallsInFunction() which needs vsys_offset_ - // iff processing the VDSO library. So, make sure we call - // patchVSystemCalls() first. 
- vsys_offset_ = patchVSystemCalls(); - #if defined(__i386__) - patchVDSO(&extraSpace, &extraLength); - return; - #endif - } - SectionTable::const_iterator iter; - if ((iter = section_table_.find(".text")) == section_table_.end()) { - return; - } - const Elf_Shdr& shdr = iter->second.second; - char* start = reinterpret_cast<char *>(shdr.sh_addr + asr_offset_); - char* stop = start + shdr.sh_size; - char* func = start; - int nopcount = 0; - bool has_syscall = false; - for (char *ptr = start; ptr < stop; ptr++) { - #if defined(__x86_64__) - if ((*ptr == '\x0F' && ptr[1] == '\x05' /* SYSCALL */) || - (isVDSO_ && *ptr == '\xFF')) { - #elif defined(__i386__) - if ((*ptr == '\xCD' && ptr[1] == '\x80' /* INT $0x80 */) || - (*ptr == '\x65' && ptr[1] == '\xFF' && - ptr[2] == '\x15' /* CALL %gs:.. */)) { - #else - #error Unsupported target platform - #endif - ptr++; - has_syscall = true; - nopcount = 0; - } else if (*ptr == '\x90' /* NOP */) { - nopcount++; - } else if (!(reinterpret_cast<long>(ptr) & 0xF)) { - if (nopcount > 2) { - // This is very likely the beginning of a new function. Functions - // are aligned on 16 byte boundaries and the preceding function is - // padded out with NOPs. - // - // For performance reasons, we quickly scan the entire text segment - // for potential SYSCALLs, and then patch the code in increments of - // individual functions. - if (has_syscall) { - has_syscall = false; - // Our quick scan of the function found a potential system call. - // Do a more thorough scan, now. - patchSystemCallsInFunction(maps_, func, ptr, &extraSpace, - &extraLength); - } - func = ptr; - } - nopcount = 0; - } else { - nopcount = 0; - } - } - if (has_syscall) { - // Patch any remaining system calls that were in the last function before - // the loop terminated. - patchSystemCallsInFunction(maps_, func, stop, &extraSpace, &extraLength); - } - - // Mark our scratch space as write-protected and executable. 
- if (extraSpace) { - Sandbox::SysCalls sys; - sys.mprotect(extraSpace, 4096, PROT_READ|PROT_EXEC); - } -} - -bool Library::parseElf() { - valid_ = true; - - // Verify ELF header - Elf_Shdr str_shdr; - if (!getOriginal(0, &ehdr_) || - ehdr_.e_ehsize < sizeof(Elf_Ehdr) || - ehdr_.e_phentsize < sizeof(Elf_Phdr) || - ehdr_.e_shentsize < sizeof(Elf_Shdr) || - !getOriginal(ehdr_.e_shoff + ehdr_.e_shstrndx * ehdr_.e_shentsize, - &str_shdr)) { - // Not all memory mappings are necessarily ELF files. Skip memory - // mappings that we cannot identify. - error: - valid_ = false; - return false; - } - - // Parse section table and find all sections in this ELF file - for (int i = 0; i < ehdr_.e_shnum; i++) { - Elf_Shdr shdr; - if (!getOriginal(ehdr_.e_shoff + i*ehdr_.e_shentsize, &shdr)) { - continue; - } - section_table_.insert( - std::make_pair(getOriginal(str_shdr.sh_offset + shdr.sh_name), - std::make_pair(i, shdr))); - } - - // Compute the offset of entries in the .text segment - const Elf_Shdr* text = getSection(".text"); - if (text == NULL) { - // On x86-32, the VDSO is unusual in as much as it does not have a single - // ".text" section. Instead, it has one section per function. Each - // section name starts with ".text". We just need to pick an arbitrary - // one in order to find the asr_offset_ -- which would typically be zero - // for the VDSO. - for (SectionTable::const_iterator iter = section_table_.begin(); - iter != section_table_.end(); ++iter) { - if (!strncmp(iter->first.c_str(), ".text", 5)) { - text = &iter->second.second; - break; - } - } - } - - // Now that we know where the .text segment is located, we can compute the - // asr_offset_. 
- if (text) { - RangeMap::const_iterator iter = - memory_ranges_.lower_bound(text->sh_offset); - if (iter != memory_ranges_.end()) { - asr_offset_ = reinterpret_cast<char *>(iter->second.start) - - (text->sh_addr - (text->sh_offset - iter->first)); - } else { - goto error; - } - } else { - goto error; - } - - return !isVDSO_ || parseSymbols(); -} - -bool Library::parseSymbols() { - if (!valid_) { - return false; - } - - Elf_Shdr str_shdr; - getOriginal(ehdr_.e_shoff + ehdr_.e_shstrndx * ehdr_.e_shentsize, &str_shdr); - - // Find PLT and symbol tables - const Elf_Shdr* plt = getSection(ELF_REL_PLT); - const Elf_Shdr* symtab = getSection(".dynsym"); - Elf_Shdr strtab = { 0 }; - if (symtab) { - if (symtab->sh_link >= ehdr_.e_shnum || - !getOriginal(ehdr_.e_shoff + symtab->sh_link * ehdr_.e_shentsize, - &strtab)) { - Debug::message("Cannot find valid symbol table\n"); - valid_ = false; - return false; - } - } - - if (plt && symtab) { - // Parse PLT table and add its entries - for (int i = plt->sh_size/sizeof(Elf_Rel); --i >= 0; ) { - Elf_Rel rel; - if (!getOriginal(plt->sh_offset + i * sizeof(Elf_Rel), &rel) || - ELF_R_SYM(rel.r_info)*sizeof(Elf_Sym) >= symtab->sh_size) { - Debug::message("Encountered invalid plt entry\n"); - valid_ = false; - return false; - } - - if (ELF_R_TYPE(rel.r_info) != ELF_JUMP_SLOT) { - continue; - } - Elf_Sym sym; - if (!getOriginal(symtab->sh_offset + - ELF_R_SYM(rel.r_info)*sizeof(Elf_Sym), &sym) || - sym.st_shndx >= ehdr_.e_shnum) { - Debug::message("Encountered invalid symbol for plt entry\n"); - valid_ = false; - return false; - } - string name = getOriginal(strtab.sh_offset + sym.st_name); - if (name.empty()) { - continue; - } - plt_entries_.insert(std::make_pair(name, rel.r_offset)); - } - } - - if (symtab) { - // Parse symbol table and add its entries - for (Elf_Addr addr = 0; addr < symtab->sh_size; addr += sizeof(Elf_Sym)) { - Elf_Sym sym; - if (!getOriginal(symtab->sh_offset + addr, &sym) || - (sym.st_shndx >= ehdr_.e_shnum && - 
sym.st_shndx < SHN_LORESERVE)) { - Debug::message("Encountered invalid symbol\n"); - valid_ = false; - return false; - } - string name = getOriginal(strtab.sh_offset + sym.st_name); - if (name.empty()) { - continue; - } - symbols_.insert(std::make_pair(name, sym)); - } - } - - SymbolTable::const_iterator iter = symbols_.find("__kernel_vsyscall"); - if (iter != symbols_.end() && iter->second.st_value) { - __kernel_vsyscall = asr_offset_ + iter->second.st_value; - } - iter = symbols_.find("__kernel_sigreturn"); - if (iter != symbols_.end() && iter->second.st_value) { - __kernel_sigreturn = asr_offset_ + iter->second.st_value; - } - iter = symbols_.find("__kernel_rt_sigreturn"); - if (iter != symbols_.end() && iter->second.st_value) { - __kernel_rt_sigreturn = asr_offset_ + iter->second.st_value; - } - - return true; -} - -} // namespace diff --git a/sandbox/linux/seccomp/library.h b/sandbox/linux/seccomp/library.h deleted file mode 100644 index e27bfde..0000000 --- a/sandbox/linux/seccomp/library.h +++ /dev/null @@ -1,199 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#ifndef LIBRARY_H__ -#define LIBRARY_H__ - -#include <elf.h> -#include <functional> -#include <map> -#include <set> -#include <string> -#include <string.h> -#include <sys/mman.h> - -#include "maps.h" - -#if defined(__x86_64__) -typedef Elf64_Ehdr Elf_Ehdr; -typedef Elf64_Shdr Elf_Shdr; -typedef Elf64_Sym Elf_Sym; -typedef Elf64_Addr Elf_Addr; -#elif defined(__i386__) -typedef Elf32_Ehdr Elf_Ehdr; -typedef Elf32_Shdr Elf_Shdr; -typedef Elf32_Sym Elf_Sym; -typedef Elf32_Addr Elf_Addr; -#else -#error Unsupported target platform -#endif - -struct SyscallTable; -namespace playground { - -class Library { - friend class Maps; - public: - typedef Maps::string string; - - Library() : - valid_(false), - isVDSO_(false), - asr_offset_(0), - vsys_offset_(0), - maps_(0), - image_(0), - image_size_(0) { - } - - ~Library(); - - void setLibraryInfo(Maps* maps) { - if (!maps_) { - maps_ = maps; - } - } - - void addMemoryRange(void* start, void* stop, Elf_Addr offset, - int prot, int isVDSO) { - isVDSO_ = isVDSO; - RangeMap::const_iterator iter = memory_ranges_.find(offset); - if (iter != memory_ranges_.end()) { - // It is possible to have overlapping mappings. This is particularly - // likely to happen with very small programs or libraries. If it does - // happen, we really only care about the text segment. Look for a - // mapping that is mapped executable. 
- if ((prot & PROT_EXEC) == 0) { - return; - } - } - memory_ranges_.insert(std::make_pair(offset, Range(start, stop, prot))); - } - - char *get(Elf_Addr offset, char *buf, size_t len); - string get(Elf_Addr offset); - char *getOriginal(Elf_Addr offset, char *buf, size_t len); - string getOriginal(Elf_Addr offset); - - template<class T>T* get(Elf_Addr offset, T* t) { - if (!valid_) { - memset(t, 0, sizeof(T)); - return NULL; - } - return reinterpret_cast<T *>(get(offset, reinterpret_cast<char *>(t), - sizeof(T))); - } - - template<class T>T* getOriginal(Elf_Addr offset, T* t) { - if (!valid_) { - memset(t, 0, sizeof(T)); - return NULL; - } - return reinterpret_cast<T *>(getOriginal(offset, - reinterpret_cast<char *>(t), - sizeof(T))); - } - - template<class T>bool set(void *addr, T* value) { - if (!valid_) { - return false; - } - *reinterpret_cast<T *>(addr) = *value; - return true; - } - - template<class T>bool set(Elf_Addr offset, T* value) { - if (!valid_) { - return false; - } - RangeMap::const_iterator iter = memory_ranges_.lower_bound(offset); - if (iter == memory_ranges_.end()) { - return false; - } - offset -= iter->first; - if (offset > - reinterpret_cast<char *>(iter->second.stop) - - reinterpret_cast<char *>(iter->second.start) - - sizeof(T)) { - return false; - } - *reinterpret_cast<T *>( - reinterpret_cast<char *>(iter->second.start) + offset) = *value; - return true; - } - - bool parseElf(); - const Elf_Ehdr* getEhdr(); - const Elf_Shdr* getSection(const string& section); - int getSectionIndex(const string& section); - void makeWritable(bool state) const; - void patchSystemCalls(); - bool isVDSO() const { return isVDSO_; } - - protected: - bool parseSymbols(); - - private: - class GreaterThan : public std::binary_function<Elf_Addr, Elf_Addr, bool> { - // We create the RangeMap with a GreaterThan rather than the default - // comparator, as that allows us to use lower_bound() to find memory - // mappings. 
- public: - bool operator() (Elf_Addr s1, Elf_Addr s2) const { - return s1 > s2; - } - }; - - struct Range { - Range(void* start, void* stop, int prot) : - start(start), stop(stop), prot(prot) { } - void* start; - void* stop; - int prot; - }; - - typedef std::map<Elf_Addr, Range, GreaterThan, - SystemAllocator<std::pair<const Elf_Addr, - Range> > > RangeMap; - typedef std::map<string, std::pair<int, Elf_Shdr>, std::less<string>, - SystemAllocator<std::pair<const string, - std::pair<int, Elf_Shdr> > > > - SectionTable; - typedef std::map<string, Elf_Sym, std::less<string>, - SystemAllocator<std::pair<const string, - Elf_Sym> > > SymbolTable; - typedef std::map<string, Elf_Addr, std::less<string>, - SystemAllocator<std::pair<const string, - Elf_Addr> > > PltTable; - - char* getBytes(char* dst, const char* src, ssize_t len); - static bool isSafeInsn(unsigned short insn); - static int isSimpleSystemCall(char *start, char *end); - static char* getScratchSpace(const Maps* maps, char* near, int needed, - char** extraSpace, int* extraLength); - void patchSystemCallsInFunction(const Maps* maps, char *start, char *end, - char** extraSpace, int* extraLength); - int patchVSystemCalls(); - void patchVDSO(char** extraSpace, int* extraLength); - - RangeMap memory_ranges_; - bool valid_; - bool isVDSO_; - char* asr_offset_; - int vsys_offset_; - Maps* maps_; - Elf_Ehdr ehdr_; - SectionTable section_table_; - SymbolTable symbols_; - PltTable plt_entries_; - char* image_; - size_t image_size_; - static char* __kernel_vsyscall; - static char* __kernel_sigreturn; - static char* __kernel_rt_sigreturn; -}; - -} // namespace - -#endif // LIBRARY_H__ diff --git a/sandbox/linux/seccomp/linux_syscall_support.h b/sandbox/linux/seccomp/linux_syscall_support.h deleted file mode 100644 index 2ee0426..0000000 --- a/sandbox/linux/seccomp/linux_syscall_support.h +++ /dev/null @@ -1,3208 +0,0 @@ -/* Copyright (c) 2005-2010, Google Inc. - * Author: Markus Gutschke - * - * All rights reserved. 
- * Use of this source code is governed by a BSD-style license that can be - * found in the Chromium LICENSE file. - */ - -/* This file includes Linux-specific support functions common to the - * coredumper and the thread lister; primarily, this is a collection - * of direct system calls, and a couple of symbols missing from - * standard header files. - * There are a few options that the including file can set to control - * the behavior of this file: - * - * SYS_CPLUSPLUS: - * The entire header file will normally be wrapped in 'extern "C" { }", - * making it suitable for compilation as both C and C++ source. If you - * do not want to do this, you can set the SYS_CPLUSPLUS macro to inhibit - * the wrapping. N.B. doing so will suppress inclusion of all prerequisite - * system header files, too. It is the caller's responsibility to provide - * the necessary definitions. - * - * SYS_ERRNO: - * All system calls will update "errno" unless overriden by setting the - * SYS_ERRNO macro prior to including this file. SYS_ERRNO should be - * an l-value. - * - * SYS_INLINE: - * New symbols will be defined "static inline", unless overridden by - * the SYS_INLINE macro. - * - * SYS_LINUX_SYSCALL_SUPPORT_H - * This macro is used to avoid multiple inclusions of this header file. - * If you need to include this file more than once, make sure to - * unset SYS_LINUX_SYSCALL_SUPPORT_H before each inclusion. - * - * SYS_PREFIX: - * New system calls will have a prefix of "sys_" unless overridden by - * the SYS_PREFIX macro. Valid values for this macro are [0..9] which - * results in prefixes "sys[0..9]_". It is also possible to set this - * macro to -1, which avoids all prefixes. - * - * This file defines a few internal symbols that all start with "LSS_". - * Do not access these symbols from outside this file. They are not part - * of the supported API. 
- */ -#ifndef SYS_LINUX_SYSCALL_SUPPORT_H -#define SYS_LINUX_SYSCALL_SUPPORT_H - -/* We currently only support x86-32, x86-64, ARM, MIPS, and PPC on Linux. - * Porting to other related platforms should not be difficult. - */ -#if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) || \ - defined(__mips__) || defined(__PPC__)) && defined(__linux) - -#ifndef SYS_CPLUSPLUS -#ifdef __cplusplus -/* Some system header files in older versions of gcc neglect to properly - * handle being included from C++. As it appears to be harmless to have - * multiple nested 'extern "C"' blocks, just add another one here. - */ -extern "C" { -#endif - -#include <errno.h> -#include <signal.h> -#include <stdarg.h> -#include <stddef.h> -#include <string.h> -#include <sys/ptrace.h> -#include <sys/resource.h> -#include <sys/time.h> -#include <sys/types.h> -#include <syscall.h> -#include <unistd.h> -#include <linux/unistd.h> -#include <endian.h> - -#ifdef __mips__ -/* Include definitions of the ABI currently in use. */ -#include <sgidefs.h> -#endif - -#endif - -/* As glibc often provides subtly incompatible data structures (and implicit - * wrapper functions that convert them), we provide our own kernel data - * structures for use by the system calls. - * These structures have been developed by using Linux 2.6.23 headers for - * reference. Note though, we do not care about exact API compatibility - * with the kernel, and in fact the kernel often does not have a single - * API that works across architectures. Instead, we try to mimic the glibc - * API where reasonable, and only guarantee ABI compatibility with the - * kernel headers. - * Most notably, here are a few changes that were made to the structures - * defined by kernel headers: - * - * - we only define structures, but not symbolic names for kernel data - * types. For the latter, we directly use the native C datatype - * (i.e. "unsigned" instead of "mode_t"). 
- * - in a few cases, it is possible to define identical structures for - * both 32bit (e.g. i386) and 64bit (e.g. x86-64) platforms by - * standardizing on the 64bit version of the data types. In particular, - * this means that we use "unsigned" where the 32bit headers say - * "unsigned long". - * - overall, we try to minimize the number of cases where we need to - * conditionally define different structures. - * - the "struct kernel_sigaction" class of structures have been - * modified to more closely mimic glibc's API by introducing an - * anonymous union for the function pointer. - * - a small number of field names had to have an underscore appended to - * them, because glibc defines a global macro by the same name. - */ - -/* include/linux/dirent.h */ -struct kernel_dirent64 { - unsigned long long d_ino; - long long d_off; - unsigned short d_reclen; - unsigned char d_type; - char d_name[256]; -}; - -/* include/linux/dirent.h */ -struct kernel_dirent { - long d_ino; - long d_off; - unsigned short d_reclen; - char d_name[256]; -}; - -/* include/linux/uio.h */ -struct kernel_iovec { - void *iov_base; - unsigned long iov_len; -}; - -/* include/linux/socket.h */ -struct kernel_msghdr { - void *msg_name; - int msg_namelen; - struct kernel_iovec*msg_iov; - unsigned long msg_iovlen; - void *msg_control; - unsigned long msg_controllen; - unsigned msg_flags; -}; - -/* include/asm-generic/poll.h */ -struct kernel_pollfd { - int fd; - short events; - short revents; -}; - -/* include/linux/resource.h */ -struct kernel_rlimit { - unsigned long rlim_cur; - unsigned long rlim_max; -}; - -/* include/linux/time.h */ -struct kernel_timespec { - long tv_sec; - long tv_nsec; -}; - -/* include/linux/time.h */ -struct kernel_timeval { - long tv_sec; - long tv_usec; -}; - -/* include/linux/resource.h */ -struct kernel_rusage { - struct kernel_timeval ru_utime; - struct kernel_timeval ru_stime; - long ru_maxrss; - long ru_ixrss; - long ru_idrss; - long ru_isrss; - long ru_minflt; - 
long ru_majflt; - long ru_nswap; - long ru_inblock; - long ru_oublock; - long ru_msgsnd; - long ru_msgrcv; - long ru_nsignals; - long ru_nvcsw; - long ru_nivcsw; -}; - -struct siginfo; -#if defined(__i386__) || defined(__ARM_ARCH_3__) || defined(__PPC__) - -/* include/asm-{arm,i386,mips,ppc}/signal.h */ -struct kernel_old_sigaction { - union { - void (*sa_handler_)(int); - void (*sa_sigaction_)(int, struct siginfo *, void *); - }; - unsigned long sa_mask; - unsigned long sa_flags; - void (*sa_restorer)(void); -} __attribute__((packed,aligned(4))); -#elif (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) - #define kernel_old_sigaction kernel_sigaction -#endif - -/* Some kernel functions (e.g. sigaction() in 2.6.23) require that the - * exactly match the size of the signal set, even though the API was - * intended to be extensible. We define our own KERNEL_NSIG to deal with - * this. - * Please note that glibc provides signals [1.._NSIG-1], whereas the - * kernel (and this header) provides the range [1..KERNEL_NSIG]. The - * actual number of signals is obviously the same, but the constants - * differ by one. 
- */ -#ifdef __mips__ -#define KERNEL_NSIG 128 -#else -#define KERNEL_NSIG 64 -#endif - -/* include/asm-{arm,i386,mips,x86_64}/signal.h */ -struct kernel_sigset_t { - unsigned long sig[(KERNEL_NSIG + 8*sizeof(unsigned long) - 1)/ - (8*sizeof(unsigned long))]; -}; - -/* include/asm-{arm,i386,mips,x86_64,ppc}/signal.h */ -struct kernel_sigaction { -#ifdef __mips__ - unsigned long sa_flags; - union { - void (*sa_handler_)(int); - void (*sa_sigaction_)(int, struct siginfo *, void *); - }; - struct kernel_sigset_t sa_mask; -#else - union { - void (*sa_handler_)(int); - void (*sa_sigaction_)(int, struct siginfo *, void *); - }; - unsigned long sa_flags; - void (*sa_restorer)(void); - struct kernel_sigset_t sa_mask; -#endif -}; - -/* include/linux/socket.h */ -struct kernel_sockaddr { - unsigned short sa_family; - char sa_data[14]; -}; - -/* include/asm-{arm,i386,mips,ppc}/stat.h */ -#ifdef __mips__ -#if _MIPS_SIM == _MIPS_SIM_ABI64 -struct kernel_stat { -#else -struct kernel_stat64 { -#endif - unsigned st_dev; - unsigned __pad0[3]; - unsigned long long st_ino; - unsigned st_mode; - unsigned st_nlink; - unsigned st_uid; - unsigned st_gid; - unsigned st_rdev; - unsigned __pad1[3]; - long long st_size; - unsigned st_atime_; - unsigned st_atime_nsec_; - unsigned st_mtime_; - unsigned st_mtime_nsec_; - unsigned st_ctime_; - unsigned st_ctime_nsec_; - unsigned st_blksize; - unsigned __pad2; - unsigned long long st_blocks; -}; -#elif defined __PPC__ -struct kernel_stat64 { - unsigned long long st_dev; - unsigned long long st_ino; - unsigned st_mode; - unsigned st_nlink; - unsigned st_uid; - unsigned st_gid; - unsigned long long st_rdev; - unsigned short int __pad2; - long long st_size; - long st_blksize; - long long st_blocks; - long st_atime_; - unsigned long st_atime_nsec_; - long st_mtime_; - unsigned long st_mtime_nsec_; - long st_ctime_; - unsigned long st_ctime_nsec_; - unsigned long __unused4; - unsigned long __unused5; -}; -#else -struct kernel_stat64 { - unsigned long 
long st_dev; - unsigned char __pad0[4]; - unsigned __st_ino; - unsigned st_mode; - unsigned st_nlink; - unsigned st_uid; - unsigned st_gid; - unsigned long long st_rdev; - unsigned char __pad3[4]; - long long st_size; - unsigned st_blksize; - unsigned long long st_blocks; - unsigned st_atime_; - unsigned st_atime_nsec_; - unsigned st_mtime_; - unsigned st_mtime_nsec_; - unsigned st_ctime_; - unsigned st_ctime_nsec_; - unsigned long long st_ino; -}; -#endif - -/* include/asm-{arm,i386,mips,x86_64,ppc}/stat.h */ -#if defined(__i386__) || defined(__ARM_ARCH_3__) -struct kernel_stat { - /* The kernel headers suggest that st_dev and st_rdev should be 32bit - * quantities encoding 12bit major and 20bit minor numbers in an interleaved - * format. In reality, we do not see useful data in the top bits. So, - * we'll leave the padding in here, until we find a better solution. - */ - unsigned short st_dev; - short pad1; - unsigned st_ino; - unsigned short st_mode; - unsigned short st_nlink; - unsigned short st_uid; - unsigned short st_gid; - unsigned short st_rdev; - short pad2; - unsigned st_size; - unsigned st_blksize; - unsigned st_blocks; - unsigned st_atime_; - unsigned st_atime_nsec_; - unsigned st_mtime_; - unsigned st_mtime_nsec_; - unsigned st_ctime_; - unsigned st_ctime_nsec_; - unsigned __unused4; - unsigned __unused5; -}; -#elif defined(__x86_64__) -struct kernel_stat { - unsigned long st_dev; - unsigned long st_ino; - unsigned long st_nlink; - unsigned st_mode; - unsigned st_uid; - unsigned st_gid; - unsigned __pad0; - unsigned long st_rdev; - long st_size; - long st_blksize; - long st_blocks; - unsigned long st_atime_; - unsigned long st_atime_nsec_; - unsigned long st_mtime_; - unsigned long st_mtime_nsec_; - unsigned long st_ctime_; - unsigned long st_ctime_nsec_; - long __unused[3]; -}; -#elif defined(__PPC__) -struct kernel_stat { - unsigned st_dev; - unsigned long st_ino; // ino_t - unsigned long st_mode; // mode_t - unsigned short st_nlink; // nlink_t - 
unsigned st_uid; // uid_t - unsigned st_gid; // gid_t - unsigned st_rdev; - long st_size; // off_t - unsigned long st_blksize; - unsigned long st_blocks; - unsigned long st_atime_; - unsigned long st_atime_nsec_; - unsigned long st_mtime_; - unsigned long st_mtime_nsec_; - unsigned long st_ctime_; - unsigned long st_ctime_nsec_; - unsigned long __unused4; - unsigned long __unused5; -}; -#elif (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64) -struct kernel_stat { - unsigned st_dev; - int st_pad1[3]; - unsigned st_ino; - unsigned st_mode; - unsigned st_nlink; - unsigned st_uid; - unsigned st_gid; - unsigned st_rdev; - int st_pad2[2]; - long st_size; - int st_pad3; - long st_atime_; - long st_atime_nsec_; - long st_mtime_; - long st_mtime_nsec_; - long st_ctime_; - long st_ctime_nsec_; - int st_blksize; - int st_blocks; - int st_pad4[14]; -}; -#endif - -/* include/asm-{arm,i386,mips,x86_64,ppc}/statfs.h */ -#ifdef __mips__ -#if _MIPS_SIM != _MIPS_SIM_ABI64 -struct kernel_statfs64 { - unsigned long f_type; - unsigned long f_bsize; - unsigned long f_frsize; - unsigned long __pad; - unsigned long long f_blocks; - unsigned long long f_bfree; - unsigned long long f_files; - unsigned long long f_ffree; - unsigned long long f_bavail; - struct { int val[2]; } f_fsid; - unsigned long f_namelen; - unsigned long f_spare[6]; -}; -#endif -#elif !defined(__x86_64__) -struct kernel_statfs64 { - unsigned long f_type; - unsigned long f_bsize; - unsigned long long f_blocks; - unsigned long long f_bfree; - unsigned long long f_bavail; - unsigned long long f_files; - unsigned long long f_ffree; - struct { int val[2]; } f_fsid; - unsigned long f_namelen; - unsigned long f_frsize; - unsigned long f_spare[5]; -}; -#endif - -/* include/asm-{arm,i386,mips,x86_64,ppc,generic}/statfs.h */ -#ifdef __mips__ -struct kernel_statfs { - long f_type; - long f_bsize; - long f_frsize; - long f_blocks; - long f_bfree; - long f_files; - long f_ffree; - long f_bavail; - struct { int val[2]; } f_fsid; - 
long f_namelen; - long f_spare[6]; -}; -#else -struct kernel_statfs { - /* x86_64 actually defines all these fields as signed, whereas all other */ - /* platforms define them as unsigned. Leaving them at unsigned should not */ - /* cause any problems. */ - unsigned long f_type; - unsigned long f_bsize; - unsigned long f_blocks; - unsigned long f_bfree; - unsigned long f_bavail; - unsigned long f_files; - unsigned long f_ffree; - struct { int val[2]; } f_fsid; - unsigned long f_namelen; - unsigned long f_frsize; - unsigned long f_spare[5]; -}; -#endif - - -/* Definitions missing from the standard header files */ -#ifndef O_DIRECTORY -#if defined(__ARM_ARCH_3__) -#define O_DIRECTORY 0040000 -#else -#define O_DIRECTORY 0200000 -#endif -#endif -#ifndef NT_PRXFPREG -#define NT_PRXFPREG 0x46e62b7f -#endif -#ifndef PTRACE_GETFPXREGS -#define PTRACE_GETFPXREGS ((enum __ptrace_request)18) -#endif -#ifndef PR_GET_DUMPABLE -#define PR_GET_DUMPABLE 3 -#endif -#ifndef PR_SET_DUMPABLE -#define PR_SET_DUMPABLE 4 -#endif -#ifndef PR_GET_SECCOMP -#define PR_GET_SECCOMP 21 -#endif -#ifndef PR_SET_SECCOMP -#define PR_SET_SECCOMP 22 -#endif -#ifndef AT_FDCWD -#define AT_FDCWD (-100) -#endif -#ifndef AT_SYMLINK_NOFOLLOW -#define AT_SYMLINK_NOFOLLOW 0x100 -#endif -#ifndef AT_REMOVEDIR -#define AT_REMOVEDIR 0x200 -#endif -#ifndef MREMAP_FIXED -#define MREMAP_FIXED 2 -#endif -#ifndef SA_RESTORER -#define SA_RESTORER 0x04000000 -#endif -#ifndef CPUCLOCK_PROF -#define CPUCLOCK_PROF 0 -#endif -#ifndef CPUCLOCK_VIRT -#define CPUCLOCK_VIRT 1 -#endif -#ifndef CPUCLOCK_SCHED -#define CPUCLOCK_SCHED 2 -#endif -#ifndef CPUCLOCK_PERTHREAD_MASK -#define CPUCLOCK_PERTHREAD_MASK 4 -#endif -#ifndef MAKE_PROCESS_CPUCLOCK -#define MAKE_PROCESS_CPUCLOCK(pid, clock) \ - ((~(int)(pid) << 3) | (int)(clock)) -#endif -#ifndef MAKE_THREAD_CPUCLOCK -#define MAKE_THREAD_CPUCLOCK(tid, clock) \ - ((~(int)(tid) << 3) | (int)((clock) | CPUCLOCK_PERTHREAD_MASK)) -#endif - -#ifndef FUTEX_WAIT -#define FUTEX_WAIT 0 
-#endif -#ifndef FUTEX_WAKE -#define FUTEX_WAKE 1 -#endif -#ifndef FUTEX_FD -#define FUTEX_FD 2 -#endif -#ifndef FUTEX_REQUEUE -#define FUTEX_REQUEUE 3 -#endif -#ifndef FUTEX_CMP_REQUEUE -#define FUTEX_CMP_REQUEUE 4 -#endif -#ifndef FUTEX_WAKE_OP -#define FUTEX_WAKE_OP 5 -#endif -#ifndef FUTEX_LOCK_PI -#define FUTEX_LOCK_PI 6 -#endif -#ifndef FUTEX_UNLOCK_PI -#define FUTEX_UNLOCK_PI 7 -#endif -#ifndef FUTEX_TRYLOCK_PI -#define FUTEX_TRYLOCK_PI 8 -#endif -#ifndef FUTEX_PRIVATE_FLAG -#define FUTEX_PRIVATE_FLAG 128 -#endif -#ifndef FUTEX_CMD_MASK -#define FUTEX_CMD_MASK ~FUTEX_PRIVATE_FLAG -#endif -#ifndef FUTEX_WAIT_PRIVATE -#define FUTEX_WAIT_PRIVATE (FUTEX_WAIT | FUTEX_PRIVATE_FLAG) -#endif -#ifndef FUTEX_WAKE_PRIVATE -#define FUTEX_WAKE_PRIVATE (FUTEX_WAKE | FUTEX_PRIVATE_FLAG) -#endif -#ifndef FUTEX_REQUEUE_PRIVATE -#define FUTEX_REQUEUE_PRIVATE (FUTEX_REQUEUE | FUTEX_PRIVATE_FLAG) -#endif -#ifndef FUTEX_CMP_REQUEUE_PRIVATE -#define FUTEX_CMP_REQUEUE_PRIVATE (FUTEX_CMP_REQUEUE | FUTEX_PRIVATE_FLAG) -#endif -#ifndef FUTEX_WAKE_OP_PRIVATE -#define FUTEX_WAKE_OP_PRIVATE (FUTEX_WAKE_OP | FUTEX_PRIVATE_FLAG) -#endif -#ifndef FUTEX_LOCK_PI_PRIVATE -#define FUTEX_LOCK_PI_PRIVATE (FUTEX_LOCK_PI | FUTEX_PRIVATE_FLAG) -#endif -#ifndef FUTEX_UNLOCK_PI_PRIVATE -#define FUTEX_UNLOCK_PI_PRIVATE (FUTEX_UNLOCK_PI | FUTEX_PRIVATE_FLAG) -#endif -#ifndef FUTEX_TRYLOCK_PI_PRIVATE -#define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG) -#endif - - -#if defined(__x86_64__) -#ifndef ARCH_SET_GS -#define ARCH_SET_GS 0x1001 -#endif -#ifndef ARCH_GET_GS -#define ARCH_GET_GS 0x1004 -#endif -#endif - -#if defined(__i386__) -#ifndef __NR_quotactl -#define __NR_quotactl 131 -#endif -#ifndef __NR_setresuid -#define __NR_setresuid 164 -#define __NR_getresuid 165 -#define __NR_setresgid 170 -#define __NR_getresgid 171 -#endif -#ifndef __NR_rt_sigaction -#define __NR_rt_sigreturn 173 -#define __NR_rt_sigaction 174 -#define __NR_rt_sigprocmask 175 -#define __NR_rt_sigpending 176 
-#define __NR_rt_sigsuspend 179 -#endif -#ifndef __NR_pread64 -#define __NR_pread64 180 -#endif -#ifndef __NR_pwrite64 -#define __NR_pwrite64 181 -#endif -#ifndef __NR_ugetrlimit -#define __NR_ugetrlimit 191 -#endif -#ifndef __NR_stat64 -#define __NR_stat64 195 -#endif -#ifndef __NR_fstat64 -#define __NR_fstat64 197 -#endif -#ifndef __NR_setresuid32 -#define __NR_setresuid32 208 -#define __NR_getresuid32 209 -#define __NR_setresgid32 210 -#define __NR_getresgid32 211 -#endif -#ifndef __NR_setfsuid32 -#define __NR_setfsuid32 215 -#define __NR_setfsgid32 216 -#endif -#ifndef __NR_getdents64 -#define __NR_getdents64 220 -#endif -#ifndef __NR_gettid -#define __NR_gettid 224 -#endif -#ifndef __NR_readahead -#define __NR_readahead 225 -#endif -#ifndef __NR_setxattr -#define __NR_setxattr 226 -#endif -#ifndef __NR_lsetxattr -#define __NR_lsetxattr 227 -#endif -#ifndef __NR_getxattr -#define __NR_getxattr 229 -#endif -#ifndef __NR_lgetxattr -#define __NR_lgetxattr 230 -#endif -#ifndef __NR_listxattr -#define __NR_listxattr 232 -#endif -#ifndef __NR_llistxattr -#define __NR_llistxattr 233 -#endif -#ifndef __NR_tkill -#define __NR_tkill 238 -#endif -#ifndef __NR_futex -#define __NR_futex 240 -#endif -#ifndef __NR_sched_setaffinity -#define __NR_sched_setaffinity 241 -#define __NR_sched_getaffinity 242 -#endif -#ifndef __NR_set_tid_address -#define __NR_set_tid_address 258 -#endif -#ifndef __NR_clock_gettime -#define __NR_clock_gettime 265 -#endif -#ifndef __NR_clock_getres -#define __NR_clock_getres 266 -#endif -#ifndef __NR_statfs64 -#define __NR_statfs64 268 -#endif -#ifndef __NR_fstatfs64 -#define __NR_fstatfs64 269 -#endif -#ifndef __NR_fadvise64_64 -#define __NR_fadvise64_64 272 -#endif -#ifndef __NR_ioprio_set -#define __NR_ioprio_set 289 -#endif -#ifndef __NR_ioprio_get -#define __NR_ioprio_get 290 -#endif -#ifndef __NR_openat -#define __NR_openat 295 -#endif -#ifndef __NR_fstatat64 -#define __NR_fstatat64 300 -#endif -#ifndef __NR_unlinkat -#define __NR_unlinkat 301 
-#endif -#ifndef __NR_move_pages -#define __NR_move_pages 317 -#endif -#ifndef __NR_getcpu -#define __NR_getcpu 318 -#endif -#ifndef __NR_fallocate -#define __NR_fallocate 324 -#endif -/* End of i386 definitions */ -#elif defined(__ARM_ARCH_3__) -#ifndef __NR_setresuid -#define __NR_setresuid (__NR_SYSCALL_BASE + 164) -#define __NR_getresuid (__NR_SYSCALL_BASE + 165) -#define __NR_setresgid (__NR_SYSCALL_BASE + 170) -#define __NR_getresgid (__NR_SYSCALL_BASE + 171) -#endif -#ifndef __NR_rt_sigaction -#define __NR_rt_sigreturn (__NR_SYSCALL_BASE + 173) -#define __NR_rt_sigaction (__NR_SYSCALL_BASE + 174) -#define __NR_rt_sigprocmask (__NR_SYSCALL_BASE + 175) -#define __NR_rt_sigpending (__NR_SYSCALL_BASE + 176) -#define __NR_rt_sigsuspend (__NR_SYSCALL_BASE + 179) -#endif -#ifndef __NR_pread64 -#define __NR_pread64 (__NR_SYSCALL_BASE + 180) -#endif -#ifndef __NR_pwrite64 -#define __NR_pwrite64 (__NR_SYSCALL_BASE + 181) -#endif -#ifndef __NR_ugetrlimit -#define __NR_ugetrlimit (__NR_SYSCALL_BASE + 191) -#endif -#ifndef __NR_stat64 -#define __NR_stat64 (__NR_SYSCALL_BASE + 195) -#endif -#ifndef __NR_fstat64 -#define __NR_fstat64 (__NR_SYSCALL_BASE + 197) -#endif -#ifndef __NR_setresuid32 -#define __NR_setresuid32 (__NR_SYSCALL_BASE + 208) -#define __NR_getresuid32 (__NR_SYSCALL_BASE + 209) -#define __NR_setresgid32 (__NR_SYSCALL_BASE + 210) -#define __NR_getresgid32 (__NR_SYSCALL_BASE + 211) -#endif -#ifndef __NR_setfsuid32 -#define __NR_setfsuid32 (__NR_SYSCALL_BASE + 215) -#define __NR_setfsgid32 (__NR_SYSCALL_BASE + 216) -#endif -#ifndef __NR_getdents64 -#define __NR_getdents64 (__NR_SYSCALL_BASE + 217) -#endif -#ifndef __NR_gettid -#define __NR_gettid (__NR_SYSCALL_BASE + 224) -#endif -#ifndef __NR_readahead -#define __NR_readahead (__NR_SYSCALL_BASE + 225) -#endif -#ifndef __NR_setxattr -#define __NR_setxattr (__NR_SYSCALL_BASE + 226) -#endif -#ifndef __NR_lsetxattr -#define __NR_lsetxattr (__NR_SYSCALL_BASE + 227) -#endif -#ifndef __NR_getxattr -#define 
__NR_getxattr (__NR_SYSCALL_BASE + 229) -#endif -#ifndef __NR_lgetxattr -#define __NR_lgetxattr (__NR_SYSCALL_BASE + 230) -#endif -#ifndef __NR_listxattr -#define __NR_listxattr (__NR_SYSCALL_BASE + 232) -#endif -#ifndef __NR_llistxattr -#define __NR_llistxattr (__NR_SYSCALL_BASE + 233) -#endif -#ifndef __NR_tkill -#define __NR_tkill (__NR_SYSCALL_BASE + 238) -#endif -#ifndef __NR_futex -#define __NR_futex (__NR_SYSCALL_BASE + 240) -#endif -#ifndef __NR_sched_setaffinity -#define __NR_sched_setaffinity (__NR_SYSCALL_BASE + 241) -#define __NR_sched_getaffinity (__NR_SYSCALL_BASE + 242) -#endif -#ifndef __NR_set_tid_address -#define __NR_set_tid_address (__NR_SYSCALL_BASE + 256) -#endif -#ifndef __NR_clock_gettime -#define __NR_clock_gettime (__NR_SYSCALL_BASE + 263) -#endif -#ifndef __NR_clock_getres -#define __NR_clock_getres (__NR_SYSCALL_BASE + 264) -#endif -#ifndef __NR_statfs64 -#define __NR_statfs64 (__NR_SYSCALL_BASE + 266) -#endif -#ifndef __NR_fstatfs64 -#define __NR_fstatfs64 (__NR_SYSCALL_BASE + 267) -#endif -#ifndef __NR_ioprio_set -#define __NR_ioprio_set (__NR_SYSCALL_BASE + 314) -#endif -#ifndef __NR_ioprio_get -#define __NR_ioprio_get (__NR_SYSCALL_BASE + 315) -#endif -#ifndef __NR_move_pages -#define __NR_move_pages (__NR_SYSCALL_BASE + 344) -#endif -#ifndef __NR_getcpu -#define __NR_getcpu (__NR_SYSCALL_BASE + 345) -#endif -/* End of ARM 3 definitions */ -#elif defined(__x86_64__) -#ifndef __NR_pread64 -#define __NR_pread64 17 -#endif -#ifndef __NR_pwrite64 -#define __NR_pwrite64 18 -#endif -#ifndef __NR_setresuid -#define __NR_setresuid 117 -#define __NR_getresuid 118 -#define __NR_setresgid 119 -#define __NR_getresgid 120 -#endif -#ifndef __NR_quotactl -#define __NR_quotactl 179 -#endif -#ifndef __NR_gettid -#define __NR_gettid 186 -#endif -#ifndef __NR_readahead -#define __NR_readahead 187 -#endif -#ifndef __NR_setxattr -#define __NR_setxattr 188 -#endif -#ifndef __NR_lsetxattr -#define __NR_lsetxattr 189 -#endif -#ifndef __NR_getxattr -#define 
__NR_getxattr 191 -#endif -#ifndef __NR_lgetxattr -#define __NR_lgetxattr 192 -#endif -#ifndef __NR_listxattr -#define __NR_listxattr 194 -#endif -#ifndef __NR_llistxattr -#define __NR_llistxattr 195 -#endif -#ifndef __NR_tkill -#define __NR_tkill 200 -#endif -#ifndef __NR_futex -#define __NR_futex 202 -#endif -#ifndef __NR_sched_setaffinity -#define __NR_sched_setaffinity 203 -#define __NR_sched_getaffinity 204 -#endif -#ifndef __NR_getdents64 -#define __NR_getdents64 217 -#endif -#ifndef __NR_set_tid_address -#define __NR_set_tid_address 218 -#endif -#ifndef __NR_fadvise64 -#define __NR_fadvise64 221 -#endif -#ifndef __NR_clock_gettime -#define __NR_clock_gettime 228 -#endif -#ifndef __NR_clock_getres -#define __NR_clock_getres 229 -#endif -#ifndef __NR_ioprio_set -#define __NR_ioprio_set 251 -#endif -#ifndef __NR_ioprio_get -#define __NR_ioprio_get 252 -#endif -#ifndef __NR_openat -#define __NR_openat 257 -#endif -#ifndef __NR_newfstatat -#define __NR_newfstatat 262 -#endif -#ifndef __NR_unlinkat -#define __NR_unlinkat 263 -#endif -#ifndef __NR_move_pages -#define __NR_move_pages 279 -#endif -#ifndef __NR_fallocate -#define __NR_fallocate 285 -#endif -/* End of x86-64 definitions */ -#elif defined(__mips__) -#if _MIPS_SIM == _MIPS_SIM_ABI32 -#ifndef __NR_setresuid -#define __NR_setresuid (__NR_Linux + 185) -#define __NR_getresuid (__NR_Linux + 186) -#define __NR_setresgid (__NR_Linux + 190) -#define __NR_getresgid (__NR_Linux + 191) -#endif -#ifndef __NR_rt_sigaction -#define __NR_rt_sigreturn (__NR_Linux + 193) -#define __NR_rt_sigaction (__NR_Linux + 194) -#define __NR_rt_sigprocmask (__NR_Linux + 195) -#define __NR_rt_sigpending (__NR_Linux + 196) -#define __NR_rt_sigsuspend (__NR_Linux + 199) -#endif -#ifndef __NR_pread64 -#define __NR_pread64 (__NR_Linux + 200) -#endif -#ifndef __NR_pwrite64 -#define __NR_pwrite64 (__NR_Linux + 201) -#endif -#ifndef __NR_stat64 -#define __NR_stat64 (__NR_Linux + 213) -#endif -#ifndef __NR_fstat64 -#define __NR_fstat64 
(__NR_Linux + 215) -#endif -#ifndef __NR_getdents64 -#define __NR_getdents64 (__NR_Linux + 219) -#endif -#ifndef __NR_gettid -#define __NR_gettid (__NR_Linux + 222) -#endif -#ifndef __NR_readahead -#define __NR_readahead (__NR_Linux + 223) -#endif -#ifndef __NR_setxattr -#define __NR_setxattr (__NR_Linux + 224) -#endif -#ifndef __NR_lsetxattr -#define __NR_lsetxattr (__NR_Linux + 225) -#endif -#ifndef __NR_getxattr -#define __NR_getxattr (__NR_Linux + 227) -#endif -#ifndef __NR_lgetxattr -#define __NR_lgetxattr (__NR_Linux + 228) -#endif -#ifndef __NR_listxattr -#define __NR_listxattr (__NR_Linux + 230) -#endif -#ifndef __NR_llistxattr -#define __NR_llistxattr (__NR_Linux + 231) -#endif -#ifndef __NR_tkill -#define __NR_tkill (__NR_Linux + 236) -#endif -#ifndef __NR_futex -#define __NR_futex (__NR_Linux + 238) -#endif -#ifndef __NR_sched_setaffinity -#define __NR_sched_setaffinity (__NR_Linux + 239) -#define __NR_sched_getaffinity (__NR_Linux + 240) -#endif -#ifndef __NR_set_tid_address -#define __NR_set_tid_address (__NR_Linux + 252) -#endif -#ifndef __NR_statfs64 -#define __NR_statfs64 (__NR_Linux + 255) -#endif -#ifndef __NR_fstatfs64 -#define __NR_fstatfs64 (__NR_Linux + 256) -#endif -#ifndef __NR_clock_gettime -#define __NR_clock_gettime (__NR_Linux + 263) -#endif -#ifndef __NR_clock_getres -#define __NR_clock_getres (__NR_Linux + 264) -#endif -#ifndef __NR_openat -#define __NR_openat (__NR_Linux + 288) -#endif -#ifndef __NR_fstatat -#define __NR_fstatat (__NR_Linux + 293) -#endif -#ifndef __NR_unlinkat -#define __NR_unlinkat (__NR_Linux + 294) -#endif -#ifndef __NR_move_pages -#define __NR_move_pages (__NR_Linux + 308) -#endif -#ifndef __NR_getcpu -#define __NR_getcpu (__NR_Linux + 312) -#endif -#ifndef __NR_ioprio_set -#define __NR_ioprio_set (__NR_Linux + 314) -#endif -#ifndef __NR_ioprio_get -#define __NR_ioprio_get (__NR_Linux + 315) -#endif -/* End of MIPS (old 32bit API) definitions */ -#elif _MIPS_SIM == _MIPS_SIM_ABI64 -#ifndef __NR_pread64 -#define 
__NR_pread64 (__NR_Linux + 16) -#endif -#ifndef __NR_pwrite64 -#define __NR_pwrite64 (__NR_Linux + 17) -#endif -#ifndef __NR_setresuid -#define __NR_setresuid (__NR_Linux + 115) -#define __NR_getresuid (__NR_Linux + 116) -#define __NR_setresgid (__NR_Linux + 117) -#define __NR_getresgid (__NR_Linux + 118) -#endif -#ifndef __NR_gettid -#define __NR_gettid (__NR_Linux + 178) -#endif -#ifndef __NR_readahead -#define __NR_readahead (__NR_Linux + 179) -#endif -#ifndef __NR_setxattr -#define __NR_setxattr (__NR_Linux + 180) -#endif -#ifndef __NR_lsetxattr -#define __NR_lsetxattr (__NR_Linux + 181) -#endif -#ifndef __NR_getxattr -#define __NR_getxattr (__NR_Linux + 183) -#endif -#ifndef __NR_lgetxattr -#define __NR_lgetxattr (__NR_Linux + 184) -#endif -#ifndef __NR_listxattr -#define __NR_listxattr (__NR_Linux + 186) -#endif -#ifndef __NR_llistxattr -#define __NR_llistxattr (__NR_Linux + 187) -#endif -#ifndef __NR_tkill -#define __NR_tkill (__NR_Linux + 192) -#endif -#ifndef __NR_futex -#define __NR_futex (__NR_Linux + 194) -#endif -#ifndef __NR_sched_setaffinity -#define __NR_sched_setaffinity (__NR_Linux + 195) -#define __NR_sched_getaffinity (__NR_Linux + 196) -#endif -#ifndef __NR_set_tid_address -#define __NR_set_tid_address (__NR_Linux + 212) -#endif -#ifndef __NR_clock_gettime -#define __NR_clock_gettime (__NR_Linux + 222) -#endif -#ifndef __NR_clock_getres -#define __NR_clock_getres (__NR_Linux + 223) -#endif -#ifndef __NR_openat -#define __NR_openat (__NR_Linux + 247) -#endif -#ifndef __NR_fstatat -#define __NR_fstatat (__NR_Linux + 252) -#endif -#ifndef __NR_unlinkat -#define __NR_unlinkat (__NR_Linux + 253) -#endif -#ifndef __NR_move_pages -#define __NR_move_pages (__NR_Linux + 267) -#endif -#ifndef __NR_getcpu -#define __NR_getcpu (__NR_Linux + 271) -#endif -#ifndef __NR_ioprio_set -#define __NR_ioprio_set (__NR_Linux + 273) -#endif -#ifndef __NR_ioprio_get -#define __NR_ioprio_get (__NR_Linux + 274) -#endif -/* End of MIPS (64bit API) definitions */ -#else 
-#ifndef __NR_setresuid -#define __NR_setresuid (__NR_Linux + 115) -#define __NR_getresuid (__NR_Linux + 116) -#define __NR_setresgid (__NR_Linux + 117) -#define __NR_getresgid (__NR_Linux + 118) -#endif -#ifndef __NR_gettid -#define __NR_gettid (__NR_Linux + 178) -#endif -#ifndef __NR_readahead -#define __NR_readahead (__NR_Linux + 179) -#endif -#ifndef __NR_setxattr -#define __NR_setxattr (__NR_Linux + 180) -#endif -#ifndef __NR_lsetxattr -#define __NR_lsetxattr (__NR_Linux + 181) -#endif -#ifndef __NR_getxattr -#define __NR_getxattr (__NR_Linux + 183) -#endif -#ifndef __NR_lgetxattr -#define __NR_lgetxattr (__NR_Linux + 184) -#endif -#ifndef __NR_listxattr -#define __NR_listxattr (__NR_Linux + 186) -#endif -#ifndef __NR_llistxattr -#define __NR_llistxattr (__NR_Linux + 187) -#endif -#ifndef __NR_tkill -#define __NR_tkill (__NR_Linux + 192) -#endif -#ifndef __NR_futex -#define __NR_futex (__NR_Linux + 194) -#endif -#ifndef __NR_sched_setaffinity -#define __NR_sched_setaffinity (__NR_Linux + 195) -#define __NR_sched_getaffinity (__NR_Linux + 196) -#endif -#ifndef __NR_set_tid_address -#define __NR_set_tid_address (__NR_Linux + 213) -#endif -#ifndef __NR_statfs64 -#define __NR_statfs64 (__NR_Linux + 217) -#endif -#ifndef __NR_fstatfs64 -#define __NR_fstatfs64 (__NR_Linux + 218) -#endif -#ifndef __NR_clock_gettime -#define __NR_clock_gettime (__NR_Linux + 226) -#endif -#ifndef __NR_clock_getres -#define __NR_clock_getres (__NR_Linux + 227) -#endif -#ifndef __NR_openat -#define __NR_openat (__NR_Linux + 251) -#endif -#ifndef __NR_fstatat -#define __NR_fstatat (__NR_Linux + 256) -#endif -#ifndef __NR_unlinkat -#define __NR_unlinkat (__NR_Linux + 257) -#endif -#ifndef __NR_move_pages -#define __NR_move_pages (__NR_Linux + 271) -#endif -#ifndef __NR_getcpu -#define __NR_getcpu (__NR_Linux + 275) -#endif -#ifndef __NR_ioprio_set -#define __NR_ioprio_set (__NR_Linux + 277) -#endif -#ifndef __NR_ioprio_get -#define __NR_ioprio_get (__NR_Linux + 278) -#endif -/* End of MIPS 
(new 32bit API) definitions */ -#endif -/* End of MIPS definitions */ -#elif defined(__PPC__) -#ifndef __NR_setfsuid -#define __NR_setfsuid 138 -#define __NR_setfsgid 139 -#endif -#ifndef __NR_setresuid -#define __NR_setresuid 164 -#define __NR_getresuid 165 -#define __NR_setresgid 169 -#define __NR_getresgid 170 -#endif -#ifndef __NR_rt_sigaction -#define __NR_rt_sigreturn 172 -#define __NR_rt_sigaction 173 -#define __NR_rt_sigprocmask 174 -#define __NR_rt_sigpending 175 -#define __NR_rt_sigsuspend 178 -#endif -#ifndef __NR_pread64 -#define __NR_pread64 179 -#endif -#ifndef __NR_pwrite64 -#define __NR_pwrite64 180 -#endif -#ifndef __NR_ugetrlimit -#define __NR_ugetrlimit 190 -#endif -#ifndef __NR_readahead -#define __NR_readahead 191 -#endif -#ifndef __NR_stat64 -#define __NR_stat64 195 -#endif -#ifndef __NR_fstat64 -#define __NR_fstat64 197 -#endif -#ifndef __NR_getdents64 -#define __NR_getdents64 202 -#endif -#ifndef __NR_gettid -#define __NR_gettid 207 -#endif -#ifndef __NR_tkill -#define __NR_tkill 208 -#endif -#ifndef __NR_setxattr -#define __NR_setxattr 209 -#endif -#ifndef __NR_lsetxattr -#define __NR_lsetxattr 210 -#endif -#ifndef __NR_getxattr -#define __NR_getxattr 212 -#endif -#ifndef __NR_lgetxattr -#define __NR_lgetxattr 213 -#endif -#ifndef __NR_listxattr -#define __NR_listxattr 215 -#endif -#ifndef __NR_llistxattr -#define __NR_llistxattr 216 -#endif -#ifndef __NR_futex -#define __NR_futex 221 -#endif -#ifndef __NR_sched_setaffinity -#define __NR_sched_setaffinity 222 -#define __NR_sched_getaffinity 223 -#endif -#ifndef __NR_set_tid_address -#define __NR_set_tid_address 232 -#endif -#ifndef __NR_clock_gettime -#define __NR_clock_gettime 246 -#endif -#ifndef __NR_clock_getres -#define __NR_clock_getres 247 -#endif -#ifndef __NR_statfs64 -#define __NR_statfs64 252 -#endif -#ifndef __NR_fstatfs64 -#define __NR_fstatfs64 253 -#endif -#ifndef __NR_fadvise64_64 -#define __NR_fadvise64_64 254 -#endif -#ifndef __NR_ioprio_set -#define __NR_ioprio_set 273 
-#endif -#ifndef __NR_ioprio_get -#define __NR_ioprio_get 274 -#endif -#ifndef __NR_openat -#define __NR_openat 286 -#endif -#ifndef __NR_fstatat64 -#define __NR_fstatat64 291 -#endif -#ifndef __NR_unlinkat -#define __NR_unlinkat 292 -#endif -#ifndef __NR_move_pages -#define __NR_move_pages 301 -#endif -#ifndef __NR_getcpu -#define __NR_getcpu 302 -#endif -/* End of powerpc defininitions */ -#endif - - -/* After forking, we must make sure to only call system calls. */ -#if __BOUNDED_POINTERS__ - #error "Need to port invocations of syscalls for bounded ptrs" -#else - /* The core dumper and the thread lister get executed after threads - * have been suspended. As a consequence, we cannot call any functions - * that acquire locks. Unfortunately, libc wraps most system calls - * (e.g. in order to implement pthread_atfork, and to make calls - * cancellable), which means we cannot call these functions. Instead, - * we have to call syscall() directly. - */ - #undef LSS_ERRNO - #ifdef SYS_ERRNO - /* Allow the including file to override the location of errno. This can - * be useful when using clone() with the CLONE_VM option. - */ - #define LSS_ERRNO SYS_ERRNO - #else - #define LSS_ERRNO errno - #endif - - #undef LSS_INLINE - #ifdef SYS_INLINE - #define LSS_INLINE SYS_INLINE - #else - #define LSS_INLINE static inline - #endif - - /* Allow the including file to override the prefix used for all new - * system calls. By default, it will be set to "sys_". 
- */ - #undef LSS_NAME - #ifndef SYS_PREFIX - #define LSS_NAME(name) sys_##name - #elif SYS_PREFIX < 0 - #define LSS_NAME(name) name - #elif SYS_PREFIX == 0 - #define LSS_NAME(name) sys0_##name - #elif SYS_PREFIX == 1 - #define LSS_NAME(name) sys1_##name - #elif SYS_PREFIX == 2 - #define LSS_NAME(name) sys2_##name - #elif SYS_PREFIX == 3 - #define LSS_NAME(name) sys3_##name - #elif SYS_PREFIX == 4 - #define LSS_NAME(name) sys4_##name - #elif SYS_PREFIX == 5 - #define LSS_NAME(name) sys5_##name - #elif SYS_PREFIX == 6 - #define LSS_NAME(name) sys6_##name - #elif SYS_PREFIX == 7 - #define LSS_NAME(name) sys7_##name - #elif SYS_PREFIX == 8 - #define LSS_NAME(name) sys8_##name - #elif SYS_PREFIX == 9 - #define LSS_NAME(name) sys9_##name - #endif - - #undef LSS_RETURN - #if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__)) - /* Failing system calls return a negative result in the range of - * -1..-4095. These are "errno" values with the sign inverted. - */ - #define LSS_RETURN(type, res) \ - do { \ - if ((unsigned long)(res) >= (unsigned long)(-4095)) { \ - LSS_ERRNO = -(res); \ - res = -1; \ - } \ - return (type) (res); \ - } while (0) - #elif defined(__mips__) - /* On MIPS, failing system calls return -1, and set errno in a - * separate CPU register. - */ - #define LSS_RETURN(type, res, err) \ - do { \ - if (err) { \ - LSS_ERRNO = (res); \ - res = -1; \ - } \ - return (type) (res); \ - } while (0) - #elif defined(__PPC__) - /* On PPC, failing system calls return -1, and set errno in a - * separate CPU register. See linux/unistd.h. - */ - #define LSS_RETURN(type, res, err) \ - do { \ - if (err & 0x10000000 ) { \ - LSS_ERRNO = (res); \ - res = -1; \ - } \ - return (type) (res); \ - } while (0) - #endif - #if defined(__i386__) - /* In PIC mode (e.g. when building shared libraries), gcc for i386 - * reserves ebx. Unfortunately, most distribution ship with implementations - * of _syscallX() which clobber ebx. 
- * Also, most definitions of _syscallX() neglect to mark "memory" as being - * clobbered. This causes problems with compilers, that do a better job - * at optimizing across __asm__ calls. - * So, we just have to redefine all of the _syscallX() macros. - */ - #undef LSS_BODY - #define LSS_BODY(type,args...) \ - long __res; \ - __asm__ __volatile__("push %%ebx\n" \ - "movl %2,%%ebx\n" \ - "int $0x80\n" \ - "pop %%ebx" \ - args \ - : "esp", "memory"); \ - LSS_RETURN(type,__res) - #undef _syscall0 - #define _syscall0(type,name) \ - type LSS_NAME(name)(void) { \ - long __res; \ - __asm__ volatile("int $0x80" \ - : "=a" (__res) \ - : "0" (__NR_##name) \ - : "memory"); \ - LSS_RETURN(type,__res); \ - } - #undef _syscall1 - #define _syscall1(type,name,type1,arg1) \ - type LSS_NAME(name)(type1 arg1) { \ - LSS_BODY(type, \ - : "=a" (__res) \ - : "0" (__NR_##name), "ri" ((long)(arg1))); \ - } - #undef _syscall2 - #define _syscall2(type,name,type1,arg1,type2,arg2) \ - type LSS_NAME(name)(type1 arg1,type2 arg2) { \ - LSS_BODY(type, \ - : "=a" (__res) \ - : "0" (__NR_##name),"ri" ((long)(arg1)), "c" ((long)(arg2))); \ - } - #undef _syscall3 - #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ - type LSS_NAME(name)(type1 arg1,type2 arg2,type3 arg3) { \ - LSS_BODY(type, \ - : "=a" (__res) \ - : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)), \ - "d" ((long)(arg3))); \ - } - #undef _syscall4 - #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - LSS_BODY(type, \ - : "=a" (__res) \ - : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)), \ - "d" ((long)(arg3)),"S" ((long)(arg4))); \ - } - #undef _syscall5 - #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5) { \ - long __res; \ - __asm__ __volatile__("push %%ebx\n" \ - "movl 
%2,%%ebx\n" \ - "movl %1,%%eax\n" \ - "int $0x80\n" \ - "pop %%ebx" \ - : "=a" (__res) \ - : "i" (__NR_##name), "ri" ((long)(arg1)), \ - "c" ((long)(arg2)), "d" ((long)(arg3)), \ - "S" ((long)(arg4)), "D" ((long)(arg5)) \ - : "esp", "memory"); \ - LSS_RETURN(type,__res); \ - } - #undef _syscall6 - #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5,type6,arg6) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5, type6 arg6) { \ - long __res; \ - struct { long __a1; long __a6; } __s = { (long)arg1, (long) arg6 }; \ - __asm__ __volatile__("push %%ebp\n" \ - "push %%ebx\n" \ - "movl 4(%2),%%ebp\n" \ - "movl 0(%2), %%ebx\n" \ - "movl %1,%%eax\n" \ - "int $0x80\n" \ - "pop %%ebx\n" \ - "pop %%ebp" \ - : "=a" (__res) \ - : "i" (__NR_##name), "0" ((long)(&__s)), \ - "c" ((long)(arg2)), "d" ((long)(arg3)), \ - "S" ((long)(arg4)), "D" ((long)(arg5)) \ - : "esp", "memory"); \ - LSS_RETURN(type,__res); \ - } - LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, - int flags, void *arg, int *parent_tidptr, - void *newtls, int *child_tidptr) { - long __res; - __asm__ __volatile__(/* if (fn == NULL) - * return -EINVAL; - */ - "movl %3,%%ecx\n" - "jecxz 1f\n" - - /* if (child_stack == NULL) - * return -EINVAL; - */ - "movl %4,%%ecx\n" - "jecxz 1f\n" - - /* Set up alignment of the child stack: - * child_stack = (child_stack & ~0xF) - 20; - */ - "andl $-16,%%ecx\n" - "subl $20,%%ecx\n" - - /* Push "arg" and "fn" onto the stack that will be - * used by the child. - */ - "movl %6,%%eax\n" - "movl %%eax,4(%%ecx)\n" - "movl %3,%%eax\n" - "movl %%eax,(%%ecx)\n" - - /* %eax = syscall(%eax = __NR_clone, - * %ebx = flags, - * %ecx = child_stack, - * %edx = parent_tidptr, - * %esi = newtls, - * %edi = child_tidptr) - * Also, make sure that %ebx gets preserved as it is - * used in PIC mode. 
- */ - "movl %8,%%esi\n" - "movl %7,%%edx\n" - "movl %5,%%eax\n" - "movl %9,%%edi\n" - "pushl %%ebx\n" - "movl %%eax,%%ebx\n" - "movl %2,%%eax\n" - "int $0x80\n" - - /* In the parent: restore %ebx - * In the child: move "fn" into %ebx - */ - "popl %%ebx\n" - - /* if (%eax != 0) - * return %eax; - */ - "test %%eax,%%eax\n" - "jnz 1f\n" - - /* In the child, now. Terminate frame pointer chain. - */ - "movl $0,%%ebp\n" - - /* Call "fn". "arg" is already on the stack. - */ - "call *%%ebx\n" - - /* Call _exit(%ebx). Unfortunately older versions - * of gcc restrict the number of arguments that can - * be passed to asm(). So, we need to hard-code the - * system call number. - */ - "movl %%eax,%%ebx\n" - "movl $1,%%eax\n" - "int $0x80\n" - - /* Return to parent. - */ - "1:\n" - : "=a" (__res) - : "0"(-EINVAL), "i"(__NR_clone), - "m"(fn), "m"(child_stack), "m"(flags), "m"(arg), - "m"(parent_tidptr), "m"(newtls), "m"(child_tidptr) - : "esp", "memory", "ecx", "edx", "esi", "edi"); - LSS_RETURN(int, __res); - } - - #define __NR__fadvise64_64 __NR_fadvise64_64 - LSS_INLINE _syscall6(int, _fadvise64_64, int, fd, - unsigned, offset_lo, unsigned, offset_hi, - unsigned, len_lo, unsigned, len_hi, - int, advice) - - LSS_INLINE int LSS_NAME(fadvise64)(int fd, loff_t offset, - loff_t len, int advice) { - return LSS_NAME(_fadvise64_64)(fd, - (unsigned)offset, (unsigned)(offset >>32), - (unsigned)len, (unsigned)(len >> 32), - advice); - } - - #define __NR__fallocate __NR_fallocate - LSS_INLINE _syscall6(int, _fallocate, int, fd, - int, mode, - unsigned, offset_lo, unsigned, offset_hi, - unsigned, len_lo, unsigned, len_hi) - - LSS_INLINE int LSS_NAME(fallocate)(int fd, int mode, - loff_t offset, loff_t len) { - union { loff_t off; unsigned w[2]; } o = { offset }, l = { len }; - return LSS_NAME(_fallocate)(fd, mode, o.w[0], o.w[1], l.w[0], l.w[1]); - } - - LSS_INLINE _syscall1(int, set_thread_area, void *, u) - LSS_INLINE _syscall1(int, get_thread_area, void *, u) - - LSS_INLINE void 
(*LSS_NAME(restore_rt)(void))(void) { - /* On i386, the kernel does not know how to return from a signal - * handler. Instead, it relies on user space to provide a - * restorer function that calls the {rt_,}sigreturn() system call. - * Unfortunately, we cannot just reference the glibc version of this - * function, as glibc goes out of its way to make it inaccessible. - */ - void (*res)(void); - __asm__ __volatile__("call 2f\n" - "0:.align 16\n" - "1:movl %1,%%eax\n" - "int $0x80\n" - "2:popl %0\n" - "addl $(1b-0b),%0\n" - : "=a" (res) - : "i" (__NR_rt_sigreturn)); - return res; - } - LSS_INLINE void (*LSS_NAME(restore)(void))(void) { - /* On i386, the kernel does not know how to return from a signal - * handler. Instead, it relies on user space to provide a - * restorer function that calls the {rt_,}sigreturn() system call. - * Unfortunately, we cannot just reference the glibc version of this - * function, as glibc goes out of its way to make it inaccessible. - */ - void (*res)(void); - __asm__ __volatile__("call 2f\n" - "0:.align 16\n" - "1:pop %%eax\n" - "movl %1,%%eax\n" - "int $0x80\n" - "2:popl %0\n" - "addl $(1b-0b),%0\n" - : "=a" (res) - : "i" (__NR_sigreturn)); - return res; - } - #elif defined(__x86_64__) - /* There are no known problems with any of the _syscallX() macros - * currently shipping for x86_64, but we still need to be able to define - * our own version so that we can override the location of the errno - * location (e.g. when using the clone() system call with the CLONE_VM - * option). - */ - #undef LSS_BODY - #define LSS_BODY(type,name, ...) 
\ - long __res; \ - __asm__ __volatile__("syscall" : "=a" (__res) : "0" (__NR_##name), \ - ##__VA_ARGS__ : "r11", "rcx", "memory"); \ - LSS_RETURN(type, __res) - #undef _syscall0 - #define _syscall0(type,name) \ - type LSS_NAME(name)() { \ - LSS_BODY(type, name); \ - } - #undef _syscall1 - #define _syscall1(type,name,type1,arg1) \ - type LSS_NAME(name)(type1 arg1) { \ - LSS_BODY(type, name, "D" ((long)(arg1))); \ - } - #undef _syscall2 - #define _syscall2(type,name,type1,arg1,type2,arg2) \ - type LSS_NAME(name)(type1 arg1, type2 arg2) { \ - LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2))); \ - } - #undef _syscall3 - #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ - LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2)), \ - "d" ((long)(arg3))); \ - } - #undef _syscall4 - #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - long __res; \ - __asm__ __volatile__("movq %5,%%r10; syscall" : \ - "=a" (__res) : "0" (__NR_##name), \ - "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \ - "r" ((long)(arg4)) : "r10", "r11", "rcx", "memory"); \ - LSS_RETURN(type, __res); \ - } - #undef _syscall5 - #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5) { \ - long __res; \ - __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; syscall" : \ - "=a" (__res) : "0" (__NR_##name), \ - "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \ - "r" ((long)(arg4)), "r" ((long)(arg5)) : \ - "r8", "r10", "r11", "rcx", "memory"); \ - LSS_RETURN(type, __res); \ - } - #undef _syscall6 - #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5,type6,arg6) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 
arg5, type6 arg6) { \ - long __res; \ - __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; movq %7,%%r9;" \ - "syscall" : \ - "=a" (__res) : "0" (__NR_##name), \ - "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \ - "r" ((long)(arg4)), "r" ((long)(arg5)), "r" ((long)(arg6)) : \ - "r8", "r9", "r10", "r11", "rcx", "memory"); \ - LSS_RETURN(type, __res); \ - } - LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, - int flags, void *arg, int *parent_tidptr, - void *newtls, int *child_tidptr) { - long __res; - { - register void *__tls __asm__("r8") = newtls; - register int *__ctid __asm__("r10") = child_tidptr; - __asm__ __volatile__(/* if (fn == NULL) - * return -EINVAL; - */ - "testq %4,%4\n" - "jz 1f\n" - - /* if (child_stack == NULL) - * return -EINVAL; - */ - "testq %5,%5\n" - "jz 1f\n" - - /* childstack -= 2*sizeof(void *); - */ - "subq $16,%5\n" - - /* Push "arg" and "fn" onto the stack that will be - * used by the child. - */ - "movq %7,8(%5)\n" - "movq %4,0(%5)\n" - - /* %rax = syscall(%rax = __NR_clone, - * %rdi = flags, - * %rsi = child_stack, - * %rdx = parent_tidptr, - * %r8 = new_tls, - * %r10 = child_tidptr) - */ - "movq %2,%%rax\n" - "syscall\n" - - /* if (%rax != 0) - * return; - */ - "testq %%rax,%%rax\n" - "jnz 1f\n" - - /* In the child. Terminate frame pointer chain. - */ - "xorq %%rbp,%%rbp\n" - - /* Call "fn(arg)". - */ - "popq %%rax\n" - "popq %%rdi\n" - "call *%%rax\n" - - /* Call _exit(%ebx). - */ - "movq %%rax,%%rdi\n" - "movq %3,%%rax\n" - "syscall\n" - - /* Return to parent. 
- */ - "1:\n" - : "=a" (__res) - : "0"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit), - "r"(fn), "S"(child_stack), "D"(flags), "r"(arg), - "d"(parent_tidptr), "r"(__tls), "r"(__ctid) - : "rsp", "memory", "r11", "rcx"); - } - LSS_RETURN(int, __res); - } - LSS_INLINE _syscall2(int, arch_prctl, int, c, void *, a) - LSS_INLINE _syscall4(int, fadvise64, int, fd, loff_t, offset, loff_t, len, - int, advice) - - LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) { - /* On x86-64, the kernel does not know how to return from - * a signal handler. Instead, it relies on user space to provide a - * restorer function that calls the rt_sigreturn() system call. - * Unfortunately, we cannot just reference the glibc version of this - * function, as glibc goes out of its way to make it inaccessible. - */ - void (*res)(void); - __asm__ __volatile__("call 2f\n" - "0:.align 16\n" - "1:movq %1,%%rax\n" - "syscall\n" - "2:popq %0\n" - "addq $(1b-0b),%0\n" - : "=a" (res) - : "i" (__NR_rt_sigreturn)); - return res; - } - #elif defined(__ARM_ARCH_3__) - /* Most definitions of _syscallX() neglect to mark "memory" as being - * clobbered. This causes problems with compilers, that do a better job - * at optimizing across __asm__ calls. - * So, we just have to redefine all fo the _syscallX() macros. - */ - #undef LSS_REG - #define LSS_REG(r,a) register long __r##r __asm__("r"#r) = (long)a - #undef LSS_BODY - #define LSS_BODY(type,name,args...) 
\ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ (__syscall(name) \ - : "=r"(__res_r0) : args : "lr", "memory"); \ - __res = __res_r0; \ - LSS_RETURN(type, __res) - #undef _syscall0 - #define _syscall0(type, name) \ - type LSS_NAME(name)() { \ - LSS_BODY(type, name); \ - } - #undef _syscall1 - #define _syscall1(type, name, type1, arg1) \ - type LSS_NAME(name)(type1 arg1) { \ - LSS_REG(0, arg1); LSS_BODY(type, name, "r"(__r0)); \ - } - #undef _syscall2 - #define _syscall2(type, name, type1, arg1, type2, arg2) \ - type LSS_NAME(name)(type1 arg1, type2 arg2) { \ - LSS_REG(0, arg1); LSS_REG(1, arg2); \ - LSS_BODY(type, name, "r"(__r0), "r"(__r1)); \ - } - #undef _syscall3 - #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ - LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ - LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2)); \ - } - #undef _syscall4 - #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ - LSS_REG(3, arg4); \ - LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3)); \ - } - #undef _syscall5 - #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5) { \ - LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ - LSS_REG(3, arg4); LSS_REG(4, arg5); \ - LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ - "r"(__r4)); \ - } - #undef _syscall6 - #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5,type6,arg6) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5, type6 arg6) { \ - LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ - LSS_REG(3, arg4); LSS_REG(4, 
arg5); LSS_REG(5, arg6); \ - LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ - "r"(__r4), "r"(__r5)); \ - } - LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, - int flags, void *arg, int *parent_tidptr, - void *newtls, int *child_tidptr) { - long __res; - { - register int __flags __asm__("r0") = flags; - register void *__stack __asm__("r1") = child_stack; - register void *__ptid __asm__("r2") = parent_tidptr; - register void *__tls __asm__("r3") = newtls; - register int *__ctid __asm__("r4") = child_tidptr; - __asm__ __volatile__(/* if (fn == NULL || child_stack == NULL) - * return -EINVAL; - */ - "cmp %2,#0\n" - "cmpne %3,#0\n" - "moveq %0,%1\n" - "beq 1f\n" - - /* Push "arg" and "fn" onto the stack that will be - * used by the child. - */ - "str %5,[%3,#-4]!\n" - "str %2,[%3,#-4]!\n" - - /* %r0 = syscall(%r0 = flags, - * %r1 = child_stack, - * %r2 = parent_tidptr, - * %r3 = newtls, - * %r4 = child_tidptr) - */ - __syscall(clone)"\n" - - /* if (%r0 != 0) - * return %r0; - */ - "movs %0,r0\n" - "bne 1f\n" - - /* In the child, now. Call "fn(arg)". - */ - "ldr r0,[sp, #4]\n" - "mov lr,pc\n" - "ldr pc,[sp]\n" - - /* Call _exit(%r0). - */ - __syscall(exit)"\n" - "1:\n" - : "=r" (__res) - : "i"(-EINVAL), - "r"(fn), "r"(__stack), "r"(__flags), "r"(arg), - "r"(__ptid), "r"(__tls), "r"(__ctid) - : "lr", "memory"); - } - LSS_RETURN(int, __res); - } - #elif defined(__mips__) - #undef LSS_REG - #define LSS_REG(r,a) register unsigned long __r##r __asm__("$"#r) = \ - (unsigned long)(a) - #undef LSS_BODY - #define LSS_BODY(type,name,r7,...) 
\ - register unsigned long __v0 __asm__("$2") = __NR_##name; \ - __asm__ __volatile__ ("syscall\n" \ - : "=&r"(__v0), r7 (__r7) \ - : "0"(__v0), ##__VA_ARGS__ \ - : "$8", "$9", "$10", "$11", "$12", \ - "$13", "$14", "$15", "$24", "memory"); \ - LSS_RETURN(type, __v0, __r7) - #undef _syscall0 - #define _syscall0(type, name) \ - type LSS_NAME(name)() { \ - register unsigned long __r7 __asm__("$7"); \ - LSS_BODY(type, name, "=r"); \ - } - #undef _syscall1 - #define _syscall1(type, name, type1, arg1) \ - type LSS_NAME(name)(type1 arg1) { \ - register unsigned long __r7 __asm__("$7"); \ - LSS_REG(4, arg1); LSS_BODY(type, name, "=r", "r"(__r4)); \ - } - #undef _syscall2 - #define _syscall2(type, name, type1, arg1, type2, arg2) \ - type LSS_NAME(name)(type1 arg1, type2 arg2) { \ - register unsigned long __r7 __asm__("$7"); \ - LSS_REG(4, arg1); LSS_REG(5, arg2); \ - LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5)); \ - } - #undef _syscall3 - #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ - register unsigned long __r7 __asm__("$7"); \ - LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ - LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5), "r"(__r6)); \ - } - #undef _syscall4 - #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ - LSS_REG(7, arg4); \ - LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6)); \ - } - #undef _syscall5 - #if _MIPS_SIM == _MIPS_SIM_ABI32 - /* The old 32bit MIPS system call API passes the fifth and sixth argument - * on the stack, whereas the new APIs use registers "r8" and "r9". 
- */ - #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5) { \ - LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ - LSS_REG(7, arg4); \ - register unsigned long __v0 __asm__("$2"); \ - __asm__ __volatile__ (".set noreorder\n" \ - "lw $2, %6\n" \ - "subu $29, 32\n" \ - "sw $2, 16($29)\n" \ - "li $2, %2\n" \ - "syscall\n" \ - "addiu $29, 32\n" \ - ".set reorder\n" \ - : "=&r"(__v0), "+r" (__r7) \ - : "i" (__NR_##name), "r"(__r4), "r"(__r5), \ - "r"(__r6), "m" ((unsigned long)arg5) \ - : "$8", "$9", "$10", "$11", "$12", \ - "$13", "$14", "$15", "$24", "memory"); \ - LSS_RETURN(type, __v0, __r7); \ - } - #else - #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5) { \ - LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ - LSS_REG(7, arg4); LSS_REG(8, arg5); \ - LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6), \ - "r"(__r8)); \ - } - #endif - #undef _syscall6 - #if _MIPS_SIM == _MIPS_SIM_ABI32 - /* The old 32bit MIPS system call API passes the fifth and sixth argument - * on the stack, whereas the new APIs use registers "r8" and "r9". 
- */ - #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5,type6,arg6) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5, type6 arg6) { \ - LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ - LSS_REG(7, arg4); \ - register unsigned long __v0 __asm__("$2"); \ - __asm__ __volatile__ (".set noreorder\n" \ - "lw $2, %6\n" \ - "lw $8, %7\n" \ - "subu $29, 32\n" \ - "sw $2, 16($29)\n" \ - "sw $8, 20($29)\n" \ - "li $2, %2\n" \ - "syscall\n" \ - "addiu $29, 32\n" \ - ".set reorder\n" \ - : "=&r"(__v0), "+r" (__r7) \ - : "i" (__NR_##name), "r"(__r4), "r"(__r5), \ - "r"(__r6), "r" ((unsigned long)arg5), \ - "r" ((unsigned long)arg6) \ - : "$8", "$9", "$10", "$11", "$12", \ - "$13", "$14", "$15", "$24", "memory"); \ - LSS_RETURN(type, __v0, __r7); \ - } - #else - #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5,type6,arg6) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5,type6 arg6) { \ - LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ - LSS_REG(7, arg4); LSS_REG(8, arg5); LSS_REG(9, arg6); \ - LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6), \ - "r"(__r8), "r"(__r9)); \ - } - #endif - LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, - int flags, void *arg, int *parent_tidptr, - void *newtls, int *child_tidptr) { - register unsigned long __v0 __asm__("$2"); - register unsigned long __r7 __asm__("$7") = (unsigned long)newtls; - { - register int __flags __asm__("$4") = flags; - register void *__stack __asm__("$5") = child_stack; - register void *__ptid __asm__("$6") = parent_tidptr; - register int *__ctid __asm__("$8") = child_tidptr; - __asm__ __volatile__( - #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 - "subu $29,24\n" - #elif _MIPS_SIM == _MIPS_SIM_NABI32 - "sub $29,16\n" - #else - "dsubu $29,16\n" - #endif - - /* if (fn == NULL || child_stack == NULL) - * return -EINVAL; 
- */ - "li %0,%2\n" - "beqz %5,1f\n" - "beqz %6,1f\n" - - /* Push "arg" and "fn" onto the stack that will be - * used by the child. - */ - #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 - "subu %6,32\n" - "sw %5,0(%6)\n" - "sw %8,4(%6)\n" - #elif _MIPS_SIM == _MIPS_SIM_NABI32 - "sub %6,32\n" - "sw %5,0(%6)\n" - "sw %8,8(%6)\n" - #else - "dsubu %6,32\n" - "sd %5,0(%6)\n" - "sd %8,8(%6)\n" - #endif - - /* $7 = syscall($4 = flags, - * $5 = child_stack, - * $6 = parent_tidptr, - * $7 = newtls, - * $8 = child_tidptr) - */ - "li $2,%3\n" - "syscall\n" - - /* if ($7 != 0) - * return $2; - */ - "bnez $7,1f\n" - "bnez $2,1f\n" - - /* In the child, now. Call "fn(arg)". - */ - #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 - "lw $25,0($29)\n" - "lw $4,4($29)\n" - #elif _MIPS_SIM == _MIPS_SIM_NABI32 - "lw $25,0($29)\n" - "lw $4,8($29)\n" - #else - "ld $25,0($29)\n" - "ld $4,8($29)\n" - #endif - "jalr $25\n" - - /* Call _exit($2) - */ - "move $4,$2\n" - "li $2,%4\n" - "syscall\n" - - "1:\n" - #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 - "addu $29, 24\n" - #elif _MIPS_SIM == _MIPS_SIM_NABI32 - "add $29, 16\n" - #else - "daddu $29,16\n" - #endif - : "=&r" (__v0), "=r" (__r7) - : "i"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit), - "r"(fn), "r"(__stack), "r"(__flags), "r"(arg), - "r"(__ptid), "r"(__r7), "r"(__ctid) - : "$9", "$10", "$11", "$12", "$13", "$14", "$15", - "$24", "memory"); - } - LSS_RETURN(int, __v0, __r7); - } - #elif defined (__PPC__) - #undef LSS_LOADARGS_0 - #define LSS_LOADARGS_0(name, dummy...) 
\ - __sc_0 = __NR_##name - #undef LSS_LOADARGS_1 - #define LSS_LOADARGS_1(name, arg1) \ - LSS_LOADARGS_0(name); \ - __sc_3 = (unsigned long) (arg1) - #undef LSS_LOADARGS_2 - #define LSS_LOADARGS_2(name, arg1, arg2) \ - LSS_LOADARGS_1(name, arg1); \ - __sc_4 = (unsigned long) (arg2) - #undef LSS_LOADARGS_3 - #define LSS_LOADARGS_3(name, arg1, arg2, arg3) \ - LSS_LOADARGS_2(name, arg1, arg2); \ - __sc_5 = (unsigned long) (arg3) - #undef LSS_LOADARGS_4 - #define LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4) \ - LSS_LOADARGS_3(name, arg1, arg2, arg3); \ - __sc_6 = (unsigned long) (arg4) - #undef LSS_LOADARGS_5 - #define LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5) \ - LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4); \ - __sc_7 = (unsigned long) (arg5) - #undef LSS_LOADARGS_6 - #define LSS_LOADARGS_6(name, arg1, arg2, arg3, arg4, arg5, arg6) \ - LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5); \ - __sc_8 = (unsigned long) (arg6) - #undef LSS_ASMINPUT_0 - #define LSS_ASMINPUT_0 "0" (__sc_0) - #undef LSS_ASMINPUT_1 - #define LSS_ASMINPUT_1 LSS_ASMINPUT_0, "1" (__sc_3) - #undef LSS_ASMINPUT_2 - #define LSS_ASMINPUT_2 LSS_ASMINPUT_1, "2" (__sc_4) - #undef LSS_ASMINPUT_3 - #define LSS_ASMINPUT_3 LSS_ASMINPUT_2, "3" (__sc_5) - #undef LSS_ASMINPUT_4 - #define LSS_ASMINPUT_4 LSS_ASMINPUT_3, "4" (__sc_6) - #undef LSS_ASMINPUT_5 - #define LSS_ASMINPUT_5 LSS_ASMINPUT_4, "5" (__sc_7) - #undef LSS_ASMINPUT_6 - #define LSS_ASMINPUT_6 LSS_ASMINPUT_5, "6" (__sc_8) - #undef LSS_BODY - #define LSS_BODY(nr, type, name, args...) 
\ - long __sc_ret, __sc_err; \ - { \ - register unsigned long __sc_0 __asm__ ("r0"); \ - register unsigned long __sc_3 __asm__ ("r3"); \ - register unsigned long __sc_4 __asm__ ("r4"); \ - register unsigned long __sc_5 __asm__ ("r5"); \ - register unsigned long __sc_6 __asm__ ("r6"); \ - register unsigned long __sc_7 __asm__ ("r7"); \ - register unsigned long __sc_8 __asm__ ("r8"); \ - \ - LSS_LOADARGS_##nr(name, args); \ - __asm__ __volatile__ \ - ("sc\n\t" \ - "mfcr %0" \ - : "=&r" (__sc_0), \ - "=&r" (__sc_3), "=&r" (__sc_4), \ - "=&r" (__sc_5), "=&r" (__sc_6), \ - "=&r" (__sc_7), "=&r" (__sc_8) \ - : LSS_ASMINPUT_##nr \ - : "cr0", "ctr", "memory", \ - "r9", "r10", "r11", "r12"); \ - __sc_ret = __sc_3; \ - __sc_err = __sc_0; \ - } \ - LSS_RETURN(type, __sc_ret, __sc_err) - #undef _syscall0 - #define _syscall0(type, name) \ - type LSS_NAME(name)(void) { \ - LSS_BODY(0, type, name); \ - } - #undef _syscall1 - #define _syscall1(type, name, type1, arg1) \ - type LSS_NAME(name)(type1 arg1) { \ - LSS_BODY(1, type, name, arg1); \ - } - #undef _syscall2 - #define _syscall2(type, name, type1, arg1, type2, arg2) \ - type LSS_NAME(name)(type1 arg1, type2 arg2) { \ - LSS_BODY(2, type, name, arg1, arg2); \ - } - #undef _syscall3 - #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ - LSS_BODY(3, type, name, arg1, arg2, arg3); \ - } - #undef _syscall4 - #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \ - type4, arg4) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - LSS_BODY(4, type, name, arg1, arg2, arg3, arg4); \ - } - #undef _syscall5 - #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \ - type4, arg4, type5, arg5) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5) { \ - LSS_BODY(5, type, name, arg1, arg2, arg3, arg4, arg5); \ - } - #undef _syscall6 - #define _syscall6(type, name, type1, 
arg1, type2, arg2, type3, arg3, \ - type4, arg4, type5, arg5, type6, arg6) \ - type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5, type6 arg6) { \ - LSS_BODY(6, type, name, arg1, arg2, arg3, arg4, arg5, arg6); \ - } - /* clone function adapted from glibc 2.3.6 clone.S */ - /* TODO(csilvers): consider wrapping some args up in a struct, like we - * do for i386's _syscall6, so we can compile successfully on gcc 2.95 - */ - LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, - int flags, void *arg, int *parent_tidptr, - void *newtls, int *child_tidptr) { - long __ret, __err; - { - register int (*__fn)(void *) __asm__ ("r8") = fn; - register void *__cstack __asm__ ("r4") = child_stack; - register int __flags __asm__ ("r3") = flags; - register void * __arg __asm__ ("r9") = arg; - register int * __ptidptr __asm__ ("r5") = parent_tidptr; - register void * __newtls __asm__ ("r6") = newtls; - register int * __ctidptr __asm__ ("r7") = child_tidptr; - __asm__ __volatile__( - /* check for fn == NULL - * and child_stack == NULL - */ - "cmpwi cr0, %6, 0\n\t" - "cmpwi cr1, %7, 0\n\t" - "cror cr0*4+eq, cr1*4+eq, cr0*4+eq\n\t" - "beq- cr0, 1f\n\t" - - /* set up stack frame for child */ - "clrrwi %7, %7, 4\n\t" - "li 0, 0\n\t" - "stwu 0, -16(%7)\n\t" - - /* fn, arg, child_stack are saved across the syscall: r28-30 */ - "mr 28, %6\n\t" - "mr 29, %7\n\t" - "mr 27, %9\n\t" - - /* syscall */ - "li 0, %4\n\t" - /* flags already in r3 - * child_stack already in r4 - * ptidptr already in r5 - * newtls already in r6 - * ctidptr already in r7 - */ - "sc\n\t" - - /* Test if syscall was successful */ - "cmpwi cr1, 3, 0\n\t" - "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t" - "bne- cr1, 1f\n\t" - - /* Do the function call */ - "mtctr 28\n\t" - "mr 3, 27\n\t" - "bctrl\n\t" - - /* Call _exit(r3) */ - "li 0, %5\n\t" - "sc\n\t" - - /* Return to parent */ - "1:\n" - "mfcr %1\n\t" - "mr %0, 3\n\t" - : "=r" (__ret), "=r" (__err) - : "0" (-1), "1" (EINVAL), - 
"i" (__NR_clone), "i" (__NR_exit), - "r" (__fn), "r" (__cstack), "r" (__flags), - "r" (__arg), "r" (__ptidptr), "r" (__newtls), - "r" (__ctidptr) - : "cr0", "cr1", "memory", "ctr", - "r0", "r29", "r27", "r28"); - } - LSS_RETURN(int, __ret, __err); - } - #endif - #define __NR__exit __NR_exit - #define __NR__gettid __NR_gettid - #define __NR__mremap __NR_mremap - LSS_INLINE _syscall1(int, brk, void *, e) - LSS_INLINE _syscall1(int, chdir, const char *,p) - LSS_INLINE _syscall1(int, close, int, f) - LSS_INLINE _syscall2(int, clock_getres, int, c, - struct kernel_timespec*, t) - LSS_INLINE _syscall2(int, clock_gettime, int, c, - struct kernel_timespec*, t) - LSS_INLINE _syscall1(int, dup, int, f) - LSS_INLINE _syscall2(int, dup2, int, s, - int, d) - LSS_INLINE _syscall3(int, execve, const char*, f, - const char*const*,a,const char*const*, e) - LSS_INLINE _syscall1(int, _exit, int, e) - LSS_INLINE _syscall1(int, exit_group, int, e) - LSS_INLINE _syscall3(int, fcntl, int, f, - int, c, long, a) - LSS_INLINE _syscall0(pid_t, fork) - LSS_INLINE _syscall2(int, fstat, int, f, - struct kernel_stat*, b) - LSS_INLINE _syscall2(int, fstatfs, int, f, - struct kernel_statfs*, b) - LSS_INLINE _syscall2(int, ftruncate, int, f, - off_t, l) - LSS_INLINE _syscall4(int, futex, int*, a, - int, o, int, v, - struct kernel_timespec*, t) - LSS_INLINE _syscall3(int, getdents, int, f, - struct kernel_dirent*, d, int, c) - LSS_INLINE _syscall3(int, getdents64, int, f, - struct kernel_dirent64*, d, int, c) - LSS_INLINE _syscall0(gid_t, getegid) - LSS_INLINE _syscall0(uid_t, geteuid) - LSS_INLINE _syscall0(pid_t, getpgrp) - LSS_INLINE _syscall0(pid_t, getpid) - LSS_INLINE _syscall0(pid_t, getppid) - LSS_INLINE _syscall2(int, getpriority, int, a, - int, b) - LSS_INLINE _syscall3(int, getresgid, gid_t *, r, - gid_t *, e, gid_t *, s) - LSS_INLINE _syscall3(int, getresuid, uid_t *, r, - uid_t *, e, uid_t *, s) - LSS_INLINE _syscall2(int, getrlimit, int, r, - struct kernel_rlimit*, l) - LSS_INLINE 
_syscall1(pid_t, getsid, pid_t, p) - LSS_INLINE _syscall0(pid_t, _gettid) - LSS_INLINE _syscall2(int, gettimeofday, struct timeval *, v, - struct timezone *, z) - LSS_INLINE _syscall5(int, setxattr, const char *,p, - const char *, n, const void *,v, - size_t, s, int, f) - LSS_INLINE _syscall5(int, lsetxattr, const char *,p, - const char *, n, const void *,v, - size_t, s, int, f) - LSS_INLINE _syscall4(ssize_t, getxattr, const char *,p, - const char *, n, void *, v, size_t, s) - LSS_INLINE _syscall4(ssize_t, lgetxattr, const char *,p, - const char *, n, void *, v, size_t, s) - LSS_INLINE _syscall3(ssize_t, listxattr, const char *,p, - char *, l, size_t, s) - LSS_INLINE _syscall3(ssize_t, llistxattr, const char *,p, - char *, l, size_t, s) - LSS_INLINE _syscall3(int, ioctl, int, d, - int, r, void *, a) - LSS_INLINE _syscall2(int, ioprio_get, int, which, - int, who) - LSS_INLINE _syscall3(int, ioprio_set, int, which, - int, who, int, ioprio) - LSS_INLINE _syscall2(int, kill, pid_t, p, - int, s) - LSS_INLINE _syscall3(off_t, lseek, int, f, - off_t, o, int, w) - LSS_INLINE _syscall2(int, munmap, void*, s, - size_t, l) - LSS_INLINE _syscall6(long, move_pages, pid_t, p, - unsigned long, n, void **,g, int *, d, - int *, s, int, f) - LSS_INLINE _syscall3(int, mprotect, const void *,a, - size_t, l, int, p) - LSS_INLINE _syscall5(void*, _mremap, void*, o, - size_t, os, size_t, ns, - unsigned long, f, void *, a) - LSS_INLINE _syscall3(int, open, const char*, p, - int, f, int, m) - LSS_INLINE _syscall3(int, poll, struct kernel_pollfd*, u, - unsigned int, n, int, t) - LSS_INLINE _syscall2(int, prctl, int, o, - long, a) - LSS_INLINE _syscall4(long, ptrace, int, r, - pid_t, p, void *, a, void *, d) - #if defined(__NR_quotactl) - // Defined on x86_64 / i386 only - LSS_INLINE _syscall4(int, quotactl, int, cmd, const char *, special, - int, id, caddr_t, addr) - #endif - LSS_INLINE _syscall3(ssize_t, read, int, f, - void *, b, size_t, c) - LSS_INLINE _syscall3(int, readlink, const 
char*, p, - char*, b, size_t, s) - LSS_INLINE _syscall4(int, rt_sigaction, int, s, - const struct kernel_sigaction*, a, - struct kernel_sigaction*, o, size_t, c) - LSS_INLINE _syscall2(int, rt_sigpending, struct kernel_sigset_t *, s, - size_t, c) - LSS_INLINE _syscall4(int, rt_sigprocmask, int, h, - const struct kernel_sigset_t*, s, - struct kernel_sigset_t*, o, size_t, c); - LSS_INLINE _syscall1(int, rt_sigreturn, unsigned long, u); - LSS_INLINE _syscall2(int, rt_sigsuspend, - const struct kernel_sigset_t*, s, size_t, c); - LSS_INLINE _syscall3(int, sched_getaffinity,pid_t, p, - unsigned int, l, unsigned long *, m) - LSS_INLINE _syscall3(int, sched_setaffinity,pid_t, p, - unsigned int, l, unsigned long *, m) - LSS_INLINE _syscall0(int, sched_yield) - LSS_INLINE _syscall1(long, set_tid_address, int *, t) - LSS_INLINE _syscall1(int, setfsgid, gid_t, g) - LSS_INLINE _syscall1(int, setfsuid, uid_t, u) - LSS_INLINE _syscall1(int, setuid, uid_t, u) - LSS_INLINE _syscall1(int, setgid, gid_t, g) - LSS_INLINE _syscall2(int, setpgid, pid_t, p, - pid_t, g) - LSS_INLINE _syscall3(int, setpriority, int, a, - int, b, int, p) - LSS_INLINE _syscall3(int, setresgid, gid_t, r, - gid_t, e, gid_t, s) - LSS_INLINE _syscall3(int, setresuid, uid_t, r, - uid_t, e, uid_t, s) - LSS_INLINE _syscall2(int, setrlimit, int, r, - const struct kernel_rlimit*, l) - LSS_INLINE _syscall0(pid_t, setsid) - LSS_INLINE _syscall2(int, sigaltstack, const stack_t*, s, - const stack_t*, o) - #if defined(__NR_sigreturn) - LSS_INLINE _syscall1(int, sigreturn, unsigned long, u); - #endif - LSS_INLINE _syscall2(int, stat, const char*, f, - struct kernel_stat*, b) - LSS_INLINE _syscall2(int, statfs, const char*, f, - struct kernel_statfs*, b) - LSS_INLINE _syscall3(int, tgkill, pid_t, p, - pid_t, t, int, s) - LSS_INLINE _syscall2(int, tkill, pid_t, p, - int, s) - LSS_INLINE _syscall3(ssize_t, write, int, f, - const void *, b, size_t, c) - LSS_INLINE _syscall3(ssize_t, writev, int, f, - const struct 
kernel_iovec*, v, size_t, c) - LSS_INLINE _syscall1(int, unlink, const char*, f) - #if defined(__NR_getcpu) - LSS_INLINE _syscall3(long, getcpu, unsigned *, cpu, - unsigned *, node, void *, unused); - #endif - #if defined(__x86_64__) || \ - (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32) - LSS_INLINE _syscall3(int, recvmsg, int, s, - struct kernel_msghdr*, m, int, f) - LSS_INLINE _syscall3(int, sendmsg, int, s, - const struct kernel_msghdr*, m, int, f) - LSS_INLINE _syscall6(int, sendto, int, s, - const void*, m, size_t, l, - int, f, - const struct kernel_sockaddr*, a, int, t) - LSS_INLINE _syscall2(int, shutdown, int, s, - int, h) - LSS_INLINE _syscall3(int, socket, int, d, - int, t, int, p) - LSS_INLINE _syscall4(int, socketpair, int, d, - int, t, int, p, int*, s) - #endif - #if defined(__x86_64__) - LSS_INLINE _syscall4(int, fallocate, int, fd, int, mode, - loff_t, offset, loff_t, len) - - LSS_INLINE int LSS_NAME(getresgid32)(gid_t *rgid, - gid_t *egid, - gid_t *sgid) { - return LSS_NAME(getresgid)(rgid, egid, sgid); - } - - LSS_INLINE int LSS_NAME(getresuid32)(uid_t *ruid, - uid_t *euid, - uid_t *suid) { - return LSS_NAME(getresuid)(ruid, euid, suid); - } - - LSS_INLINE _syscall6(void*, mmap, void*, s, - size_t, l, int, p, - int, f, int, d, - __off64_t, o) - - LSS_INLINE _syscall4(int, newfstatat, int, d, - const char *, p, - struct kernel_stat*, b, int, f) - - LSS_INLINE int LSS_NAME(setfsgid32)(gid_t gid) { - return LSS_NAME(setfsgid)(gid); - } - - LSS_INLINE int LSS_NAME(setfsuid32)(uid_t uid) { - return LSS_NAME(setfsuid)(uid); - } - - LSS_INLINE int LSS_NAME(setresgid32)(gid_t rgid, gid_t egid, gid_t sgid) { - return LSS_NAME(setresgid)(rgid, egid, sgid); - } - - LSS_INLINE int LSS_NAME(setresuid32)(uid_t ruid, uid_t euid, uid_t suid) { - return LSS_NAME(setresuid)(ruid, euid, suid); - } - - LSS_INLINE int LSS_NAME(sigaction)(int signum, - const struct kernel_sigaction *act, - struct kernel_sigaction *oldact) { - /* On x86_64, the kernel requires us to 
always set our own - * SA_RESTORER in order to be able to return from a signal handler. - * This function must have a "magic" signature that the "gdb" - * (and maybe the kernel?) can recognize. - */ - if (act != NULL && !(act->sa_flags & SA_RESTORER)) { - struct kernel_sigaction a = *act; - a.sa_flags |= SA_RESTORER; - a.sa_restorer = LSS_NAME(restore_rt)(); - return LSS_NAME(rt_sigaction)(signum, &a, oldact, - (KERNEL_NSIG+7)/8); - } else { - return LSS_NAME(rt_sigaction)(signum, act, oldact, - (KERNEL_NSIG+7)/8); - } - } - - LSS_INLINE int LSS_NAME(sigpending)(struct kernel_sigset_t *set) { - return LSS_NAME(rt_sigpending)(set, (KERNEL_NSIG+7)/8); - } - - LSS_INLINE int LSS_NAME(sigprocmask)(int how, - const struct kernel_sigset_t *set, - struct kernel_sigset_t *oldset) { - return LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8); - } - - LSS_INLINE int LSS_NAME(sigsuspend)(const struct kernel_sigset_t *set) { - return LSS_NAME(rt_sigsuspend)(set, (KERNEL_NSIG+7)/8); - } - #endif - #if defined(__x86_64__) || defined(__ARM_ARCH_3__) || \ - (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32) - LSS_INLINE _syscall4(pid_t, wait4, pid_t, p, - int*, s, int, o, - struct kernel_rusage*, r) - - LSS_INLINE pid_t LSS_NAME(waitpid)(pid_t pid, int *status, int options){ - return LSS_NAME(wait4)(pid, status, options, 0); - } - #endif - #if defined(__i386__) || defined(__x86_64__) - LSS_INLINE _syscall4(int, openat, int, d, const char *, p, int, f, int, m) - LSS_INLINE _syscall3(int, unlinkat, int, d, const char *, p, int, f) - #endif - #if defined(__i386__) || defined(__ARM_ARCH_3__) - #define __NR__getresgid32 __NR_getresgid32 - #define __NR__getresuid32 __NR_getresuid32 - #define __NR__setfsgid32 __NR_setfsgid32 - #define __NR__setfsuid32 __NR_setfsuid32 - #define __NR__setresgid32 __NR_setresgid32 - #define __NR__setresuid32 __NR_setresuid32 - LSS_INLINE _syscall2(int, ugetrlimit, int, r, - struct kernel_rlimit*, l) - LSS_INLINE _syscall3(int, _getresgid32, 
gid_t *, r, - gid_t *, e, gid_t *, s) - LSS_INLINE _syscall3(int, _getresuid32, uid_t *, r, - uid_t *, e, uid_t *, s) - LSS_INLINE _syscall1(int, _setfsgid32, gid_t, f) - LSS_INLINE _syscall1(int, _setfsuid32, uid_t, f) - LSS_INLINE _syscall3(int, _setresgid32, gid_t, r, - gid_t, e, gid_t, s) - LSS_INLINE _syscall3(int, _setresuid32, uid_t, r, - uid_t, e, uid_t, s) - - LSS_INLINE int LSS_NAME(getresgid32)(gid_t *rgid, - gid_t *egid, - gid_t *sgid) { - int rc; - if ((rc = LSS_NAME(_getresgid32)(rgid, egid, sgid)) < 0 && - LSS_ERRNO == ENOSYS) { - if ((rgid == NULL) || (egid == NULL) || (sgid == NULL)) { - return EFAULT; - } - // Clear the high bits first, since getresgid only sets 16 bits - *rgid = *egid = *sgid = 0; - rc = LSS_NAME(getresgid)(rgid, egid, sgid); - } - return rc; - } - - LSS_INLINE int LSS_NAME(getresuid32)(uid_t *ruid, - uid_t *euid, - uid_t *suid) { - int rc; - if ((rc = LSS_NAME(_getresuid32)(ruid, euid, suid)) < 0 && - LSS_ERRNO == ENOSYS) { - if ((ruid == NULL) || (euid == NULL) || (suid == NULL)) { - return EFAULT; - } - // Clear the high bits first, since getresuid only sets 16 bits - *ruid = *euid = *suid = 0; - rc = LSS_NAME(getresuid)(ruid, euid, suid); - } - return rc; - } - - LSS_INLINE int LSS_NAME(setfsgid32)(gid_t gid) { - int rc; - if ((rc = LSS_NAME(_setfsgid32)(gid)) < 0 && - LSS_ERRNO == ENOSYS) { - if ((unsigned int)gid & ~0xFFFFu) { - rc = EINVAL; - } else { - rc = LSS_NAME(setfsgid)(gid); - } - } - return rc; - } - - LSS_INLINE int LSS_NAME(setfsuid32)(uid_t uid) { - int rc; - if ((rc = LSS_NAME(_setfsuid32)(uid)) < 0 && - LSS_ERRNO == ENOSYS) { - if ((unsigned int)uid & ~0xFFFFu) { - rc = EINVAL; - } else { - rc = LSS_NAME(setfsuid)(uid); - } - } - return rc; - } - - LSS_INLINE int LSS_NAME(setresgid32)(gid_t rgid, gid_t egid, gid_t sgid) { - int rc; - if ((rc = LSS_NAME(_setresgid32)(rgid, egid, sgid)) < 0 && - LSS_ERRNO == ENOSYS) { - if ((unsigned int)rgid & ~0xFFFFu || - (unsigned int)egid & ~0xFFFFu || - (unsigned int)sgid 
& ~0xFFFFu) { - rc = EINVAL; - } else { - rc = LSS_NAME(setresgid)(rgid, egid, sgid); - } - } - return rc; - } - - LSS_INLINE int LSS_NAME(setresuid32)(uid_t ruid, uid_t euid, uid_t suid) { - int rc; - if ((rc = LSS_NAME(_setresuid32)(ruid, euid, suid)) < 0 && - LSS_ERRNO == ENOSYS) { - if ((unsigned int)ruid & ~0xFFFFu || - (unsigned int)euid & ~0xFFFFu || - (unsigned int)suid & ~0xFFFFu) { - rc = EINVAL; - } else { - rc = LSS_NAME(setresuid)(ruid, euid, suid); - } - } - return rc; - } - #endif - LSS_INLINE int LSS_NAME(sigemptyset)(struct kernel_sigset_t *set) { - memset(&set->sig, 0, sizeof(set->sig)); - return 0; - } - - LSS_INLINE int LSS_NAME(sigfillset)(struct kernel_sigset_t *set) { - memset(&set->sig, -1, sizeof(set->sig)); - return 0; - } - - LSS_INLINE int LSS_NAME(sigaddset)(struct kernel_sigset_t *set, - int signum) { - if (signum < 1 || signum > (int)(8*sizeof(set->sig))) { - LSS_ERRNO = EINVAL; - return -1; - } else { - set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] - |= 1UL << ((signum - 1) % (8*sizeof(set->sig[0]))); - return 0; - } - } - - LSS_INLINE int LSS_NAME(sigdelset)(struct kernel_sigset_t *set, - int signum) { - if (signum < 1 || signum > (int)(8*sizeof(set->sig))) { - LSS_ERRNO = EINVAL; - return -1; - } else { - set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] - &= ~(1UL << ((signum - 1) % (8*sizeof(set->sig[0])))); - return 0; - } - } - - LSS_INLINE int LSS_NAME(sigismember)(struct kernel_sigset_t *set, - int signum) { - if (signum < 1 || signum > (int)(8*sizeof(set->sig))) { - LSS_ERRNO = EINVAL; - return -1; - } else { - return !!(set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] & - (1UL << ((signum - 1) % (8*sizeof(set->sig[0]))))); - } - } - #if defined(__i386__) || defined(__ARM_ARCH_3__) || \ - (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || defined(__PPC__) - #define __NR__sigaction __NR_sigaction - #define __NR__sigpending __NR_sigpending - #define __NR__sigprocmask __NR_sigprocmask - #define __NR__sigsuspend __NR_sigsuspend 
- #define __NR__socketcall __NR_socketcall - LSS_INLINE _syscall2(int, fstat64, int, f, - struct kernel_stat64 *, b) - LSS_INLINE _syscall5(int, _llseek, uint, fd, ulong, hi, ulong, lo, - loff_t *, res, uint, wh) - LSS_INLINE _syscall1(void*, mmap, void*, a) - LSS_INLINE _syscall6(void*, mmap2, void*, s, - size_t, l, int, p, - int, f, int, d, - __off64_t, o) - LSS_INLINE _syscall3(int, _sigaction, int, s, - const struct kernel_old_sigaction*, a, - struct kernel_old_sigaction*, o) - LSS_INLINE _syscall1(int, _sigpending, unsigned long*, s) - LSS_INLINE _syscall3(int, _sigprocmask, int, h, - const unsigned long*, s, - unsigned long*, o) - #ifdef __PPC__ - LSS_INLINE _syscall1(int, _sigsuspend, unsigned long, s) - #else - LSS_INLINE _syscall3(int, _sigsuspend, const void*, a, - int, b, - unsigned long, s) - #endif - LSS_INLINE _syscall2(int, stat64, const char *, p, - struct kernel_stat64 *, b) - - LSS_INLINE int LSS_NAME(sigaction)(int signum, - const struct kernel_sigaction *act, - struct kernel_sigaction *oldact) { - int old_errno = LSS_ERRNO; - int rc; - struct kernel_sigaction a; - if (act != NULL) { - a = *act; - #ifdef __i386__ - /* On i386, the kernel requires us to always set our own - * SA_RESTORER when using realtime signals. Otherwise, it does not - * know how to return from a signal handler. This function must have - * a "magic" signature that the "gdb" (and maybe the kernel?) can - * recognize. - * Apparently, a SA_RESTORER is implicitly set by the kernel, when - * using non-realtime signals. - * - * TODO: Test whether ARM needs a restorer - */ - if (!(a.sa_flags & SA_RESTORER)) { - a.sa_flags |= SA_RESTORER; - a.sa_restorer = (a.sa_flags & SA_SIGINFO) - ? LSS_NAME(restore_rt)() : LSS_NAME(restore)(); - } - #endif - } - rc = LSS_NAME(rt_sigaction)(signum, act ? 
&a : act, oldact, - (KERNEL_NSIG+7)/8); - if (rc < 0 && LSS_ERRNO == ENOSYS) { - struct kernel_old_sigaction oa, ooa, *ptr_a = &oa, *ptr_oa = &ooa; - if (!act) { - ptr_a = NULL; - } else { - oa.sa_handler_ = act->sa_handler_; - memcpy(&oa.sa_mask, &act->sa_mask, sizeof(oa.sa_mask)); - #ifndef __mips__ - oa.sa_restorer = act->sa_restorer; - #endif - oa.sa_flags = act->sa_flags; - } - if (!oldact) { - ptr_oa = NULL; - } - LSS_ERRNO = old_errno; - rc = LSS_NAME(_sigaction)(signum, ptr_a, ptr_oa); - if (rc == 0 && oldact) { - if (act) { - memcpy(oldact, act, sizeof(*act)); - } else { - memset(oldact, 0, sizeof(*oldact)); - } - oldact->sa_handler_ = ptr_oa->sa_handler_; - oldact->sa_flags = ptr_oa->sa_flags; - memcpy(&oldact->sa_mask, &ptr_oa->sa_mask, sizeof(ptr_oa->sa_mask)); - #ifndef __mips__ - oldact->sa_restorer = ptr_oa->sa_restorer; - #endif - } - } - return rc; - } - - LSS_INLINE int LSS_NAME(sigpending)(struct kernel_sigset_t *set) { - int old_errno = LSS_ERRNO; - int rc = LSS_NAME(rt_sigpending)(set, (KERNEL_NSIG+7)/8); - if (rc < 0 && LSS_ERRNO == ENOSYS) { - LSS_ERRNO = old_errno; - LSS_NAME(sigemptyset)(set); - rc = LSS_NAME(_sigpending)(&set->sig[0]); - } - return rc; - } - - LSS_INLINE int LSS_NAME(sigprocmask)(int how, - const struct kernel_sigset_t *set, - struct kernel_sigset_t *oldset) { - int olderrno = LSS_ERRNO; - int rc = LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8); - if (rc < 0 && LSS_ERRNO == ENOSYS) { - LSS_ERRNO = olderrno; - if (oldset) { - LSS_NAME(sigemptyset)(oldset); - } - rc = LSS_NAME(_sigprocmask)(how, - set ? &set->sig[0] : NULL, - oldset ? 
&oldset->sig[0] : NULL); - } - return rc; - } - - LSS_INLINE int LSS_NAME(sigsuspend)(const struct kernel_sigset_t *set) { - int olderrno = LSS_ERRNO; - int rc = LSS_NAME(rt_sigsuspend)(set, (KERNEL_NSIG+7)/8); - if (rc < 0 && LSS_ERRNO == ENOSYS) { - LSS_ERRNO = olderrno; - rc = LSS_NAME(_sigsuspend)( - #ifndef __PPC__ - set, 0, - #endif - set->sig[0]); - } - return rc; - } - #endif - #if defined(__PPC__) - #undef LSS_SC_LOADARGS_0 - #define LSS_SC_LOADARGS_0(dummy...) - #undef LSS_SC_LOADARGS_1 - #define LSS_SC_LOADARGS_1(arg1) \ - __sc_4 = (unsigned long) (arg1) - #undef LSS_SC_LOADARGS_2 - #define LSS_SC_LOADARGS_2(arg1, arg2) \ - LSS_SC_LOADARGS_1(arg1); \ - __sc_5 = (unsigned long) (arg2) - #undef LSS_SC_LOADARGS_3 - #define LSS_SC_LOADARGS_3(arg1, arg2, arg3) \ - LSS_SC_LOADARGS_2(arg1, arg2); \ - __sc_6 = (unsigned long) (arg3) - #undef LSS_SC_LOADARGS_4 - #define LSS_SC_LOADARGS_4(arg1, arg2, arg3, arg4) \ - LSS_SC_LOADARGS_3(arg1, arg2, arg3); \ - __sc_7 = (unsigned long) (arg4) - #undef LSS_SC_LOADARGS_5 - #define LSS_SC_LOADARGS_5(arg1, arg2, arg3, arg4, arg5) \ - LSS_SC_LOADARGS_4(arg1, arg2, arg3, arg4); \ - __sc_8 = (unsigned long) (arg5) - #undef LSS_SC_BODY - #define LSS_SC_BODY(nr, type, opt, args...) 
\ - long __sc_ret, __sc_err; \ - { \ - register unsigned long __sc_0 __asm__ ("r0") = __NR_socketcall; \ - register unsigned long __sc_3 __asm__ ("r3") = opt; \ - register unsigned long __sc_4 __asm__ ("r4"); \ - register unsigned long __sc_5 __asm__ ("r5"); \ - register unsigned long __sc_6 __asm__ ("r6"); \ - register unsigned long __sc_7 __asm__ ("r7"); \ - register unsigned long __sc_8 __asm__ ("r8"); \ - LSS_SC_LOADARGS_##nr(args); \ - __asm__ __volatile__ \ - ("stwu 1, -48(1)\n\t" \ - "stw 4, 20(1)\n\t" \ - "stw 5, 24(1)\n\t" \ - "stw 6, 28(1)\n\t" \ - "stw 7, 32(1)\n\t" \ - "stw 8, 36(1)\n\t" \ - "addi 4, 1, 20\n\t" \ - "sc\n\t" \ - "mfcr %0" \ - : "=&r" (__sc_0), \ - "=&r" (__sc_3), "=&r" (__sc_4), \ - "=&r" (__sc_5), "=&r" (__sc_6), \ - "=&r" (__sc_7), "=&r" (__sc_8) \ - : LSS_ASMINPUT_##nr \ - : "cr0", "ctr", "memory"); \ - __sc_ret = __sc_3; \ - __sc_err = __sc_0; \ - } \ - LSS_RETURN(type, __sc_ret, __sc_err) - - LSS_INLINE ssize_t LSS_NAME(recvmsg)(int s,struct kernel_msghdr *msg, - int flags){ - LSS_SC_BODY(3, ssize_t, 17, s, msg, flags); - } - - LSS_INLINE ssize_t LSS_NAME(sendmsg)(int s, - const struct kernel_msghdr *msg, - int flags) { - LSS_SC_BODY(3, ssize_t, 16, s, msg, flags); - } - - // TODO(csilvers): why is this ifdef'ed out? 
-#if 0 - LSS_INLINE ssize_t LSS_NAME(sendto)(int s, const void *buf, size_t len, - int flags, - const struct kernel_sockaddr *to, - unsigned int tolen) { - LSS_BODY(6, ssize_t, 11, s, buf, len, flags, to, tolen); - } -#endif - - LSS_INLINE int LSS_NAME(shutdown)(int s, int how) { - LSS_SC_BODY(2, int, 13, s, how); - } - - LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) { - LSS_SC_BODY(3, int, 1, domain, type, protocol); - } - - LSS_INLINE int LSS_NAME(socketpair)(int d, int type, int protocol, - int sv[2]) { - LSS_SC_BODY(4, int, 8, d, type, protocol, sv); - } - #endif - #if defined(__i386__) || defined(__ARM_ARCH_3__) || \ - (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) - #define __NR__socketcall __NR_socketcall - LSS_INLINE _syscall2(int, _socketcall, int, c, - va_list, a) - - LSS_INLINE int LSS_NAME(socketcall)(int op, ...) { - int rc; - va_list ap; - va_start(ap, op); - rc = LSS_NAME(_socketcall)(op, ap); - va_end(ap); - return rc; - } - - LSS_INLINE ssize_t LSS_NAME(recvmsg)(int s,struct kernel_msghdr *msg, - int flags){ - return (ssize_t)LSS_NAME(socketcall)(17, s, msg, flags); - } - - LSS_INLINE ssize_t LSS_NAME(sendmsg)(int s, - const struct kernel_msghdr *msg, - int flags) { - return (ssize_t)LSS_NAME(socketcall)(16, s, msg, flags); - } - - LSS_INLINE ssize_t LSS_NAME(sendto)(int s, const void *buf, size_t len, - int flags, - const struct kernel_sockaddr *to, - unsigned int tolen) { - return (ssize_t)LSS_NAME(socketcall)(11, s, buf, len, flags, to, tolen); - } - - LSS_INLINE int LSS_NAME(shutdown)(int s, int how) { - return LSS_NAME(socketcall)(13, s, how); - } - - LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) { - return LSS_NAME(socketcall)(1, domain, type, protocol); - } - - LSS_INLINE int LSS_NAME(socketpair)(int d, int type, int protocol, - int sv[2]) { - return LSS_NAME(socketcall)(8, d, type, protocol, sv); - } - #endif - #if defined(__i386__) || defined(__PPC__) - LSS_INLINE _syscall4(int, fstatat64, 
int, d, - const char *, p, - struct kernel_stat64 *, b, int, f) - #endif - #if defined(__i386__) || defined(__PPC__) || \ - (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) - LSS_INLINE _syscall3(pid_t, waitpid, pid_t, p, - int*, s, int, o) - #endif - #if defined(__mips__) - /* sys_pipe() on MIPS has non-standard calling conventions, as it returns - * both file handles through CPU registers. - */ - LSS_INLINE int LSS_NAME(pipe)(int *p) { - register unsigned long __v0 __asm__("$2") = __NR_pipe; - register unsigned long __v1 __asm__("$3"); - register unsigned long __r7 __asm__("$7"); - __asm__ __volatile__ ("syscall\n" - : "=&r"(__v0), "=&r"(__v1), "+r" (__r7) - : "0"(__v0) - : "$8", "$9", "$10", "$11", "$12", - "$13", "$14", "$15", "$24", "memory"); - if (__r7) { - LSS_ERRNO = __v0; - return -1; - } else { - p[0] = __v0; - p[1] = __v1; - return 0; - } - } - #else - LSS_INLINE _syscall1(int, pipe, int *, p) - #endif - /* TODO(csilvers): see if ppc can/should support this as well */ - #if defined(__i386__) || defined(__ARM_ARCH_3__) || \ - (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64) - #define __NR__statfs64 __NR_statfs64 - #define __NR__fstatfs64 __NR_fstatfs64 - LSS_INLINE _syscall3(int, _statfs64, const char*, p, - size_t, s,struct kernel_statfs64*, b) - LSS_INLINE _syscall3(int, _fstatfs64, int, f, - size_t, s,struct kernel_statfs64*, b) - LSS_INLINE int LSS_NAME(statfs64)(const char *p, - struct kernel_statfs64 *b) { - return LSS_NAME(_statfs64)(p, sizeof(*b), b); - } - LSS_INLINE int LSS_NAME(fstatfs64)(int f,struct kernel_statfs64 *b) { - return LSS_NAME(_fstatfs64)(f, sizeof(*b), b); - } - #endif - - LSS_INLINE int LSS_NAME(execv)(const char *path, const char *const argv[]) { - extern char **environ; - return LSS_NAME(execve)(path, argv, (const char *const *)environ); - } - - LSS_INLINE pid_t LSS_NAME(gettid)() { - pid_t tid = LSS_NAME(_gettid)(); - if (tid != -1) { - return tid; - } - return LSS_NAME(getpid)(); - } - - LSS_INLINE void 
*LSS_NAME(mremap)(void *old_address, size_t old_size, - size_t new_size, int flags, ...) { - va_list ap; - void *new_address, *rc; - va_start(ap, flags); - new_address = va_arg(ap, void *); - rc = LSS_NAME(_mremap)(old_address, old_size, new_size, - flags, new_address); - va_end(ap); - return rc; - } - - LSS_INLINE int LSS_NAME(ptrace_detach)(pid_t pid) { - /* PTRACE_DETACH can sometimes forget to wake up the tracee and it - * then sends job control signals to the real parent, rather than to - * the tracer. We reduce the risk of this happening by starting a - * whole new time slice, and then quickly sending a SIGCONT signal - * right after detaching from the tracee. - * - * We use tkill to ensure that we only issue a wakeup for the thread being - * detached. Large multi threaded apps can take a long time in the kernel - * processing SIGCONT. - */ - int rc, err; - LSS_NAME(sched_yield)(); - rc = LSS_NAME(ptrace)(PTRACE_DETACH, pid, (void *)0, (void *)0); - err = LSS_ERRNO; - LSS_NAME(tkill)(pid, SIGCONT); - /* Old systems don't have tkill */ - if (LSS_ERRNO == ENOSYS) - LSS_NAME(kill)(pid, SIGCONT); - LSS_ERRNO = err; - return rc; - } - - LSS_INLINE int LSS_NAME(raise)(int sig) { - return LSS_NAME(kill)(LSS_NAME(getpid)(), sig); - } - - LSS_INLINE int LSS_NAME(setpgrp)() { - return LSS_NAME(setpgid)(0, 0); - } - - LSS_INLINE int LSS_NAME(sysconf)(int name) { - extern int __getpagesize(void); - switch (name) { - case _SC_OPEN_MAX: { - struct kernel_rlimit limit; - return LSS_NAME(getrlimit)(RLIMIT_NOFILE, &limit) < 0 - ? 
8192 : limit.rlim_cur; - } - case _SC_PAGESIZE: - return __getpagesize(); - default: - LSS_ERRNO = ENOSYS; - return -1; - } - } - #if defined(__x86_64__) || \ - (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI64) - LSS_INLINE _syscall4(ssize_t, pread64, int, f, - void *, b, size_t, c, - loff_t, o) - LSS_INLINE _syscall4(ssize_t, pwrite64, int, f, - const void *, b, size_t, c, - loff_t, o) - LSS_INLINE _syscall3(int, readahead, int, f, - loff_t, o, unsigned, c) - #else - #define __NR__pread64 __NR_pread64 - #define __NR__pwrite64 __NR_pwrite64 - #define __NR__readahead __NR_readahead - LSS_INLINE _syscall5(ssize_t, _pread64, int, f, - void *, b, size_t, c, unsigned, o1, - unsigned, o2) - LSS_INLINE _syscall5(ssize_t, _pwrite64, int, f, - const void *, b, size_t, c, unsigned, o1, - long, o2) - LSS_INLINE _syscall4(int, _readahead, int, f, - unsigned, o1, unsigned, o2, size_t, c); - /* We force 64bit-wide parameters onto the stack, then access each - * 32-bit component individually. This guarantees that we build the - * correct parameters independent of the native byte-order of the - * underlying architecture. 
- */ - LSS_INLINE ssize_t LSS_NAME(pread64)(int fd, void *buf, size_t count, - loff_t off) { - union { loff_t off; unsigned arg[2]; } o = { off }; - return LSS_NAME(_pread64)(fd, buf, count, o.arg[0], o.arg[1]); - } - LSS_INLINE ssize_t LSS_NAME(pwrite64)(int fd, const void *buf, - size_t count, loff_t off) { - union { loff_t off; unsigned arg[2]; } o = { off }; - return LSS_NAME(_pwrite64)(fd, buf, count, o.arg[0], o.arg[1]); - } - LSS_INLINE int LSS_NAME(readahead)(int fd, loff_t off, int len) { - union { loff_t off; unsigned arg[2]; } o = { off }; - return LSS_NAME(_readahead)(fd, o.arg[0], o.arg[1], len); - } - #endif -#endif - -#if defined(__cplusplus) && !defined(SYS_CPLUSPLUS) -} -#endif - -#endif -#endif diff --git a/sandbox/linux/seccomp/madvise.cc b/sandbox/linux/seccomp/madvise.cc deleted file mode 100644 index 70c594f..0000000 --- a/sandbox/linux/seccomp/madvise.cc +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include "debug.h" -#include "sandbox_impl.h" - -namespace playground { - -long Sandbox::sandbox_madvise(void* start, size_t length, int advice) { - long long tm; - Debug::syscall(&tm, __NR_madvise, "Executing handler"); - struct { - int sysnum; - long long cookie; - MAdvise madvise_req; - } __attribute__((packed)) request; - request.sysnum = __NR_madvise; - request.cookie = cookie(); - request.madvise_req.start = start; - request.madvise_req.len = length; - request.madvise_req.advice = advice; - - long rc; - SysCalls sys; - if (write(sys, processFdPub(), &request, sizeof(request)) != - sizeof(request) || - read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to forward madvise() request [sandbox]"); - } - Debug::elapsed(tm, __NR_madvise); - return rc; -} - -bool Sandbox::process_madvise(int parentMapsFd, int sandboxFd, int threadFdPub, - int threadFd, SecureMem::Args* mem) { - // Read request - MAdvise madvise_req; - SysCalls sys; - if (read(sys, sandboxFd, &madvise_req, sizeof(madvise_req)) != - sizeof(madvise_req)) { - die("Failed to read parameters for madvise() [process]"); - } - int rc = -EINVAL; - switch (madvise_req.advice) { - case MADV_NORMAL: - case MADV_RANDOM: - case MADV_SEQUENTIAL: - case MADV_WILLNEED: - ok: - SecureMem::sendSystemCall(threadFdPub, false, -1, mem, __NR_madvise, - madvise_req.start, madvise_req.len, - madvise_req.advice); - return true; - default: - // All other flags to madvise() are potential dangerous (as opposed to - // merely affecting overall performance). Do not allow them on memory - // ranges that were part of the original mappings. 
- void *stop = reinterpret_cast<void *>( - (char *)madvise_req.start + madvise_req.len); - ProtectedMap::const_iterator iter = protectedMap_.lower_bound( - (void *)madvise_req.start); - if (iter != protectedMap_.begin()) { - --iter; - } - for (; iter != protectedMap_.end() && iter->first < stop; ++iter) { - if (madvise_req.start < reinterpret_cast<void *>( - reinterpret_cast<char *>(iter->first) + iter->second) && - stop > iter->first) { - SecureMem::abandonSystemCall(threadFd, rc); - return false; - } - } - - // Changing attributes on memory regions that were newly mapped inside of - // the sandbox is OK. - goto ok; - } -} - -} // namespace diff --git a/sandbox/linux/seccomp/maps.cc b/sandbox/linux/seccomp/maps.cc deleted file mode 100644 index 8ae218d..0000000 --- a/sandbox/linux/seccomp/maps.cc +++ /dev/null @@ -1,267 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include <errno.h> -#include <fcntl.h> -#include <linux/unistd.h> -#include <signal.h> -#include <stdarg.h> -#include <stdlib.h> -#include <sys/ptrace.h> -#include <sys/types.h> -#include <sys/wait.h> - -#include "library.h" -#include "maps.h" -#include "sandbox_impl.h" - -namespace playground { - -Maps::Maps(int proc_self_maps) : - proc_self_maps_(proc_self_maps), - begin_iter_(this, true, false), - end_iter_(this, false, true), - vsyscall_(0) { - Sandbox::SysCalls sys; - if (proc_self_maps_ >= 0 && - !sys.lseek(proc_self_maps_, 0, SEEK_SET)) { - char buf[256] = { 0 }; - int len = 0, rc = 1; - bool long_line = false; - do { - if (rc > 0) { - rc = Sandbox::read(sys, proc_self_maps_, buf + len, - sizeof(buf) - len - 1); - if (rc > 0) { - len += rc; - } - } - char *ptr = buf; - if (!long_line) { - long_line = true; - unsigned long start = strtoul(ptr, &ptr, 16); - unsigned long stop = strtoul(ptr + 1, &ptr, 16); - while (*ptr == ' ' || *ptr == '\t') ++ptr; - char *perm_ptr = ptr; - while (*ptr && *ptr != ' ' && *ptr != '\t') ++ptr; - string perm(perm_ptr, ptr - perm_ptr); - unsigned long offset = strtoul(ptr, &ptr, 16); - while (*ptr == ' ' || *ptr == '\t') ++ptr; - char *id_ptr = ptr; - while (*ptr && *ptr != ' ' && *ptr != '\t') ++ptr; - while (*ptr == ' ' || *ptr == '\t') ++ptr; - while (*ptr && *ptr != ' ' && *ptr != '\t') ++ptr; - string id(id_ptr, ptr - id_ptr); - while (*ptr == ' ' || *ptr == '\t') ++ptr; - char *library_ptr = ptr; - while (*ptr && *ptr != ' ' && *ptr != '\t' && *ptr != '\n') ++ptr; - string library(library_ptr, ptr - library_ptr); - bool isVDSO = false; - if (library == "[vdso]") { - // /proc/self/maps has a misleading file offset in the [vdso] entry. - // Override it with a sane value. 
- offset = 0; - isVDSO = true; - } else if (library == "[vsyscall]") { - vsyscall_ = reinterpret_cast<char *>(start); - } else if (library.empty() || library[0] == '[') { - goto skip_entry; - } - int prot = 0; - if (perm.find('r') != string::npos) { - prot |= PROT_READ; - } - if (perm.find('w') != string::npos) { - prot |= PROT_WRITE; - } - if (perm.find('x') != string::npos) { - prot |= PROT_EXEC; - } - if ((prot & (PROT_EXEC | PROT_READ)) == 0) { - goto skip_entry; - } - Library* lib = &libs_[id + ' ' + library]; - lib->setLibraryInfo(this); - lib->addMemoryRange(reinterpret_cast<void *>(start), - reinterpret_cast<void *>(stop), - Elf_Addr(offset), - prot, isVDSO); - } - skip_entry: - for (;;) { - if (!*ptr || *ptr++ == '\n') { - long_line = false; - memmove(buf, ptr, len - (ptr - buf)); - memset(buf + len - (ptr - buf), 0, ptr - buf); - len -= (ptr - buf); - break; - } - } - } while (len || long_line); - } -} - -Maps::Iterator::Iterator(Maps* maps, bool at_beginning, bool at_end) - : maps_(maps), - at_beginning_(at_beginning), - at_end_(at_end) { -} - -Maps::LibraryMap::iterator& Maps::Iterator::getIterator() const { - if (at_beginning_) { - iter_ = maps_->libs_.begin(); - } else if (at_end_) { - iter_ = maps_->libs_.end(); - } - return iter_; -} - -Maps::Iterator Maps::Iterator::begin() { - return maps_->begin_iter_; -} - -Maps::Iterator Maps::Iterator::end() { - return maps_->end_iter_; -} - -Maps::Iterator& Maps::Iterator::operator++() { - getIterator().operator++(); - at_beginning_ = false; - return *this; -} - -Maps::Iterator Maps::Iterator::operator++(int i) { - getIterator().operator++(i); - at_beginning_ = false; - return *this; -} - -Library* Maps::Iterator::operator*() const { - return &getIterator().operator*().second; -} - -bool Maps::Iterator::operator==(const Maps::Iterator& iter) const { - return getIterator().operator==(iter.getIterator()); -} - -bool Maps::Iterator::operator!=(const Maps::Iterator& iter) const { - return !operator==(iter); -} - 
// Test whether a line ends with "[stack]"; used for identifying the
// stack entry of /proc/self/maps.
//
// |buf| points at the text to examine and |end| one past the last valid
// byte. The line is the text from |buf| up to (not including) the first
// '\n' inside [buf, end). Returns false when no '\n' is found in that range
// or when the line is shorter than the tag itself.
static bool isStackLine(char* buf, char* end) {
  static const char kStackTag[] = "[stack]";
  const size_t kTagLen = sizeof(kStackTag) - 1;

  // Locate the end of the line; test the bound before reading the byte so
  // that |*end| is never touched.
  char* eol = buf;
  while (eol < end && *eol != '\n') {
    ++eol;
  }
  if (eol >= end) {
    return false;  // Incomplete line: no terminating newline in range.
  }

  // Compare lengths rather than computing |eol - 7|, which could point
  // before |buf| for short lines.
  if (static_cast<size_t>(eol - buf) < kTagLen) {
    return false;
  }
  return memcmp(eol - kTagLen, kStackTag, kTagLen) == 0;
}
- if (addr - static_cast<long>(gap_end) >= 0) { - if (addr - (gap_end - size) < kMaxDistance) { - unsigned long position; - if (isStackLine(ptr, buf + len)) { - // If we're adjacent to the stack, try to stay away from - // the GROWS_DOWN region. Pick the farthest away region that - // is still within the gap. - - if (static_cast<unsigned long>(addr) < kMaxDistance || // Underflow protection. - static_cast<unsigned long>(addr) - kMaxDistance < gap_start) { - position = gap_start; - } else { - position = (addr - kMaxDistance) & ~4095; - if (position < gap_start) { - position = gap_start; - } - } - } else { - // Otherwise, take the end of the region. - position = gap_end - size; - } - new_addr = reinterpret_cast<char *>(sys.MMAP - (reinterpret_cast<void *>(position), size, prot, - MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0)); - if (new_addr != MAP_FAILED) { - goto done; - } - } - } else if (gap_start + size - addr < kMaxDistance) { - // Gap is after the address. Above checks that we can wrap around - // through 0 to a space we'd use. - new_addr = reinterpret_cast<char *>(sys.MMAP - (reinterpret_cast<void *>(gap_start), size, prot, - MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1 ,0)); - if (new_addr != MAP_FAILED) { - goto done; - } - } - } - gap_start = map_end; - } - for (;;) { - if (!*ptr || *ptr++ == '\n') { - long_line = false; - memmove(buf, ptr, len - (ptr - buf)); - memset(buf + len - (ptr - buf), 0, ptr - buf); - len -= (ptr - buf); - break; - } - } - } while (len || long_line); - new_addr = NULL; -done: - return reinterpret_cast<char*>(new_addr); -} - -} // namespace diff --git a/sandbox/linux/seccomp/maps.h b/sandbox/linux/seccomp/maps.h deleted file mode 100644 index fbcc7672..0000000 --- a/sandbox/linux/seccomp/maps.h +++ /dev/null @@ -1,94 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#ifndef MAPS_H__ -#define MAPS_H__ - -#include <elf.h> -#include <functional> -#include <map> -#include <string> - -#include "allocator.h" - -#if defined(__x86_64__) -typedef Elf64_Addr Elf_Addr; -#elif defined(__i386__) -typedef Elf32_Addr Elf_Addr; -#else -#error Undefined target platform -#endif - -namespace playground { - -class Library; -class Maps { - friend class Library; - public: - typedef std::basic_string<char, std::char_traits<char>, - SystemAllocator<char> > string; - - Maps(int proc_self_maps); - ~Maps() { } - - protected: - // A map with all the libraries currently loaded into the application. - // The key is a unique combination of device number, inode number, and - // file name. It should be treated as opaque. - typedef std::map<string, Library, std::less<string>, - SystemAllocator<std::pair<const string, - Library> > > LibraryMap; - friend class Iterator; - class Iterator { - friend class Maps; - - protected: - explicit Iterator(Maps* maps); - Iterator(Maps* maps, bool at_beginning, bool at_end); - Maps::LibraryMap::iterator& getIterator() const; - - public: - Iterator begin(); - Iterator end(); - Iterator& operator++(); - Iterator operator++(int i); - Library* operator*() const; - bool operator==(const Iterator& iter) const; - bool operator!=(const Iterator& iter) const; - string name() const; - - protected: - mutable LibraryMap::iterator iter_; - Maps *maps_; - bool at_beginning_; - bool at_end_; - }; - - public: - typedef class Iterator const_iterator; - - const_iterator begin() { - return begin_iter_; - } - - const_iterator end() { - return end_iter_; - } - - char* allocNearAddr(char *addr, size_t size, int prot) const; - - char* vsyscall() const { return vsyscall_; } - - protected: - const int proc_self_maps_; - const Iterator begin_iter_; - const Iterator end_iter_; - - LibraryMap libs_; - char* vsyscall_; -}; - -} // namespace - -#endif // MAPS_H__ diff --git a/sandbox/linux/seccomp/mmap.cc b/sandbox/linux/seccomp/mmap.cc deleted file 
mode 100644 index 700da91..0000000 --- a/sandbox/linux/seccomp/mmap.cc +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "debug.h" -#include "sandbox_impl.h" - -namespace playground { - -void* Sandbox::sandbox_mmap(void *start, size_t length, int prot, int flags, - int fd, off_t offset) { - long long tm; - Debug::syscall(&tm, __NR_mmap, "Executing handler"); - struct { - int sysnum; - long long cookie; - MMap mmap_req; - } __attribute__((packed)) request; - request.sysnum = __NR_MMAP; - request.cookie = cookie(); - request.mmap_req.start = start; - request.mmap_req.length = length; - request.mmap_req.prot = prot; - request.mmap_req.flags = flags; - request.mmap_req.fd = fd; - request.mmap_req.offset = offset; - - void* rc; - SysCalls sys; - if (write(sys, processFdPub(), &request, sizeof(request)) != - sizeof(request) || - read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to forward mmap() request [sandbox]"); - } - Debug::elapsed(tm, __NR_mmap); - return rc; -} - -bool Sandbox::process_mmap(int parentMapsFd, int sandboxFd, int threadFdPub, - int threadFd, SecureMem::Args* mem) { - // Read request - SysCalls sys; - MMap mmap_req; - if (read(sys, sandboxFd, &mmap_req, sizeof(mmap_req)) != sizeof(mmap_req)) { - die("Failed to read parameters for mmap() [process]"); - } - - if (mmap_req.flags & MAP_FIXED) { - // Cannot map a memory area that was part of the original memory mappings. 
- void *stop = reinterpret_cast<void *>( - (char *)mmap_req.start + mmap_req.length); - ProtectedMap::const_iterator iter = protectedMap_.lower_bound( - (void *)mmap_req.start); - if (iter != protectedMap_.begin()) { - --iter; - } - for (; iter != protectedMap_.end() && iter->first < stop; ++iter) { - if (mmap_req.start < reinterpret_cast<void *>( - reinterpret_cast<char *>(iter->first) + iter->second) && - stop > iter->first) { - int rc = -EINVAL; - SecureMem::abandonSystemCall(threadFd, rc); - return false; - } - } - } - - // All other mmap() requests are OK - SecureMem::sendSystemCall(threadFdPub, false, -1, mem, __NR_MMAP, - mmap_req.start, mmap_req.length, mmap_req.prot, - mmap_req.flags, mmap_req.fd, mmap_req.offset); - return true; -} - -} // namespace diff --git a/sandbox/linux/seccomp/mprotect.cc b/sandbox/linux/seccomp/mprotect.cc deleted file mode 100644 index 548199d..0000000 --- a/sandbox/linux/seccomp/mprotect.cc +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include "debug.h" -#include "sandbox_impl.h" - -namespace playground { - -long Sandbox::sandbox_mprotect(const void *addr, size_t len, int prot) { - long long tm; - Debug::syscall(&tm, __NR_mprotect, "Executing handler"); - struct { - int sysnum; - long long cookie; - MProtect mprotect_req; - } __attribute__((packed)) request; - request.sysnum = __NR_mprotect; - request.cookie = cookie(); - request.mprotect_req.addr = addr; - request.mprotect_req.len = len; - request.mprotect_req.prot = prot; - - long rc; - SysCalls sys; - if (write(sys, processFdPub(), &request, sizeof(request)) != - sizeof(request) || - read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to forward mprotect() request [sandbox]"); - } - Debug::elapsed(tm, __NR_mprotect); - return rc; -} - -bool Sandbox::process_mprotect(int parentMapsFd, int sandboxFd, - int threadFdPub, int threadFd, - SecureMem::Args* mem) { - // Read request - SysCalls sys; - MProtect mprotect_req; - if (read(sys, sandboxFd, &mprotect_req, sizeof(mprotect_req)) != - sizeof(mprotect_req)) { - die("Failed to read parameters for mprotect() [process]"); - } - - // Cannot change permissions on any memory region that was part of the - // original memory mappings. - int rc = -EINVAL; - void *stop = reinterpret_cast<void *>( - (char *)mprotect_req.addr + mprotect_req.len); - ProtectedMap::const_iterator iter = protectedMap_.lower_bound( - (void *)mprotect_req.addr); - if (iter != protectedMap_.begin()) { - --iter; - } - for (; iter != protectedMap_.end() && iter->first < stop; ++iter) { - if (mprotect_req.addr < reinterpret_cast<void *>( - reinterpret_cast<char *>(iter->first) + iter->second) && - stop > iter->first) { - SecureMem::abandonSystemCall(threadFd, rc); - return false; - } - } - - // Changing permissions on memory regions that were newly mapped inside of - // the sandbox is OK. 
- SecureMem::sendSystemCall(threadFdPub, false, -1, mem, __NR_mprotect, - mprotect_req.addr, mprotect_req.len, - mprotect_req.prot); - return true; -} - -} // namespace diff --git a/sandbox/linux/seccomp/munmap.cc b/sandbox/linux/seccomp/munmap.cc deleted file mode 100644 index dde7c7a..0000000 --- a/sandbox/linux/seccomp/munmap.cc +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "debug.h" -#include "sandbox_impl.h" - -namespace playground { - -long Sandbox::sandbox_munmap(void* start, size_t length) { - long long tm; - Debug::syscall(&tm, __NR_munmap, "Executing handler"); - struct { - int sysnum; - long long cookie; - MUnmap munmap_req; - } __attribute__((packed)) request; - request.sysnum = __NR_munmap; - request.cookie = cookie(); - request.munmap_req.start = start; - request.munmap_req.length = length; - - long rc; - SysCalls sys; - if (write(sys, processFdPub(), &request, sizeof(request)) != - sizeof(request) || - read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to forward munmap() request [sandbox]"); - } - Debug::elapsed(tm, __NR_munmap); - return rc; -} - -bool Sandbox::process_munmap(int parentMapsFd, int sandboxFd, int threadFdPub, - int threadFd, SecureMem::Args* mem) { - // Read request - SysCalls sys; - MUnmap munmap_req; - if (read(sys, sandboxFd, &munmap_req, sizeof(munmap_req)) != - sizeof(munmap_req)) { - die("Failed to read parameters for munmap() [process]"); - } - - // Cannot unmap any memory region that was part of the original memory - // mappings. 
- int rc = -EINVAL; - void *stop = reinterpret_cast<void *>( - reinterpret_cast<char *>(munmap_req.start) + munmap_req.length); - ProtectedMap::const_iterator iter = protectedMap_.lower_bound( - munmap_req.start); - if (iter != protectedMap_.begin()) { - --iter; - } - for (; iter != protectedMap_.end() && iter->first < stop; ++iter) { - if (munmap_req.start < reinterpret_cast<void *>( - reinterpret_cast<char *>(iter->first) + iter->second) && - stop > iter->first) { - SecureMem::abandonSystemCall(threadFd, rc); - return false; - } - } - - // Unmapping memory regions that were newly mapped inside of the sandbox - // is OK. - SecureMem::sendSystemCall(threadFdPub, false, -1, mem, __NR_munmap, - munmap_req.start, munmap_req.length); - return true; -} - -} // namespace diff --git a/sandbox/linux/seccomp/mutex.h b/sandbox/linux/seccomp/mutex.h deleted file mode 100644 index d7e1c5d..0000000 --- a/sandbox/linux/seccomp/mutex.h +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef MUTEX_H__ -#define MUTEX_H__ - -#include "sandbox_impl.h" - -namespace playground { - -class Mutex { - public: - typedef int mutex_t; - - enum { kInitValue = 0 }; - - static void initMutex(mutex_t* mutex) { - // Mutex is unlocked, and nobody is waiting for it - *mutex = kInitValue; - } - - static void unlockMutex(mutex_t* mutex) { - char status; - #if defined(__x86_64__) || defined(__i386__) - asm volatile( - "lock; addl %2, %0\n" - "setz %1" - : "=m"(*mutex), "=qm"(status) - : "ir"(0x80000000), "m"(*mutex)); - #else - #error Unsupported target platform - #endif - if (status) { - // Mutex is zero now. No other waiters. So, we can return. - return; - } - // We unlocked the mutex, but still need to wake up other waiters. 
- Sandbox::SysCalls sys; - sys.futex(mutex, FUTEX_WAKE, 1, NULL); - } - - static bool lockMutex(mutex_t* mutex, int timeout = 0) { - bool rc = true; - // Increment mutex to add ourselves to the list of waiters - #if defined(__x86_64__) || defined(__i386__) - asm volatile( - "lock; incl %0\n" - : "=m"(*mutex) - : "m"(*mutex)); - #else - #error Unsupported target platform - #endif - for (;;) { - // Atomically check whether the mutex is available and if so, acquire it - char status; - #if defined(__x86_64__) || defined(__i386__) - asm volatile( - "lock; btsl %3, %1\n" - "setc %0" - : "=q"(status), "=m"(*mutex) - : "m"(*mutex), "ir"(31)); - #else - #error Unsupported target platform - #endif - if (!status) { - done: - // If the mutex was available, remove ourselves from list of waiters - #if defined(__x86_64__) || defined(__i386__) - asm volatile( - "lock; decl %0\n" - : "=m"(*mutex) - : "m"(*mutex)); - #else - #error Unsupported target platform - #endif - return rc; - } - int value = *mutex; - if (value >= 0) { - // Mutex has just become available, no need to call kernel - continue; - } - Sandbox::SysCalls sys; - Sandbox::SysCalls::kernel_timespec tm; - if (timeout) { - tm.tv_sec = timeout / 1000; - tm.tv_nsec = (timeout % 1000) * 1000 * 1000; - } else { - tm.tv_sec = 0; - tm.tv_nsec = 0; - } - if (NOINTR_SYS(sys.futex(mutex, FUTEX_WAIT, value, &tm)) && - sys.my_errno == ETIMEDOUT) { - rc = false; - goto done; - } - } - } - - static bool waitForUnlock(mutex_t* mutex, int timeout = 0) { - bool rc = true; - // Increment mutex to add ourselves to the list of waiters - #if defined(__x86_64__) || defined(__i386__) - asm volatile( - "lock; incl %0\n" - : "=m"(*mutex) - : "m"(*mutex)); - #else - #error Unsupported target platform - #endif - Sandbox::SysCalls sys; - for (;;) { - mutex_t value = *mutex; - if (value >= 0) { - done: - // Mutex was not locked. Remove ourselves from list of waiters, notify - // any other waiters (if any), and return. 
- #if defined(__x86_64__) || defined(__i386__) - asm volatile( - "lock; decl %0\n" - : "=m"(*mutex) - : "m"(*mutex)); - #else - #error Unsupported target platform - #endif - NOINTR_SYS(sys.futex(mutex, FUTEX_WAKE, 1, 0)); - return rc; - } - - // Wait for mutex to become unlocked - Sandbox::SysCalls::kernel_timespec tm; - if (timeout) { - tm.tv_sec = timeout / 1000; - tm.tv_nsec = (timeout % 1000) * 1000 * 1000; - } else { - tm.tv_sec = 0; - tm.tv_nsec = 0; - } - - if (NOINTR_SYS(sys.futex(mutex, FUTEX_WAIT, value, &tm)) && - sys.my_errno == ETIMEDOUT) { - rc = false; - goto done; - } - } - } - -}; - -} // namespace - -#endif // MUTEX_H__ diff --git a/sandbox/linux/seccomp/open.cc b/sandbox/linux/seccomp/open.cc deleted file mode 100644 index 8a9093c..0000000 --- a/sandbox/linux/seccomp/open.cc +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include "debug.h" -#include "sandbox_impl.h" - -namespace playground { - -long Sandbox::sandbox_open(const char *pathname, int flags, mode_t mode) { - long long tm; - Debug::syscall(&tm, __NR_open, "Executing handler"); - size_t len = strlen(pathname); - struct Request { - int sysnum; - long long cookie; - Open open_req; - char pathname[0]; - } __attribute__((packed)) *request; - char data[sizeof(struct Request) + len]; - request = reinterpret_cast<struct Request*>(data); - request->sysnum = __NR_open; - request->cookie = cookie(); - request->open_req.path_length = len; - request->open_req.flags = flags; - request->open_req.mode = mode; - memcpy(request->pathname, pathname, len); - - long rc; - SysCalls sys; - if (write(sys, processFdPub(), request, sizeof(data)) != (int)sizeof(data) || - read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to forward open() request [sandbox]"); - } - Debug::elapsed(tm, __NR_open); - return rc; -} - -bool Sandbox::process_open(int parentMapsFd, int sandboxFd, int threadFdPub, - int threadFd, SecureMem::Args* mem) { - // Read request - SysCalls sys; - Open open_req; - if (read(sys, sandboxFd, &open_req, sizeof(open_req)) != sizeof(open_req)) { - read_parm_failed: - die("Failed to read parameters for open() [process]"); - } - int rc = -ENAMETOOLONG; - if (open_req.path_length >= sizeof(mem->pathname)) { - char buf[32]; - while (open_req.path_length > 0) { - size_t len = open_req.path_length > sizeof(buf) ? - sizeof(buf) : open_req.path_length; - ssize_t i = read(sys, sandboxFd, buf, len); - if (i <= 0) { - goto read_parm_failed; - } - open_req.path_length -= i; - } - if (write(sys, threadFd, &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to return data from open() [process]"); - } - return false; - } - - if ((open_req.flags & O_ACCMODE) != O_RDONLY || - !g_policy.allow_file_namespace) { - // After locking the mutex, we can no longer abandon the system call. 
So, - // perform checks before clobbering the securely shared memory. - char tmp[open_req.path_length]; - if (read(sys, sandboxFd, tmp, open_req.path_length) != - (ssize_t)open_req.path_length) { - goto read_parm_failed; - } - Debug::message(("Denying access to \"" + std::string(tmp) + "\"").c_str()); - SecureMem::abandonSystemCall(threadFd, -EACCES); - return false; - } - - SecureMem::lockSystemCall(parentMapsFd, mem); - if (read(sys, sandboxFd, mem->pathname, open_req.path_length) != - (ssize_t)open_req.path_length) { - goto read_parm_failed; - } - mem->pathname[open_req.path_length] = '\000'; - - // TODO(markus): Implement sandboxing policy. For now, we allow read - // access to everything. That's probably not correct. - Debug::message(("Allowing access to \"" + std::string(mem->pathname) + - "\"").c_str()); - - // Tell trusted thread to open the file. - SecureMem::sendSystemCall(threadFdPub, true, parentMapsFd, mem, __NR_open, - mem->pathname - (char*)mem + (char*)mem->self, - open_req.flags, open_req.mode); - return true; -} - -} // namespace diff --git a/sandbox/linux/seccomp/sandbox.cc b/sandbox/linux/seccomp/sandbox.cc deleted file mode 100644 index 0b09457..0000000 --- a/sandbox/linux/seccomp/sandbox.cc +++ /dev/null @@ -1,838 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include "library.h" -#include "sandbox_impl.h" -#include "syscall_table.h" - -namespace playground { - -// Global variables -int Sandbox::proc_self_maps_ = -1; -enum Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; -int Sandbox::pid_; -int Sandbox::processFdPub_; -int Sandbox::cloneFdPub_; -Sandbox::SysCalls::kernel_sigaction Sandbox::sa_segv_; -Sandbox::ProtectedMap Sandbox::protectedMap_; -std::vector<SecureMem::Args*> Sandbox::secureMemPool_; - -bool Sandbox::sendFd(int transport, int fd0, int fd1, const void* buf, - size_t len) { - int fds[2], count = 0; - if (fd0 >= 0) { fds[count++] = fd0; } - if (fd1 >= 0) { fds[count++] = fd1; } - if (!count) { - return false; - } - char cmsg_buf[CMSG_SPACE(count*sizeof(int))]; - memset(cmsg_buf, 0, sizeof(cmsg_buf)); - struct SysCalls::kernel_iovec iov[2] = { { 0 } }; - struct SysCalls::kernel_msghdr msg = { 0 }; - int dummy = 0; - iov[0].iov_base = &dummy; - iov[0].iov_len = sizeof(dummy); - if (buf && len > 0) { - iov[1].iov_base = const_cast<void *>(buf); - iov[1].iov_len = len; - } - msg.msg_iov = iov; - msg.msg_iovlen = (buf && len > 0) ? 2 : 1; - msg.msg_control = cmsg_buf; - msg.msg_controllen = CMSG_LEN(count*sizeof(int)); - struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg); - cmsg->cmsg_level = SOL_SOCKET; - cmsg->cmsg_type = SCM_RIGHTS; - cmsg->cmsg_len = CMSG_LEN(count*sizeof(int)); - memcpy(CMSG_DATA(cmsg), fds, count*sizeof(int)); - SysCalls sys; - return NOINTR_SYS(sys.sendmsg(transport, &msg, 0)) == - (ssize_t)(sizeof(dummy) + ((buf && len > 0) ? 
len : 0)); -} - -bool Sandbox::getFd(int transport, int* fd0, int* fd1, void* buf, size_t*len) { - int count = 0; - int *err = NULL; - if (fd0) { - count++; - err = fd0; - *fd0 = -1; - } - if (fd1) { - if (!count++) { - err = fd1; - } - *fd1 = -1; - } - if (!count) { - return false; - } - char cmsg_buf[CMSG_SPACE(count*sizeof(int))]; - memset(cmsg_buf, 0, sizeof(cmsg_buf)); - struct SysCalls::kernel_iovec iov[2] = { { 0 } }; - struct SysCalls::kernel_msghdr msg = { 0 }; - iov[0].iov_base = err; - iov[0].iov_len = sizeof(int); - if (buf && len && *len > 0) { - iov[1].iov_base = buf; - iov[1].iov_len = *len; - } - msg.msg_iov = iov; - msg.msg_iovlen = (buf && len && *len > 0) ? 2 : 1; - msg.msg_control = cmsg_buf; - msg.msg_controllen = CMSG_LEN(count*sizeof(int)); - SysCalls sys; - ssize_t bytes = NOINTR_SYS(sys.recvmsg(transport, &msg, 0)); - if (len) { - *len = bytes > (int)sizeof(int) ? - bytes - sizeof(int) : 0; - } - if (bytes != (ssize_t)(sizeof(int) + ((buf && len && *len > 0) ? *len : 0))){ - *err = bytes >= 0 ? 0 : -EBADF; - return false; - } - if (*err) { - // "err" is the first four bytes of the payload. If these are non-zero, - // the sender on the other side of the socketpair sent us an errno value. - // We don't expect to get any file handles in this case. 
- return false; - } - struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg); - if ((msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) || - !cmsg || - cmsg->cmsg_level != SOL_SOCKET || - cmsg->cmsg_type != SCM_RIGHTS || - cmsg->cmsg_len != CMSG_LEN(count*sizeof(int))) { - *err = -EBADF; - return false; - } - if (fd1) { *fd1 = ((int *)CMSG_DATA(cmsg))[--count]; } - if (fd0) { *fd0 = ((int *)CMSG_DATA(cmsg))[--count]; } - return true; -} - -void Sandbox::setupSignalHandlers() { - // Set SIGCHLD to SIG_DFL so that waitpid() can work - SysCalls sys; - struct SysCalls::kernel_sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_handler_ = SIG_DFL; - sys.sigaction(SIGCHLD, &sa, NULL); - - // Set up SEGV handler for dealing with RDTSC instructions, system calls - // that have been rewritten to use INT0, for sigprocmask() emulation, for - // the creation of threads, and for user-provided SEGV handlers. - sa.sa_sigaction_ = segv(); - sa.sa_flags = SA_SIGINFO | SA_NODEFER; - sys.sigaction(SIGSEGV, &sa, &sa_segv_); - - // Unblock SIGSEGV and SIGCHLD - SysCalls::kernel_sigset_t mask; - memset(&mask, 0x00, sizeof(mask)); - mask.sig[0] |= (1 << (SIGSEGV - 1)) | (1 << (SIGCHLD - 1)); - sys.sigprocmask(SIG_UNBLOCK, &mask, 0); -} - -void (*Sandbox::segv())(int signo, SysCalls::siginfo *context, void *unused) { - void (*fnc)(int signo, SysCalls::siginfo *context, void *unused); - asm volatile( - "call 999f\n" -#if defined(__x86_64__) - // Inspect instruction at the point where the segmentation fault - // happened. If it is RDTSC, forward the request to the trusted - // thread. 
- "mov $-3, %%r14\n" // request for RDTSC - "mov 0xB0(%%rsp), %%r15\n" // %rip at time of segmentation fault - "cmpw $0x310F, (%%r15)\n" // RDTSC - "jz 0f\n" - "cmpw $0x010F, (%%r15)\n" // RDTSCP - "jnz 8f\n" - "cmpb $0xF9, 2(%%r15)\n" - "jnz 8f\n" - "mov $-4, %%r14\n" // request for RDTSCP - "0:" -#ifndef NDEBUG - "lea 100f(%%rip), %%rdi\n" - "call playground$debugMessage\n" -#endif - "sub $4, %%rsp\n" - "push %%r14\n" - "mov %%gs:16, %%edi\n" // fd = threadFdPub - "mov %%rsp, %%rsi\n" // buf = %rsp - "mov $4, %%edx\n" // len = sizeof(int) - "1:mov $1, %%eax\n" // NR_write - "syscall\n" - "cmp %%rax, %%rdx\n" - "jz 5f\n" - "cmp $-4, %%eax\n" // EINTR - "jz 1b\n" - "2:add $12, %%rsp\n" - "movq $0, 0x98(%%rsp)\n" // %rax at time of segmentation fault - "movq $0, 0x90(%%rsp)\n" // %rdx at time of segmentation fault - "cmpw $0x310F, (%%r15)\n" // RDTSC - "jz 3f\n" - "movq $0, 0xA0(%%rsp)\n" // %rcx at time of segmentation fault - "3:addq $2, 0xB0(%%rsp)\n" // %rip at time of segmentation fault - "cmpw $0x010F, (%%r15)\n" // RDTSC - "jnz 4f\n" - "addq $1, 0xB0(%%rsp)\n" // %rip at time of segmentation fault - "4:ret\n" - "5:mov $12, %%edx\n" // len = 3*sizeof(int) - "6:mov $0, %%eax\n" // NR_read - "syscall\n" - "cmp $-4, %%eax\n" // EINTR - "jz 6b\n" - "cmp %%rax, %%rdx\n" - "jnz 2b\n" - "mov 0(%%rsp), %%eax\n" - "mov 4(%%rsp), %%edx\n" - "mov 8(%%rsp), %%ecx\n" - "add $12, %%rsp\n" - "mov %%rdx, 0x90(%%rsp)\n" // %rdx at time of segmentation fault - "cmpw $0x310F, (%%r15)\n" // RDTSC - "jz 7f\n" - "mov %%rcx, 0xA0(%%rsp)\n" // %rcx at time of segmentation fault - "7:mov %%rax, 0x98(%%rsp)\n" // %rax at time of segmentation fault - "jmp 3b\n" - - // If the instruction is INT 0, then this was probably the result - // of playground::Library being unable to find a way to safely - // rewrite the system call instruction. Retrieve the CPU register - // at the time of the segmentation fault and invoke syscallWrapper(). 
- "8:cmpw $0x00CD, (%%r15)\n" // INT $0x0 - "jnz 16f\n" -#ifndef NDEBUG - "lea 200f(%%rip), %%rdi\n" - "call playground$debugMessage\n" -#endif - "mov 0x98(%%rsp), %%rax\n" // %rax at time of segmentation fault - "mov 0x70(%%rsp), %%rdi\n" // %rdi at time of segmentation fault - "mov 0x78(%%rsp), %%rsi\n" // %rsi at time of segmentation fault - "mov 0x90(%%rsp), %%rdx\n" // %rdx at time of segmentation fault - "mov 0x40(%%rsp), %%r10\n" // %r10 at time of segmentation fault - "mov 0x30(%%rsp), %%r8\n" // %r8 at time of segmentation fault - "mov 0x38(%%rsp), %%r9\n" // %r9 at time of segmentation fault - - // Handle rt_sigprocmask() - "cmp $14, %%rax\n" // NR_rt_sigprocmask - "jnz 12f\n" - "mov $-22, %%rax\n" // -EINVAL - "cmp $8, %%r10\n" // %r10 = sigsetsize (8 bytes = 64 signals) - "jl 7b\n" - "mov 0x130(%%rsp), %%r10\n" // signal mask at time of segmentation fault - "test %%rsi, %%rsi\n" // only set mask, if set is non-NULL - "jz 11f\n" - "mov 0(%%rsi), %%rsi\n" - "cmp $0, %%rdi\n" // %rdi = how (SIG_BLOCK) - "jnz 9f\n" - "or %%rsi, 0x130(%%rsp)\n" // signal mask at time of segmentation fault - "jmp 11f\n" - "9:cmp $1, %%rdi\n" // %rdi = how (SIG_UNBLOCK) - "jnz 10f\n" - "xor $-1, %%rsi\n" - "and %%rsi, 0x130(%%rsp)\n" // signal mask at time of segmentation fault - "jmp 11f\n" - "10:cmp $2, %%rdi\n" // %rdi = how (SIG_SETMASK) - "jnz 7b\n" - "mov %%rsi, 0x130(%%rsp)\n" // signal mask at time of segmentation fault - "11:xor %%rax, %%rax\n" - "test %%rdx, %%rdx\n" // only return old mask, if set is non-NULL - "jz 7b\n" - "mov %%r10, 0(%%rdx)\n" // old_set - "jmp 7b\n" - - // Handle rt_sigreturn() - "12:cmp $15, %%rax\n" // NR_rt_sigreturn - "jnz 14f\n" - "mov 0xA8(%%rsp), %%rsp\n" // %rsp at time of segmentation fault - "13:syscall\n" // rt_sigreturn() is unrestricted - "mov $66, %%edi\n" // rt_sigreturn() should never return - "mov $231, %%eax\n" // NR_exit_group - "jmp 13b\n" - - // Copy signal frame onto new stack. 
See clone.cc for details - "14:cmp $56+0xF000, %%rax\n" // NR_clone + 0xF000 - "jnz 15f\n" - "lea 8(%%rsp), %%rax\n" // retain stack frame upon returning - "mov %%rax, 0xA8(%%rsp)\n" // %rsp at time of segmentation fault - "jmp 7b\n" - - // Forward system call to syscallWrapper() - "15:lea 7b(%%rip), %%rcx\n" - "push %%rcx\n" - "push 0xB8(%%rsp)\n" // %rip at time of segmentation fault - "lea playground$syscallWrapper(%%rip), %%rcx\n" - "jmp *%%rcx\n" - - // In order to implement SA_NODEFER, we have to keep track of recursive - // calls to SIGSEGV handlers. This means we have to increment a counter - // before calling the user's signal handler, and decrement it on - // leaving the user's signal handler. - // Some signal handlers look at the return address of the signal - // stack, and more importantly "gdb" uses the call to rt_sigreturn() - // as a magic signature when doing stacktraces. So, we have to use - // a little more unusual code to regain control after the user's - // signal handler is done. We adjust the return address to point to - // non-executable memory. And when we trigger another SEGV we pop the - // extraneous signal frame and then call rt_sigreturn(). - // N.B. We currently do not correctly adjust the SEGV counter, if the - // user's signal handler exits in way other than by returning (e.g. by - // directly calling rt_sigreturn(), or by calling siglongjmp()). - "16:lea 22f(%%rip), %%r14\n" - "cmp %%r14, %%r15\n" - "jnz 17f\n" // check if returning from user's handler - "decl %%gs:0x105C-0xE0\n" // decrement SEGV recursion counter - "mov 0xA8(%%rsp), %%rsp\n" // %rsp at time of segmentation fault - "mov $0xF, %%eax\n" // NR_rt_sigreturn - "syscall\n" - - // This was a genuine segmentation fault. Check Sandbox::sa_segv_ for - // what we are supposed to do. 
- "17:mov playground$sa_segv@GOTPCREL(%%rip), %%rax\n" - "cmp $0, 0(%%rax)\n" // SIG_DFL - "jz 18f\n" - "cmp $1, 0(%%rax)\n" // SIG_IGN - "jnz 19f\n" // can't really ignore synchronous signals - - // Trigger the kernel's default signal disposition. The only way we can - // do this from seccomp mode is by blocking the signal and retriggering - // it. - "18:orb $4, 0x131(%%rsp)\n" // signal mask at time of segmentation fault - "ret\n" - - // Check sa_flags: - // - We can ignore SA_NOCLDSTOP, SA_NOCLDWAIT, and SA_RESTART as they - // do not have any effect for SIGSEGV. - // - On x86-64, we can also ignore SA_SIGINFO, as the calling - // conventions for sa_handler() are a subset of the conventions for - // sa_sigaction(). - // - We have to always register our signal handler with SA_NODEFER so - // that the user's signal handler can make system calls which might - // require additional help from our SEGV handler. - // - If the user's signal handler wasn't supposed to be SA_NODEFER, then - // we emulate this behavior by keeping track of a recursion counter. - // - // TODO(markus): If/when we add support for sigaltstack(), we have to - // handle SA_ONSTACK. - "19:cmpl $0, %%gs:0x105C-0xE0\n"// check if we failed inside of SEGV handler - "jnz 18b\n" // if so, then terminate program - "mov 0(%%rax), %%rbx\n" // sa_segv_.sa_sigaction - "mov 8(%%rax), %%rcx\n" // sa_segv_.sa_flags - "btl $31, %%ecx\n" // SA_RESETHAND - "jnc 20f\n" - "movq $0, 0(%%rax)\n" // set handler to SIG_DFL - "20:btl $30, %%ecx\n" // SA_NODEFER - "jc 21f\n" - "mov %%r14, 0(%%rsp)\n" // trigger a SEGV on return, so that we can - "incl %%gs:0x105C-0xE0\n" // clean up state; incr. recursion counter - "21:jmp *%%rbx\n" // call user's signal handler - - - // Non-executable version of the restorer function. We use this to - // trigger a SEGV upon returning from the user's signal handler, giving - // us an ability to clean up prior to returning from the SEGV handler. 
- ".pushsection .data\n" // move code into non-executable section - "22:mov $0xF, %%rax\n" // gdb looks for this signature when doing - "syscall\n" // backtraces - ".popsection\n" -#elif defined(__i386__) - // Inspect instruction at the point where the segmentation fault - // happened. If it is RDTSC, forward the request to the trusted - // thread. - "mov $-3, %%ebx\n" // request for RDTSC - "mov 0xDC(%%esp), %%ebp\n" // %eip at time of segmentation fault - "cmpw $0x310F, (%%ebp)\n" // RDTSC - "jz 0f\n" - "cmpw $0x010F, (%%ebp)\n" // RDTSCP - "jnz 9f\n" - "cmpb $0xF9, 2(%%ebp)\n" - "jnz 9f\n" - "mov $-4, %%ebx\n" // request for RDTSCP - "0:" -#ifndef NDEBUG - "lea 100f, %%eax\n" - "push %%eax\n" - "call playground$debugMessage\n" - "sub $4, %%esp\n" -#else - "sub $8, %%esp\n" // allocate buffer for receiving timestamp -#endif - "push %%ebx\n" - "mov %%fs:16, %%ebx\n" // fd = threadFdPub - "mov %%esp, %%ecx\n" // buf = %esp - "mov $4, %%edx\n" // len = sizeof(int) - "1:mov %%edx, %%eax\n" // NR_write - "int $0x80\n" - "cmp %%eax, %%edx\n" - "jz 7f\n" - "cmp $-4, %%eax\n" // EINTR - "jz 1b\n" - "2:add $12, %%esp\n" // remove temporary buffer from stack - "xor %%eax, %%eax\n" - "movl $0, 0xC8(%%esp)\n" // %edx at time of segmentation fault - "cmpw $0x310F, (%%ebp)\n" // RDTSC - "jz 3f\n" - "movl $0, 0xCC(%%esp)\n" // %ecx at time of segmentation fault - "3:mov %%eax, 0xD0(%%esp)\n" // %eax at time of segmentation fault - "4:mov 0xDC(%%esp), %%ebp\n" // %eip at time of segmentation fault - "addl $2, 0xDC(%%esp)\n" // %eip at time of segmentation fault - "cmpw $0x010F, (%%ebp)\n" // RDTSCP - "jnz 5f\n" - "addl $1, 0xDC(%%esp)\n" // %eip at time of segmentation fault - "5:sub $0x1C8, %%esp\n" // a legacy signal stack is much larger - "mov 0x1CC(%%esp), %%eax\n" // push signal number - "push %%eax\n" - "lea 0x270(%%esp), %%esi\n" // copy siginfo register values - "lea 0x4(%%esp), %%edi\n" // into new location - "mov $22, %%ecx\n" - "cld\n" - "rep movsl\n" - "mov 
0x2C8(%%esp), %%ebx\n" // copy first half of signal mask - "mov %%ebx, 0x54(%%esp)\n" - "lea 6f, %%esi\n" // copy "magic" restorer function - "push %%esi\n" // push restorer function - "lea 0x2D4(%%esp), %%edi\n" // patch up retcode magic numbers - "movb $2, %%cl\n" - "rep movsl\n" - "ret\n" // return to restorer function - - // The restorer function is sometimes used by gdb as a magic marker to - // recognize signal stack frames. Don't change any of the next three - // instructions. - "6:pop %%eax\n" // remove dummy argument (signo) - "mov $119, %%eax\n" // NR_sigreturn - "int $0x80\n" - "7:mov $12, %%edx\n" // len = 3*sizeof(int) - "8:mov $3, %%eax\n" // NR_read - "int $0x80\n" - "cmp $-4, %%eax\n" // EINTR - "jz 8b\n" - "cmp %%eax, %%edx\n" - "jnz 2b\n" - "pop %%eax\n" - "pop %%edx\n" - "pop %%ecx\n" - "mov %%edx, 0xC8(%%esp)\n" // %edx at time of segmentation fault - "cmpw $0x310F, (%%ebp)\n" // RDTSC - "jz 3b\n" - "mov %%ecx, 0xCC(%%esp)\n" // %ecx at time of segmentation fault - "jmp 3b\n" - - // If the instruction is INT 0, then this was probably the result - // of playground::Library being unable to find a way to safely - // rewrite the system call instruction. Retrieve the CPU register - // at the time of the segmentation fault and invoke syscallWrapper(). 
- "9:cmpw $0x00CD, (%%ebp)\n" // INT $0x0 - "jnz 20f\n" -#ifndef NDEBUG - "lea 200f, %%eax\n" - "push %%eax\n" - "call playground$debugMessage\n" - "add $0x4, %%esp\n" -#endif - "mov 0xD0(%%esp), %%eax\n" // %eax at time of segmentation fault - "mov 0xC4(%%esp), %%ebx\n" // %ebx at time of segmentation fault - "mov 0xCC(%%esp), %%ecx\n" // %ecx at time of segmentation fault - "mov 0xC8(%%esp), %%edx\n" // %edx at time of segmentation fault - "mov 0xB8(%%esp), %%esi\n" // %esi at time of segmentation fault - "mov 0xB4(%%esp), %%edi\n" // %edi at time of segmentation fault - "mov 0xB2(%%esp), %%ebp\n" // %ebp at time of segmentation fault - - // Handle sigprocmask() and rt_sigprocmask() - "cmp $175, %%eax\n" // NR_rt_sigprocmask - "jnz 10f\n" - "mov $-22, %%eax\n" // -EINVAL - "cmp $8, %%esi\n" // %esi = sigsetsize (8 bytes = 64 signals) - "jl 3b\n" - "jmp 11f\n" - "10:cmp $126, %%eax\n" // NR_sigprocmask - "jnz 15f\n" - "mov $-22, %%eax\n" - "11:mov 0xFC(%%esp), %%edi\n" // signal mask at time of segmentation fault - "mov 0x100(%%esp), %%ebp\n" - "test %%ecx, %%ecx\n" // only set mask, if set is non-NULL - "jz 14f\n" - "mov 0(%%ecx), %%esi\n" - "mov 4(%%ecx), %%ecx\n" - "cmp $0, %%ebx\n" // %ebx = how (SIG_BLOCK) - "jnz 12f\n" - "or %%esi, 0xFC(%%esp)\n" // signal mask at time of segmentation fault - "or %%ecx, 0x100(%%esp)\n" - "jmp 14f\n" - "12:cmp $1, %%ebx\n" // %ebx = how (SIG_UNBLOCK) - "jnz 13f\n" - "xor $-1, %%esi\n" - "xor $-1, %%ecx\n" - "and %%esi, 0xFC(%%esp)\n" // signal mask at time of segmentation fault - "and %%ecx, 0x100(%%esp)\n" - "jmp 14f\n" - "13:cmp $2, %%ebx\n" // %ebx = how (SIG_SETMASK) - "jnz 3b\n" - "mov %%esi, 0xFC(%%esp)\n" // signal mask at time of segmentation fault - "mov %%ecx, 0x100(%%esp)\n" - "14:xor %%eax, %%eax\n" - "test %%edx, %%edx\n" // only return old mask, if set is non-NULL - "jz 3b\n" - "mov %%edi, 0(%%edx)\n" // old_set - "mov %%ebp, 4(%%edx)\n" - "jmp 3b\n" - - // Handle sigreturn() and rt_sigreturn() - // See 
syscall.cc for a discussion on how we can emulate rt_sigreturn() - // by calling sigreturn() with a suitably adjusted stack. - "15:cmp $119, %%eax\n" // NR_sigreturn - "jnz 17f\n" - "mov 0xC0(%%esp), %%esp\n" // %esp at time of segmentation fault - "16:int $0x80\n" // sigreturn() is unrestricted - "17:cmp $173, %%eax\n" // NR_rt_sigreturn - "jnz 18f\n" - "mov 0xC0(%%esp), %%esp\n" // %esp at time of segmentation fault - "sub $4, %%esp\n" // add fake return address - "jmp 4b\n" - - // Copy signal frame onto new stack. In the process, we have to convert - // it from an RT signal frame to a legacy signal frame. - // See clone.cc for details - "18:cmp $120+0xF000, %%eax\n" // NR_clone + 0xF000 - "jnz 19f\n" - "lea -0x1C8(%%esp), %%eax\n"// retain stack frame upon returning - "mov %%eax, 0xC0(%%esp)\n" // %esp at time of segmentation fault - "jmp 3b\n" - - // Forward system call to syscallWrapper() - "19:call playground$syscallWrapper\n" - "jmp 3b\n" - - // In order to implement SA_NODEFER, we have to keep track of recursive - // calls to SIGSEGV handlers. This means we have to increment a counter - // before calling the user's signal handler, and decrement it on - // leaving the user's signal handler. - // Some signal handlers look at the return address of the signal - // stack, and more importantly "gdb" uses the call to {,rt_}sigreturn() - // as a magic signature when doing stacktraces. So, we have to use - // a little more unusual code to regain control after the user's - // signal handler is done. We adjust the return address to point to - // non-executable memory. And when we trigger another SEGV we pop the - // extraneous signal frame and then call sigreturn(). - // N.B. We currently do not correctly adjust the SEGV counter, if the - // user's signal handler exits in way other than by returning (e.g. by - // directly calling {,rt_}sigreturn(), or by calling siglongjmp()). 
- "20:lea 30f, %%edi\n" // rt-style restorer function - "lea 31f, %%esi\n" // legacy restorer function - "cmp %%ebp, %%edi\n" // check if returning from user's handler - "jnz 21f\n" - "decl %%fs:0x1040-0x58\n" // decrement SEGV recursion counter - "mov 0xC0(%%esp), %%esp\n" // %esp at time of segmentation fault - "jmp 29f\n" - "21:cmp %%ebp, %%esi\n" // check if returning from user's handler - "jnz 22f\n" - "decl %%fs:0x1040-0x58\n" // decrement SEGV recursion counter - "mov 0xC0(%%esp), %%esp\n" // %esp at time of segmentation fault - "jmp 6b\n" - - // This was a genuine segmentation fault. Check Sandbox::sa_segv_ for - // what we are supposed to do. - "22:lea playground$sa_segv, %%eax\n" - "cmp $0, 0(%%eax)\n" // SIG_DFL - "jz 23f\n" - "cmp $1, 0(%%eax)\n" // SIG_IGN - "jnz 24f\n" // can't really ignore synchronous signals - - // Trigger the kernel's default signal disposition. The only way we can - // do this from seccomp mode is by blocking the signal and retriggering - // it. - "23:orb $4, 0xFD(%%esp)\n" // signal mask at time of segmentation fault - "jmp 5b\n" - - // Check sa_flags: - // - We can ignore SA_NOCLDSTOP, SA_NOCLDWAIT, and SA_RESTART as they - // do not have any effect for SIGSEGV. - // - We have to always register our signal handler with SA_NODEFER so - // that the user's signal handler can make system calls which might - // require additional help from our SEGV handler. - // - If the user's signal handler wasn't supposed to be SA_NODEFER, then - // we emulate this behavior by keeping track of a recursion counter. - // - // TODO(markus): If/when we add support for sigaltstack(), we have to - // handle SA_ONSTACK. 
- "24:cmpl $0, %%fs:0x1040-0x58\n"// check if we failed inside of SEGV handler - "jnz 23b\n" // if so, then terminate program - "mov 0(%%eax), %%ebx\n" // sa_segv_.sa_sigaction - "mov 4(%%eax), %%ecx\n" // sa_segv_.sa_flags - "btl $31, %%ecx\n" // SA_RESETHAND - "jnc 25f\n" - "movl $0, 0(%%eax)\n" // set handler to SIG_DFL - "25:btl $30, %%ecx\n" // SA_NODEFER - "jc 28f\n" - "btl $2, %%ecx\n" // SA_SIGINFO - "jnc 26f\n" - "mov %%edi, 0(%%esp)\n" // trigger a SEGV on return - "incl %%fs:0x1040-0x58\n" // increment recursion counter - "jmp *%%ebx\n" // call user's signal handler - "26:mov %%esi, 0(%%esp)\n" - "incl %%fs:0x1040-0x58\n" // increment recursion counter - - // We always register the signal handler to give us rt-style signal - // frames. But if the user asked for legacy signal frames, we must - // convert the signal frame prior to calling the user's signal handler. - "27:sub $0x1C8, %%esp\n" // a legacy signal stack is much larger - "mov 0x1CC(%%esp), %%eax\n" // push signal number - "push %%eax\n" - "mov 0x1CC(%%esp), %%eax\n" // push restorer function - "push %%eax\n" - "lea 0x274(%%esp), %%esi\n" // copy siginfo register values - "lea 0x8(%%esp), %%edi\n" // into new location - "mov $22, %%ecx\n" - "cld\n" - "rep movsl\n" - "mov 0x2CC(%%esp), %%eax\n" // copy first half of signal mask - "mov %%eax, 0x58(%%esp)\n" - "lea 31f, %%esi\n" - "lea 0x2D4(%%esp), %%edi\n" // patch up retcode magic numbers - "movb $2, %%cl\n" - "rep movsl\n" - "jmp *%%ebx\n" // call user's signal handler - "28:lea 6b, %%eax\n" // set appropriate restorer function - "mov %%eax, 0(%%esp)\n" - "btl $2, %%ecx\n" // SA_SIGINFO - "jnc 27b\n" - "lea 29f, %%eax\n" - "mov %%eax, 0(%%esp)\n" // set appropriate restorer function - "jmp *%%ebx\n" // call user's signal handler - "29:pushl $30f\n" // emulate rt_sigreturn() - "jmp 5b\n" - - // Non-executable versions of the restorer function. 
We use these to - // trigger a SEGV upon returning from the user's signal handler, giving - // us an ability to clean up prior to returning from the SEGV handler. - ".pushsection .data\n" // move code into non-executable section - "30:mov $173, %%eax\n" // NR_rt_sigreturn - "int $0x80\n" // gdb looks for this signature when doing - ".byte 0\n" // backtraces - "31:pop %%eax\n" - "mov $119, %%eax\n" // NR_sigreturn - "int $0x80\n" - ".popsection\n" -#else -#error Unsupported target platform -#endif - ".pushsection \".rodata\"\n" -#ifndef NDEBUG - "100:.asciz \"RDTSC(P): Executing handler\\n\"\n" - "200:.asciz \"INT $0x0: Executing handler\\n\"\n" -#endif - ".popsection\n" - "999:pop %0\n" - : "=g"(fnc) - : - : "memory" -#if defined(__x86_64__) - , "rsp" -#elif defined(__i386__) - , "esp" -#endif - ); - return fnc; -} - -SecureMem::Args* Sandbox::getSecureMem() { - // Check trusted_thread.cc for the magic offset that gets us from the TLS - // to the beginning of the secure memory area. - SecureMem::Args* ret; -#if defined(__x86_64__) - asm volatile( - "movq %%gs:-0xE0, %0\n" - : "=q"(ret)); -#elif defined(__i386__) - asm volatile( - "movl %%fs:-0x58, %0\n" - : "=r"(ret)); -#else -#error Unsupported target platform -#endif - return ret; -} - -void Sandbox::snapshotMemoryMappings(int processFd, int proc_self_maps) { - SysCalls sys; - if (sys.lseek(proc_self_maps, 0, SEEK_SET) || - !sendFd(processFd, proc_self_maps, -1, NULL, 0)) { - failure: - die("Cannot access /proc/self/maps"); - } - int dummy; - if (read(sys, processFd, &dummy, sizeof(dummy)) != sizeof(dummy)) { - goto failure; - } -} - -int Sandbox::supportsSeccompSandbox(int proc_fd) { - if (status_ != STATUS_UNKNOWN) { - return status_ != STATUS_UNSUPPORTED; - } - int fds[2]; - SysCalls sys; - if (sys.pipe(fds)) { - status_ = STATUS_UNSUPPORTED; - return 0; - } - pid_t pid; - switch ((pid = sys.fork())) { - case -1: - status_ = STATUS_UNSUPPORTED; - return 0; - case 0: { - int devnull = sys.open("/dev/null", 
O_RDWR, 0); - if (devnull >= 0) { - sys.dup2(devnull, 0); - sys.dup2(devnull, 1); - sys.dup2(devnull, 2); - sys.close(devnull); - } - if (proc_fd >= 0) { - setProcSelfMaps(sys.openat(proc_fd, "self/maps", O_RDONLY, 0)); - } - startSandbox(); - write(sys, fds[1], "", 1); - - // Try to tell the trusted thread to shut down the entire process in an - // orderly fashion - defaultSystemCallHandler(__NR_exit_group, 0, 0, 0, 0, 0, 0); - - // If that did not work (e.g. because the kernel does not know about the - // exit_group() system call), make a direct _exit() system call instead. - // This system call is unrestricted in seccomp mode, so it will always - // succeed. Normally, we don't like it, because unlike exit_group() it - // does not terminate any other thread. But since we know that - // exit_group() exists in all kernels which support kernel-level threads, - // this is OK we only get here for old kernels where _exit() is OK. - sys._exit(0); - } - default: - NOINTR_SYS(sys.close(fds[1])); - char ch; - if (read(sys, fds[0], &ch, 1) != 1) { - status_ = STATUS_UNSUPPORTED; - } else { - status_ = STATUS_AVAILABLE; - } - int rc; - NOINTR_SYS(sys.waitpid(pid, &rc, 0)); - NOINTR_SYS(sys.close(fds[0])); - return status_ != STATUS_UNSUPPORTED; - } -} - -void Sandbox::setProcSelfMaps(int proc_self_maps) { - proc_self_maps_ = proc_self_maps; -} - -void Sandbox::startSandbox() { - if (status_ == STATUS_UNSUPPORTED) { - die("The seccomp sandbox is not supported on this computer"); - } else if (status_ == STATUS_ENABLED) { - return; - } - - SysCalls sys; - if (proc_self_maps_ < 0) { - proc_self_maps_ = sys.open("/proc/self/maps", O_RDONLY, 0); - if (proc_self_maps_ < 0) { - die("Cannot access \"/proc/self/maps\""); - } - } - - // The pid is unchanged for the entire program, so we can retrieve it once - // and store it in a global variable. 
- pid_ = sys.getpid(); - - // Block all signals, except for the RDTSC handler - setupSignalHandlers(); - - // Get socketpairs for talking to the trusted process - int pair[4]; - if (sys.socketpair(AF_UNIX, SOCK_STREAM, 0, pair) || - sys.socketpair(AF_UNIX, SOCK_STREAM, 0, pair+2)) { - die("Failed to create trusted thread"); - } - processFdPub_ = pair[0]; - cloneFdPub_ = pair[2]; - SecureMemArgs* secureMem = createTrustedProcess(pair[0], pair[1], - pair[2], pair[3]); - - // We find all libraries that have system calls and redirect the system - // calls to the sandbox. If we miss any system calls, the application will be - // terminated by the kernel's seccomp code. So, from a security point of - // view, if this code fails to identify system calls, we are still behaving - // correctly. - { - Maps maps(proc_self_maps_); - const char *libs[] = { "ld", "libc", "librt", "libpthread", NULL }; - - // Intercept system calls in the VDSO segment (if any). This has to happen - // before intercepting system calls in any of the other libraries, as - // the main kernel entry point might be inside of the VDSO and we need to - // determine its address before we can compare it to jumps from inside - // other libraries. - for (Maps::const_iterator iter = maps.begin(); iter != maps.end(); ++iter){ - Library* library = *iter; - if (library->isVDSO() && library->parseElf()) { - library->makeWritable(true); - library->patchSystemCalls(); - library->makeWritable(false); - break; - } - } - - // Intercept system calls in libraries that are known to have them. - for (Maps::const_iterator iter = maps.begin(); iter != maps.end(); ++iter){ - Library* library = *iter; - const char* mapping = iter.name().c_str(); - - // Find the actual base name of the mapped library by skipping past any - // SPC and forward-slashes. We don't want to accidentally find matches, - // because the directory name included part of our well-known lib names. 
- // - // Typically, prior to pruning, entries would look something like this: - // 08:01 2289011 /lib/libc-2.7.so - for (const char *delim = " /"; *delim; ++delim) { - const char* skip = strrchr(mapping, *delim); - if (skip) { - mapping = skip + 1; - } - } - - for (const char **ptr = libs; *ptr; ptr++) { - const char *name = strstr(mapping, *ptr); - if (name == mapping) { - char ch = name[strlen(*ptr)]; - if (ch < 'A' || (ch > 'Z' && ch < 'a') || ch > 'z') { - if (library->parseElf()) { - library->makeWritable(true); - library->patchSystemCalls(); - library->makeWritable(false); - break; - } - } - } - } - } - } - - // Take a snapshot of the current memory mappings. These mappings will be - // off-limits to all future mmap(), munmap(), mremap(), and mprotect() calls. - snapshotMemoryMappings(processFdPub_, proc_self_maps_); - NOINTR_SYS(sys.close(proc_self_maps_)); - proc_self_maps_ = -1; - - // Creating the trusted thread enables sandboxing - createTrustedThread(processFdPub_, cloneFdPub_, secureMem); - - // We can no longer check for sandboxing support at this point, but we also - // know for a fact that it is available (as we just turned it on). So update - // the status to reflect this information. - status_ = STATUS_ENABLED; -} - -} // namespace diff --git a/sandbox/linux/seccomp/sandbox.h b/sandbox/linux/seccomp/sandbox.h deleted file mode 100644 index 8f49575..0000000 --- a/sandbox/linux/seccomp/sandbox.h +++ /dev/null @@ -1,12 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 

#ifndef SANDBOX_H__
#define SANDBOX_H__

// C-linkage entry points of the seccomp sandbox.
extern "C" int SupportsSeccompSandbox(int proc_fd);
extern "C" void SeccompSandboxSetProcSelfMaps(int proc_self_maps);
extern "C" void StartSeccompSandbox();

#endif // SANDBOX_H__
diff --git a/sandbox/linux/seccomp/sandbox_impl.h b/sandbox/linux/seccomp/sandbox_impl.h
deleted file mode 100644
index 3e99a5510..0000000
--- a/sandbox/linux/seccomp/sandbox_impl.h
+++ /dev/null
@@ -1,715 +0,0 @@
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef SANDBOX_IMPL_H__
#define SANDBOX_IMPL_H__

#include <asm/ldt.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <linux/prctl.h>
#include <linux/unistd.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sched.h>
#include <signal.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/ptrace.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>

// Evaluates "x" repeatedly for as long as it yields a negative value while
// sys.my_errno equals EINTR, and yields the final value. The macro refers to
// a variable named "sys", so one must be in scope wherever it is used.
#define NOINTR_SYS(x) \
  ({ typeof(x) i__; while ((i__ = (x)) < 0 && sys.my_errno == EINTR); i__;})

#ifdef __cplusplus
#include <map>
#include <vector>
#include "sandbox.h"
#include "securemem.h"
#include "tls.h"

namespace playground {

// Collection of static entry points, wire-format structures and shared state
// that make up the seccomp sandbox. The class is never instantiated in this
// header; everything is declared static.
class Sandbox {
  // TODO(markus): restrict access to our private file handles
 public:
  enum { kMaxThreads = 100 };


  // There are a lot of reasons why the Seccomp sandbox might not be available.
  // This could be because the kernel does not support Seccomp mode, or it
  // could be because we fail to successfully rewrite all system call entry
  // points.
  // "proc_fd" should be a file descriptor for "/proc", or -1 if not provided
  // by the caller.
  static int supportsSeccompSandbox(int proc_fd)
                                          asm("SupportsSeccompSandbox");

  // The sandbox needs to be able to access "/proc/self/maps". If this file
  // is not accessible when "startSandbox()" gets called, the caller can
  // provide an already opened file descriptor by calling "setProcSelfMaps()".
  // The sandbox becomes the new owner of this file descriptor and will
  // eventually close it when "startSandbox()" executes.
  static void setProcSelfMaps(int proc_self_maps)
                                          asm("SeccompSandboxSetProcSelfMaps");

  // This is the main public entry point. It finds all system calls that
  // need rewriting, sets up the resources needed by the sandbox, and
  // enters Seccomp mode.
  static void startSandbox() asm("StartSeccompSandbox");

 private:
// syscall_table.c has to be implemented in C, as C++ does not support
// designated initializers for arrays. The only other alternative would be
// to have a source code generator for this table.
//
// We would still like the C source file to include our header file. This
// requires some define statements to transform C++ specific constructs to
// something that is palatable to a C compiler.
#define STATIC static
#define SecureMemArgs SecureMem::Args
  // Clone() is special as it has a wrapper in syscall_table.c. The wrapper
  // adds one extra argument (the pointer to the saved registers) and then
  // calls playground$sandbox__clone().
  static long sandbox_clone(int flags, char* stack, int* pid, int* ctid,
                            void* tls, void* wrapper_sp)
                                          asm("playground$sandbox__clone")
  #if defined(__x86_64__)
                                          __attribute__((visibility("internal")))
#endif
    ;
#else
// C view of this header (used by syscall_table.c): no class, no "static"
// member qualifier, and opaque stand-ins for the C++-only types.
#define STATIC
#define bool int
#define SecureMemArgs void
  // This is the wrapper entry point that is found in the syscall_table.
  long sandbox_clone(int flags, char* stack, int* pid, int* ctid, void* tls)
                                          asm("playground$sandbox_clone");
#endif

  // Entry points for sandboxed code that is attempting to make system calls
  STATIC long sandbox_access(const char*, int)
                                          asm("playground$sandbox_access");
  STATIC long sandbox_exit(int status) asm("playground$sandbox_exit");
  STATIC long sandbox_getpid() asm("playground$sandbox_getpid");
  #if defined(__NR_getsockopt)
  STATIC long sandbox_getsockopt(int, int, int, void*, socklen_t*)
                                          asm("playground$sandbox_getsockopt");
  #endif
  STATIC long sandbox_gettid() asm("playground$sandbox_gettid");
  STATIC long sandbox_ioctl(int d, int req, void* arg)
                                          asm("playground$sandbox_ioctl");
  #if defined(__NR_ipc)
  STATIC long sandbox_ipc(unsigned, int, int, int, void*, long)
                                          asm("playground$sandbox_ipc");
  #endif
  STATIC long sandbox_lstat(const char* path, void* buf)
                                          asm("playground$sandbox_lstat");
  #if defined(__NR_lstat64)
  STATIC long sandbox_lstat64(const char *path, void* b)
                                          asm("playground$sandbox_lstat64");
  #endif
  STATIC long sandbox_madvise(void*, size_t, int)
                                          asm("playground$sandbox_madvise");
  STATIC void *sandbox_mmap(void* start, size_t length, int prot, int flags,
                            int fd, off_t offset)
                                          asm("playground$sandbox_mmap");
  STATIC long sandbox_mprotect(const void*, size_t, int)
                                          asm("playground$sandbox_mprotect");
  STATIC long sandbox_munmap(void* start, size_t length)
                                          asm("playground$sandbox_munmap");
  STATIC long sandbox_open(const char*, int, mode_t)
                                          asm("playground$sandbox_open");
  #if defined(__NR_recvfrom)
  STATIC ssize_t sandbox_recvfrom(int, void*, size_t, int, void*, socklen_t*)
                                          asm("playground$sandbox_recvfrom");
  STATIC ssize_t sandbox_recvmsg(int, struct msghdr*, int)
                                          asm("playground$sandbox_recvmsg");
  #endif
  #if defined(__NR_rt_sigaction)
  STATIC long sandbox_rt_sigaction(int, const void*, void*, size_t)
                                          asm("playground$sandbox_rt_sigaction");
  #endif
  #if defined(__NR_rt_sigprocmask)
  STATIC long sandbox_rt_sigprocmask(int how, const void*, void*, size_t)
                                          asm("playground$sandbox_rt_sigprocmask");
  #endif
  #if defined(__NR_sendmsg)
  // NOTE(review): declared as returning size_t, whereas sandbox_sendto(),
  // sandbox_recvfrom() and sandbox_recvmsg() return ssize_t. Changing it
  // requires updating the definition at the same time.
  STATIC size_t sandbox_sendmsg(int, const struct msghdr*, int)
                                          asm("playground$sandbox_sendmsg");
  STATIC ssize_t sandbox_sendto(int, const void*, size_t, int, const void*,
                                socklen_t)asm("playground$sandbox_sendto");
  #endif
  #if defined(__NR_shmat)
  STATIC void* sandbox_shmat(int, const void*, int)
                                          asm("playground$sandbox_shmat");
  STATIC long sandbox_shmctl(int, int, void*)
                                          asm("playground$sandbox_shmctl");
  STATIC long sandbox_shmdt(const void*) asm("playground$sandbox_shmdt");
  STATIC long sandbox_shmget(int, size_t, int)
                                          asm("playground$sandbox_shmget");
  #endif
  #if defined(__NR_setsockopt)
  STATIC long sandbox_setsockopt(int, int, int, const void*, socklen_t)
                                          asm("playground$sandbox_setsockopt");
  #endif
  #if defined(__NR_sigaction)
  STATIC long sandbox_sigaction(int, const void*, void*)
                                          asm("playground$sandbox_sigaction");
  #endif
  #if defined(__NR_signal)
  STATIC void* sandbox_signal(int, const void*)
                                          asm("playground$sandbox_signal");
  #endif
  #if defined(__NR_sigprocmask)
  STATIC long sandbox_sigprocmask(int how, const void*, void*)
                                          asm("playground$sandbox_sigprocmask");
  #endif
  #if defined(__NR_socketcall)
  STATIC long sandbox_socketcall(int call, void* args)
                                          asm("playground$sandbox_socketcall");
  #endif
  STATIC long sandbox_stat(const char* path, void* buf)
                                          asm("playground$sandbox_stat");
  #if defined(__NR_stat64)
  STATIC long sandbox_stat64(const char *path, void* b)
                                          asm("playground$sandbox_stat64");
  #endif

  // Functions for system calls that need to be handled in the trusted process
  STATIC bool process_access(int, int, int, int, SecureMemArgs*)
                                          asm("playground$process_access");
  STATIC bool process_clone(int, int, int, int, SecureMemArgs*)
                                          asm("playground$process_clone");
  STATIC bool process_exit(int, int, int, int, SecureMemArgs*)
                                          asm("playground$process_exit");
  #if defined(__NR_getsockopt)
  STATIC bool process_getsockopt(int, int, int, int, SecureMemArgs*)
                                          asm("playground$process_getsockopt");
  #endif
  STATIC bool process_ioctl(int, int, int, int, SecureMemArgs*)
                                          asm("playground$process_ioctl");
  #if defined(__NR_ipc)
  STATIC bool process_ipc(int, int, int, int, SecureMemArgs*)
                                          asm("playground$process_ipc");
  #endif
  STATIC bool process_madvise(int, int, int, int, SecureMemArgs*)
                                          asm("playground$process_madvise");
  STATIC bool process_mmap(int, int, int, int, SecureMemArgs*)
                                          asm("playground$process_mmap");
  STATIC bool process_mprotect(int, int, int, int, SecureMemArgs*)
                                          asm("playground$process_mprotect");
  STATIC bool process_munmap(int, int, int, int, SecureMemArgs*)
                                          asm("playground$process_munmap");
  STATIC bool process_open(int, int, int, int, SecureMemArgs*)
                                          asm("playground$process_open");
  #if defined(__NR_recvfrom)
  STATIC bool process_recvfrom(int, int, int, int, SecureMemArgs*)
                                          asm("playground$process_recvfrom");
  STATIC bool process_recvmsg(int, int, int, int, SecureMemArgs*)
                                          asm("playground$process_recvmsg");
  STATIC bool process_sendmsg(int, int, int, int, SecureMemArgs*)
                                          asm("playground$process_sendmsg");
  STATIC bool process_sendto(int, int, int, int, SecureMemArgs*)
                                          asm("playground$process_sendto");
  STATIC bool process_setsockopt(int, int, int, int, SecureMemArgs*)
                                          asm("playground$process_setsockopt");
  #endif
  #if defined(__NR_shmat)
  STATIC bool process_shmat(int, int, int, int, SecureMemArgs*)
                                          asm("playground$process_shmat");
  STATIC bool process_shmctl(int, int, int, int, SecureMemArgs*)
                                          asm("playground$process_shmctl");
  STATIC bool process_shmdt(int, int, int, int, SecureMemArgs*)
                                          asm("playground$process_shmdt");
  STATIC bool process_shmget(int, int, int, int, SecureMemArgs*)
                                          asm("playground$process_shmget");
  #endif
  STATIC bool process_sigaction(int, int, int, int, SecureMemArgs*)
                                          asm("playground$process_sigaction");
  #if defined(__NR_socketcall)
  STATIC bool process_socketcall(int, int, int, int, SecureMemArgs*)
                                          asm("playground$process_socketcall");
  #endif
  STATIC bool process_stat(int, int, int, int, SecureMemArgs*)
                                          asm("playground$process_stat");

#ifdef __cplusplus
  friend class Debug;
  friend class Library;
  friend class Maps;
  friend class Mutex;
  friend class SecureMem;
  friend class TLS;

  // Define our own inline system calls. These calls will not be rewritten
  // to point to the sandboxed wrapper functions. They thus allow us to
  // make actual system calls (e.g. in the sandbox initialization code, and
  // in the trusted process)
  class SysCalls {
   public:
    #define SYS_CPLUSPLUS
    #define SYS_ERRNO my_errno
    #define SYS_INLINE inline
    #define SYS_PREFIX -1
    #undef SYS_LINUX_SYSCALL_SUPPORT_H
    #include "linux_syscall_support.h"
    SysCalls() : my_errno(0) { }
    // Per-instance error code; SYS_ERRNO above is defined to this member.
    int my_errno;
  };
  // Select mmap2() where the kernel headers define it, mmap() otherwise.
  #ifdef __NR_mmap2
    #define MMAP mmap2
    #define __NR_MMAP __NR_mmap2
  #else
    #define MMAP mmap
    #define __NR_MMAP __NR_mmap
  #endif

  // Print an error message and terminate the program. Used for fatal errors.
  static void die(const char *msg = 0) __attribute__((noreturn)) {
    SysCalls sys;
    if (msg) {
      sys.write(2, msg, strlen(msg));
      sys.write(2, "\n", 1);
    }
    // Keep trying both exit calls until one of them takes effect.
    for (;;) {
      sys.exit_group(1);
      sys._exit(1);
    }
  }

  // Wrapper around "read()" that can deal with partial and interrupted reads
  // and that does not modify the global errno variable.
  // Returns the number of bytes read (less than "len" only if end-of-file was
  // reached), or a negative value on error.
  static ssize_t read(SysCalls& sys, int fd, void* buf, size_t len) {
    // Reject lengths that could not be represented in the return value.
    if (static_cast<ssize_t>(len) < 0) {
      sys.my_errno = EINVAL;
      return -1;
    }
    size_t offset = 0;
    while (offset < len) {
      ssize_t partial =
        NOINTR_SYS(sys.read(fd, reinterpret_cast<char*>(buf) + offset,
                            len - offset));
      if (partial < 0) {
        return partial;
      } else if (!partial) {
        break;
      }
      offset += partial;
    }
    return offset;
  }

  // Wrapper around "write()" that can deal with interrupted writes and that
  // does not modify the global errno variable.
  // NOTE(review): unlike read() above, a short write is returned to the
  // caller as-is; it is not continued.
  static ssize_t write(SysCalls& sys, int fd, const void* buf, size_t len){
    return NOINTR_SYS(sys.write(fd, buf, len));
  }

  // Sends a file handle to another process.
  // N.B. trusted_thread.cc has an assembly version of this function that
  // is safe to use without a call stack. If the wire-format is changed,
  // make sure to update the assembly code.
  static bool sendFd(int transport, int fd0, int fd1, const void* buf,
                     size_t len);

  // If getFd() fails, it will set the first valid fd slot (e.g. fd0) to
  // -errno.
  static bool getFd(int transport, int* fd0, int* fd1, void* buf,
                    size_t* len);

  // Data structures used to forward system calls to the trusted process.
  struct Accept {
    int sockfd;
    void* addr;
    socklen_t* addrlen;
  } __attribute__((packed));

  struct Accept4 {
    int sockfd;
    void* addr;
    socklen_t* addrlen;
    int flags;
  } __attribute__((packed));

  struct Access {
    size_t path_length;
    int mode;
  } __attribute__((packed));

  struct Bind {
    int sockfd;
    void* addr;
    socklen_t addrlen;
  } __attribute__((packed));

  struct Clone {
    int flags;
    char* stack;
    int* pid;
    int* ctid;
    void* tls;
    #if defined(__x86_64__)
      struct {
        void* r15;
        void* r14;
        void* r13;
        void* r12;
        void* r11;
        void* r10;
        void* r9;
        void* r8;
        void* rdi;
        void* rsi;
        void* rdx;
        void* rcx;
        void* rbx;
        void* rbp;
        void* fake_ret;
      } regs64 __attribute__((packed));
    #elif defined(__i386__)
      struct {
        void* ebp;
        void* edi;
        void* esi;
        void* edx;
        void* ecx;
        void* ebx;
      } regs32 __attribute__((packed));
    #else
    #error Unsupported target platform
    #endif
    void* ret;
  } __attribute__((packed));

  struct Connect {
    int sockfd;
    void* addr;
    socklen_t addrlen;
  } __attribute__((packed));

  struct GetSockName {
    int sockfd;
    void* name;
    socklen_t* namelen;
  } __attribute__((packed));

  struct GetPeerName {
    int sockfd;
    void* name;
    socklen_t* namelen;
  } __attribute__((packed));

  struct GetSockOpt {
    int sockfd;
    int level;
    int optname;
    void* optval;
    socklen_t* optlen;
  } __attribute__((packed));

  struct IOCtl {
    int d;
    int req;
    void *arg;
  } __attribute__((packed));

  #if defined(__NR_ipc)
  struct IPC {
    unsigned call;
    int first;
    int second;
    int third;
    void* ptr;
    long fifth;
  } __attribute__((packed));
  #endif

  struct Listen {
    int sockfd;
    int backlog;
  } __attribute__((packed));

  struct MAdvise {
    const void* start;
    size_t len;
    int advice;
  } __attribute__((packed));

  struct MMap {
    void* start;
    size_t length;
    int prot;
    int flags;
    int fd;
    off_t offset;
  } __attribute__((packed));

  // NOTE(review): this is the only request structure in this list that is
  // not declared __attribute__((packed)) -- confirm whether that is
  // intentional before relying on its layout.
  struct MProtect {
    const void* addr;
    size_t len;
    int prot;
  };

  struct MUnmap {
    void* start;
    size_t length;
  } __attribute__((packed));

  struct Open {
    size_t path_length;
    int flags;
    mode_t mode;
  } __attribute__((packed));

  struct Recv {
    int sockfd;
    void* buf;
    size_t len;
    int flags;
  } __attribute__((packed));

  struct RecvFrom {
    int sockfd;
    void* buf;
    size_t len;
    int flags;
    void* from;
    socklen_t *fromlen;
  } __attribute__((packed));

  struct RecvMsg {
    int sockfd;
    struct msghdr* msg;
    int flags;
  } __attribute__((packed));

  struct Send {
    int sockfd;
    const void* buf;
    size_t len;
    int flags;
  } __attribute__((packed));

  struct SendMsg {
    int sockfd;
    const struct msghdr* msg;
    int flags;
  } __attribute__((packed));

  struct SendTo {
    int sockfd;
    const void* buf;
    size_t len;
    int flags;
    const void* to;
    socklen_t tolen;
  } __attribute__((packed));

  struct SetSockOpt {
    int sockfd;
    int level;
    int optname;
    const void* optval;
    socklen_t optlen;
  } __attribute__((packed));

  #if defined(__NR_shmat)
  struct ShmAt {
    int shmid;
    const void* shmaddr;
    int shmflg;
  } __attribute__((packed));

  struct ShmCtl {
    int shmid;
    int cmd;
    void *buf;
  } __attribute__((packed));

  struct ShmDt {
    const void *shmaddr;
  } __attribute__((packed));

  struct ShmGet {
    int key;
    size_t size;
    int shmflg;
  } __attribute__((packed));
  #endif

  struct ShutDown {
    int sockfd;
    int how;
  } __attribute__((packed));

  struct SigAction {
    int sysnum;
    int signum;
    const SysCalls::kernel_sigaction* action;
    const SysCalls::kernel_sigaction* old_action;
    size_t sigsetsize;
  } __attribute__((packed));

  struct Socket {
    int domain;
    int type;
    int protocol;
  } __attribute__((packed));

  struct SocketPair {
    int domain;
    int type;
    int protocol;
    int* pair;
  } __attribute__((packed));

  #if defined(__NR_socketcall)
  // Request for the multiplexed socketcall() system call: the call number,
  // the caller's argument pointer, and the arguments for whichever socket
  // operation "call" selects.
  struct SocketCall {
    int call;
    void* arg_ptr;
    union {
      Socket socket;
      Bind bind;
      Connect connect;
      Listen listen;
      Accept accept;
      GetSockName getsockname;
      GetPeerName getpeername;
      SocketPair socketpair;
      Send send;
      Recv recv;
      SendTo sendto;
      RecvFrom recvfrom;
      ShutDown shutdown;
      SetSockOpt setsockopt;
      GetSockOpt getsockopt;
      SendMsg sendmsg;
      RecvMsg recvmsg;
      Accept4 accept4;
    } args;
  } __attribute__((packed));
  #endif

  struct Stat {
    int sysnum;
    size_t path_length;
    void* buf;
  } __attribute__((packed));

  // Thread local data available from each sandboxed thread.
  enum { TLS_COOKIE, TLS_TID, TLS_THREAD_FD };
  static long long cookie() { return TLS::getTLSValue<long long>(TLS_COOKIE); }
  static int tid() { return TLS::getTLSValue<int>(TLS_TID); }
  static int threadFdPub() { return TLS::getTLSValue<int>(TLS_THREAD_FD); }
  static int processFdPub() { return processFdPub_; }
  static kernel_sigset_t* signalMask() { return &getSecureMem()->signalMask; }

  // The SEGV handler knows how to handle RDTSC instructions
  static void setupSignalHandlers();
  static void (*segv())(int signo, SysCalls::siginfo *context, void *unused);

  // If no specific handler has been registered for a system call, call this
  // function which asks the trusted thread to perform the call. This is used
  // for system calls that are not restricted.
  static void* defaultSystemCallHandler(int syscallNum, void* arg0,
                                        void* arg1, void* arg2, void* arg3,
                                        void* arg4, void* arg5)
                                    asm("playground$defaultSystemCallHandler")
  #if defined(__x86_64__)
                                    __attribute__((visibility("internal")))
  #endif
  ;

  // Return the current secure memory structure for this thread.
  static SecureMem::Args* getSecureMem();

  // Return a secure memory structure that can be used by a newly created
  // thread.
  static SecureMem::Args* getNewSecureMem();

  // This function runs in the trusted process at startup and finds all the
  // memory mappings that existed when the sandbox was first enabled. Going
  // forward, all these mappings are off-limits for operations such as
  // mmap(), munmap(), and mprotect().
  static int initializeProtectedMap(int fd);

  // Helper function that allows the trusted process to get access to
  // "/proc/self/maps" in the sandbox.
  static void snapshotMemoryMappings(int processFd, int proc_self_maps);

  // Main loop for the trusted process.
  static void trustedProcess(int parentMapsFd, int processFdPub,
                             int sandboxFd, int cloneFd,
                             SecureMem::Args* secureArena)
                                                   __attribute__((noreturn));

  // Fork()s off the trusted process.
  static SecureMem::Args* createTrustedProcess(int processFdPub, int sandboxFd,
                                               int cloneFdPub, int cloneFd);

  // Creates the trusted thread for the initial thread, then enables
  // Seccomp mode.
  static void createTrustedThread(int processFdPub, int cloneFdPub,
                                  SecureMem::Args* secureMem);

  static int proc_self_maps_;
  static enum SandboxStatus {
    STATUS_UNKNOWN, STATUS_UNSUPPORTED, STATUS_AVAILABLE, STATUS_ENABLED
  } status_;
  static int pid_;
  static int processFdPub_;
  static int cloneFdPub_;

  #ifdef __i386__
  struct SocketCallArgInfo;
  static const struct SocketCallArgInfo socketCallArgInfo[];
  #endif

  // We always have to intercept SIGSEGV. If the application wants to set its
  // own SEGV handler, we forward to it whenever necessary.
  static SysCalls::kernel_sigaction sa_segv_ asm("playground$sa_segv");

  // The syscall_mutex_ can only be directly accessed by the trusted process.
  // It can be accessed by the trusted thread after fork()ing and calling
  // mprotect(PROT_READ|PROT_WRITE). The mutex is used for system calls that
  // require passing additional data, and that require the trusted process to
  // wait until the trusted thread is done processing (e.g. exit(), clone(),
  // open(), stat())
  static int syscall_mutex_ asm("playground$syscall_mutex");

  // Available in trusted process, only
  typedef std::map<void *, long> ProtectedMap;
  static ProtectedMap protectedMap_;
  static std::vector<SecureMem::Args*> secureMemPool_;
};

// If this struct is extended to contain parameters that are read by
// the trusted thread, we will have to mprotect() it to be read-only when
// starting the sandbox. However, currently it is read only by the
// trusted process, and the sandboxed process cannot change the values
// that the fork()'d trusted process sees.
struct SandboxPolicy {
  bool allow_file_namespace; // Allow filename-based system calls.
};

extern struct SandboxPolicy g_policy;

} // namespace

using playground::Sandbox;
#endif // __cplusplus

#endif // SANDBOX_IMPL_H__
diff --git a/sandbox/linux/seccomp/seccomp.gyp b/sandbox/linux/seccomp/seccomp.gyp
deleted file mode 100644
index 596be21..0000000
--- a/sandbox/linux/seccomp/seccomp.gyp
+++ /dev/null
@@ -1,93 +0,0 @@
# Copyright (c) 2010 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
- -{ - 'variables': { - 'chromium_code': 1, - 'seccomp_intermediate_dir': '<(INTERMEDIATE_DIR)/seccomp-sandbox', - }, - 'targets': [ - { - 'target_name': 'seccomp_sandbox', - 'type': 'static_library', - 'sources': [ - 'access.cc', - 'allocator.cc', - 'allocator.h', - 'clone.cc', - 'exit.cc', - 'debug.cc', - 'getpid.cc', - 'gettid.cc', - 'ioctl.cc', - 'ipc.cc', - 'library.cc', - 'library.h', - 'linux_syscall_support.h', - 'madvise.cc', - 'maps.cc', - 'maps.h', - 'mmap.cc', - 'mprotect.cc', - 'munmap.cc', - 'mutex.h', - 'open.cc', - 'sandbox.cc', - 'sandbox.h', - 'sandbox_impl.h', - 'securemem.cc', - 'securemem.h', - 'sigaction.cc', - 'sigprocmask.cc', - 'socketcall.cc', - 'stat.cc', - 'syscall.cc', - 'syscall.h', - 'syscall_table.c', - 'syscall_table.h', - 'tls.h', - 'trusted_process.cc', - 'trusted_thread.cc', - 'x86_decode.cc', - 'x86_decode.h', - ], - }, - { - 'target_name': 'seccomp_tests', - 'type': 'executable', - 'sources': [ - 'tests/test_syscalls.cc', - ], - 'include_dirs': [ - '.', - '<(seccomp_intermediate_dir)', - ], - 'dependencies': [ - 'seccomp_sandbox', - ], - 'libraries': [ - '-lpthread', - '-lutil', # For openpty() - ], - 'actions': [ - { - 'action_name': 'make_test_list', - 'inputs': [ - 'tests/list_tests.py', - 'tests/test_syscalls.cc', - ], - 'outputs': ['<(seccomp_intermediate_dir)/test-list.h'], - 'action': ['sh', '-c', 'python <(_inputs) > <(_outputs)'], - }, - ], - }, - { - 'target_name': 'timestats', - 'type': 'executable', - 'sources': [ - 'timestats.cc', - ], - }, - ], -} diff --git a/sandbox/linux/seccomp/securemem.cc b/sandbox/linux/seccomp/securemem.cc deleted file mode 100644 index 5f07bbe..0000000 --- a/sandbox/linux/seccomp/securemem.cc +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include "debug.h" -#include "mutex.h" -#include "sandbox_impl.h" -#include "securemem.h" - -namespace playground { - -void SecureMem::abandonSystemCall(int fd, int err) { - void* rc = reinterpret_cast<void *>(err); - if (err) { - Debug::message("System call failed\n"); - } - Sandbox::SysCalls sys; - if (Sandbox::write(sys, fd, &rc, sizeof(rc)) != sizeof(rc)) { - Sandbox::die("Failed to send system call"); - } -} - -void SecureMem::dieIfParentDied(int parentMapsFd) { - // The syscall_mutex_ should not be contended. If it is, we are either - // experiencing a very unusual load of system calls that the sandbox is not - // optimized for; or, more likely, the sandboxed process terminated while the - // trusted process was in the middle of waiting for the mutex. We detect - // this situation and terminate the trusted process. - int alive = !lseek(parentMapsFd, 0, SEEK_SET); - if (alive) { - char buf; - do { - alive = read(parentMapsFd, &buf, 1); - } while (alive < 0 && errno == EINTR); - } - if (!alive) { - Sandbox::die(); - } -} - -void SecureMem::lockSystemCall(int parentMapsFd, Args* mem) { - while (!Mutex::lockMutex(&Sandbox::syscall_mutex_, 500)) { - dieIfParentDied(parentMapsFd); - } - asm volatile( - #if defined(__x86_64__) - "lock; incq (%0)\n" - #elif defined(__i386__) - "lock; incl (%0)\n" - #else - #error Unsupported target platform - #endif - : - : "q"(&mem->sequence) - : "memory"); -} - -void SecureMem::sendSystemCallInternal(int fd, bool locked, int parentMapsFd, - Args* mem, int syscallNum, void* arg1, - void* arg2, void* arg3, void* arg4, - void* arg5, void* arg6) { - if (!locked) { - asm volatile( - #if defined(__x86_64__) - "lock; incq (%0)\n" - #elif defined(__i386__) - "lock; incl (%0)\n" - #else - #error Unsupported target platform - #endif - : - : "q"(&mem->sequence) - : "memory"); - } - mem->callType = locked ? 
-2 : -1; - mem->syscallNum = syscallNum; - mem->arg1 = arg1; - mem->arg2 = arg2; - mem->arg3 = arg3; - mem->arg4 = arg4; - mem->arg5 = arg5; - mem->arg6 = arg6; - asm volatile( - #if defined(__x86_64__) - "lock; incq (%0)\n" - #elif defined(__i386__) - "lock; incl (%0)\n" - #else - #error Unsupported target platform - #endif - : - : "q"(&mem->sequence) - : "memory"); - Sandbox::SysCalls sys; - if (Sandbox::write(sys, fd, &mem->callType, sizeof(int)) != sizeof(int)) { - Sandbox::die("Failed to send system call"); - } - if (parentMapsFd >= 0) { - while (!Mutex::waitForUnlock(&Sandbox::syscall_mutex_, 500)) { - dieIfParentDied(parentMapsFd); - } - } -} - -} // namespace diff --git a/sandbox/linux/seccomp/securemem.h b/sandbox/linux/seccomp/securemem.h deleted file mode 100644 index 91283db..0000000 --- a/sandbox/linux/seccomp/securemem.h +++ /dev/null @@ -1,205 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef SECURE_MEM_H__ -#define SECURE_MEM_H__ - -#include <stdlib.h> -#include "linux_syscall_support.h" - -namespace playground { - -class SecureMem { - public: - // Each thread is associated with two memory pages (i.e. 8192 bytes). This - // memory is fully accessible by the trusted process, but in the trusted - // thread and the sandboxed thread, the first page is only mapped PROT_READ, - // and the second one is PROT_READ|PROT_WRITE. - // - // The first page can be modified by the trusted process and this is the - // main mechanism how it communicates with the trusted thread. After each - // update, it updates the "sequence" number. The trusted process must - // check the "sequence" number has the expected value, and only then can - // it trust the data in this page. 
- typedef struct Args { - union { - struct { - union { - struct { - struct Args* self; - long sequence; - long callType; - long syscallNum; - void* arg1; - void* arg2; - void* arg3; - void* arg4; - void* arg5; - void* arg6; - - // Used by clone() to allow return from the syscall wrapper. - void* ret; - #if defined(__x86_64__) - void* rbp; - void* rbx; - void* rcx; - void* rdx; - void* rsi; - void* rdi; - void* r8; - void* r9; - void* r10; - void* r11; - void* r12; - void* r13; - void* r14; - void* r15; - #elif defined(__i386__) - void* ebp; - void* edi; - void* esi; - void* edx; - void* ecx; - void* ebx; - #else - #error Unsupported target platform - #endif - - // Used by clone() to set up data for the new thread. - struct Args* newSecureMem; - int processFdPub; - int cloneFdPub; - - // Set to non-zero, if in debugging mode - int allowAllSystemCalls; - - // The most recent SysV SHM identifier returned by - // shmget(IPC_PRIVATE) - int shmId; - - // The following entries make up the sandboxed thread's TLS - long long cookie; - long long threadId; - long long threadFdPub; - } __attribute__((packed)); - char header[512]; - }; - // Used for calls such as open() and stat(). - char pathname[4096 - 512]; - } __attribute__((packed)); - char securePage[4096]; - }; - union { - struct { - // This scratch space is used by the trusted thread to read parameters - // for unrestricted system calls. - int tmpSyscallNum; - void* tmpArg1; - void* tmpArg2; - void* tmpArg3; - void* tmpArg4; - void* tmpArg5; - void* tmpArg6; - void* tmpReturnValue; - - // Scratch space used to return the result of a rdtsc instruction - int rdtscpEax; - int rdtscpEdx; - int rdtscpEcx; - - // We often have long sequences of calls to gettimeofday(). This is - // needlessly expensive. Coalesce them into a single call. - int lastSyscallNum; - int gettimeofdayCounter; - - // For debugging purposes, we want to be able to log messages. This can - // result in additional system calls. 
Make sure that we don't trigger - // logging of those recursive calls. - int recursionLevel; - - // Computing the signal mask is expensive. Keep a cached copy. - kernel_sigset_t signalMask; - - // Keep track of whether we are in a SEGV handler - int inSegvHandler; - } __attribute__((packed)); - char scratchPage[4096]; - }; - } __attribute__((packed)) Args; - - // Allows the trusted process to check whether the parent process still - // exists. If it doesn't, kill the trusted process. - static void dieIfParentDied(int parentProc); - - // The trusted process received a system call that it intends to deny. - static void abandonSystemCall(int fd, int err); - - // Acquires the syscall_mutex_ prior to making changes to the parameters in - // the secure memory page. Used by calls such as exit(), clone(), open(), - // socketcall(), and stat(). - // After locking the mutex, it is no longer valid to abandon the system - // call! - static void lockSystemCall(int parentProc, Args* mem); - - // Sends a system call to the trusted thread. If "locked" is true, the - // caller must first call lockSystemCall() and must also provide - // "parentProc". In locked mode, sendSystemCall() won't return until the - // trusted thread has completed processing. - // Use sparingly as it serializes the operation of the trusted process. 
- static void sendSystemCall(int fd, bool locked, int parentProc, Args* mem, - int syscallNum) { - sendSystemCallInternal(fd, locked, parentProc, mem, syscallNum); - } - template<class T1> static - void sendSystemCall(int fd, bool locked, int parentProc, Args* mem, - int syscallNum, T1 arg1) { - sendSystemCallInternal(fd, locked, parentProc, mem, syscallNum, - (void*)arg1); - } - template<class T1, class T2> static - void sendSystemCall(int fd, bool locked, int parentProc, Args* mem, - int syscallNum, T1 arg1, T2 arg2) { - sendSystemCallInternal(fd, locked, parentProc, mem, syscallNum, - (void*)arg1, (void*)arg2); - } - template<class T1, class T2, class T3> static - void sendSystemCall(int fd, bool locked, int parentProc, Args* mem, - int syscallNum, T1 arg1, T2 arg2, T3 arg3) { - sendSystemCallInternal(fd, locked, parentProc, mem, syscallNum, - (void*)arg1, (void*)arg2, (void*)arg3); - } - template<class T1, class T2, class T3, class T4> static - void sendSystemCall(int fd, bool locked, int parentProc, Args* mem, - int syscallNum, T1 arg1, T2 arg2, T3 arg3, T4 arg4) { - sendSystemCallInternal(fd, locked, parentProc, mem, syscallNum, - (void*)arg1, (void*)arg2, (void*)arg3, (void*)arg4); - } - template<class T1, class T2, class T3, class T4, class T5> static - void sendSystemCall(int fd, bool locked, int parentProc, Args* mem, - int syscallNum, T1 arg1, T2 arg2, T3 arg3, T4 arg4, - T5 arg5) { - sendSystemCallInternal(fd, locked, parentProc, mem, syscallNum, - (void*)arg1, (void*)arg2, (void*)arg3, (void*)arg4, - (void*)arg5); - } - template<class T1, class T2, class T3, class T4, class T5, class T6> static - void sendSystemCall(int fd, bool locked, int parentProc, Args* mem, - int syscallNum, T1 arg1, T2 arg2, T3 arg3, T4 arg4, - T5 arg5, T6 arg6) { - sendSystemCallInternal(fd, locked, parentProc, mem, syscallNum, - (void*)arg1, (void*)arg2, (void*)arg3, (void*)arg4, - (void*)arg5, (void*)arg6); - } - - private: - static void sendSystemCallInternal(int fd, bool 
locked, int parentProc, - Args* mem, int syscallNum, void* arg1 = 0, - void* arg2 = 0, void* arg3 = 0, - void* arg4 = 0, void* arg5 = 0, - void* arg6 = 0); -}; - -} // namespace - -#endif // SECURE_MEM_H__ diff --git a/sandbox/linux/seccomp/sigaction.cc b/sandbox/linux/seccomp/sigaction.cc deleted file mode 100644 index 162416d..0000000 --- a/sandbox/linux/seccomp/sigaction.cc +++ /dev/null @@ -1,177 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// TODO(markus): We currently instrument the restorer functions with calls to -// the syscallWrapper(). This prevents gdb from properly -// creating backtraces of code that is running in signal -// handlers. We might instead want to always override the -// restorer with a function that contains the "magic" signature -// but that is not executable. The SEGV handler can detect this -// and then invoke the appropriate restorer. 
- -#include "debug.h" -#include "sandbox_impl.h" - -namespace playground { - -#if defined(__NR_sigaction) -long Sandbox::sandbox_sigaction(int signum, const void* a_, void* oa_) { - const SysCalls::kernel_old_sigaction* action = - reinterpret_cast<const SysCalls::kernel_old_sigaction*>(a_); - SysCalls::kernel_old_sigaction* old_action = - reinterpret_cast<SysCalls::kernel_old_sigaction*>(oa_); - - long rc = 0; - long long tm; - Debug::syscall(&tm, __NR_sigaction, "Executing handler"); - if (signum == SIGSEGV) { - if (old_action) { - old_action->sa_handler_ = sa_segv_.sa_handler_; - old_action->sa_mask = sa_segv_.sa_mask.sig[0]; - old_action->sa_flags = sa_segv_.sa_flags; - old_action->sa_restorer = sa_segv_.sa_restorer; - } - if (action) { - sa_segv_.sa_handler_ = action->sa_handler_; - sa_segv_.sa_mask.sig[0] = action->sa_mask; - sa_segv_.sa_flags = action->sa_flags; - sa_segv_.sa_restorer = action->sa_restorer; - } - } else { - struct { - int sysnum; - long long cookie; - SigAction sigaction_req; - } __attribute__((packed)) request; - request.sysnum = __NR_sigaction; - request.cookie = cookie(); - request.sigaction_req.sysnum = __NR_sigaction; - request.sigaction_req.signum = signum; - request.sigaction_req.action = - reinterpret_cast<const SysCalls::kernel_sigaction *>(action); - request.sigaction_req.old_action = - reinterpret_cast<const SysCalls::kernel_sigaction *>(old_action); - request.sigaction_req.sigsetsize = 8; - - SysCalls sys; - if (write(sys, processFdPub(), &request, sizeof(request)) != - sizeof(request) || - read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to forward sigaction() request [sandbox]"); - } - } - Debug::elapsed(tm, __NR_sigaction); - return rc; -} -#endif - -#if defined(__NR_rt_sigaction) -#define min(a,b) ({ typeof(a) a_=(a); typeof(b) b_=(b); a_ < b_ ? a_ : b_; }) -#define max(a,b) ({ typeof(a) a_=(a); typeof(b) b_=(b); a_ > b_ ? 
a_ : b_; }) - -long Sandbox::sandbox_rt_sigaction(int signum, const void* a_, void* oa_, - size_t sigsetsize) { - const SysCalls::kernel_sigaction* action = - reinterpret_cast<const SysCalls::kernel_sigaction*>(a_); - SysCalls::kernel_sigaction* old_action = - reinterpret_cast<SysCalls::kernel_sigaction*>(oa_); - - long rc = 0; - long long tm; - Debug::syscall(&tm, __NR_rt_sigaction, "Executing handler"); - if (signum == SIGSEGV) { - size_t theirSize = offsetof(SysCalls::kernel_sigaction, sa_mask) + - sigsetsize; - if (old_action) { - memcpy(old_action, &sa_segv_, min(sizeof(sa_segv_), theirSize)); - memset(old_action + 1, 0, max(0u, theirSize - sizeof(sa_segv_))); - } - if (action) { - memcpy(&sa_segv_, action, min(sizeof(sa_segv_), theirSize)); - memset(&sa_segv_.sa_mask, 0, max(0u, 8 - sigsetsize)); - } - } else { - struct { - int sysnum; - long long cookie; - SigAction sigaction_req; - } __attribute__((packed)) request; - request.sysnum = __NR_rt_sigaction; - request.cookie = cookie(); - request.sigaction_req.sysnum = __NR_rt_sigaction; - request.sigaction_req.signum = signum; - request.sigaction_req.action = action; - request.sigaction_req.old_action = old_action; - request.sigaction_req.sigsetsize = sigsetsize; - - SysCalls sys; - if (write(sys, processFdPub(), &request, sizeof(request)) != - sizeof(request) || - read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to forward rt_sigaction() request [sandbox]"); - } - } - Debug::elapsed(tm, __NR_rt_sigaction); - return rc; -} -#endif - -#if defined(__NR_signal) -void* Sandbox::sandbox_signal(int signum, const void* handler) { - struct kernel_old_sigaction sa, osa; - sa.sa_handler_ = reinterpret_cast<void (*)(int)>(handler); - sa.sa_flags = SA_NODEFER | SA_RESETHAND | SA_RESTORER; - sa.sa_mask = 0; - asm volatile( - "lea 0f, %0\n" - "jmp 1f\n" - "0:pop %%eax\n" - "mov $119, %%eax\n" // __NR_sigreturn - "int $0x80\n" - "1:\n" - : "=r"(sa.sa_restorer)); - long rc = sandbox_sigaction(signum, 
&sa, &osa); - if (rc < 0) { - return (void *)rc; - } - return reinterpret_cast<void *>(osa.sa_handler_); -} -#endif - -bool Sandbox::process_sigaction(int parentMapsFd, int sandboxFd, - int threadFdPub, int threadFd, - SecureMem::Args* mem) { - // We need to intercept sigaction() in order to properly rewrite calls to - // sigaction(SEGV). While there is no security implication if we didn't do - // so, it would end up preventing the program from running correctly as the - // the sandbox's SEGV handler could accidentally get removed. All of this is - // done in sandbox_{,rt_}sigaction(). But we still bounce through the - // trusted process as that is the only way we can instrument system calls. - // This is somewhat needlessly complicated. But as sigaction() is not a - // performance critical system call, it is easier to do this way than to - // extend the format of the syscall_table so that it could deal with this - // special case. - - // Read request - SigAction sigaction_req; - SysCalls sys; - if (read(sys, sandboxFd, &sigaction_req, sizeof(sigaction_req)) != - sizeof(sigaction_req)) { - die("Failed to read parameters for sigaction() [process]"); - } - if (sigaction_req.signum == SIGSEGV) { - // This should never happen. Something went wrong when intercepting the - // system call. This is not a security problem, but it clearly doesn't - // make sense to let the system call pass. - SecureMem::abandonSystemCall(threadFd, -EINVAL); - return false; - } - SecureMem::sendSystemCall(threadFdPub, false, -1, mem, sigaction_req.sysnum, - sigaction_req.signum, sigaction_req.action, - sigaction_req.old_action, - sigaction_req.sigsetsize); - return true; -} - -} // namespace diff --git a/sandbox/linux/seccomp/sigprocmask.cc b/sandbox/linux/seccomp/sigprocmask.cc deleted file mode 100644 index 9ff2922..0000000 --- a/sandbox/linux/seccomp/sigprocmask.cc +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. 
-// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "debug.h" -#include "sandbox_impl.h" - -namespace playground { - -// If the sandboxed process tries to mask SIGSEGV, there is a good chance -// the process will eventually get terminated. If this is really ever a -// problem, we can hide the fact that SIGSEGV is unmasked. But I don't think -// we really need this. Masking of synchronous signals is rarely necessary. - -#if defined(__NR_sigprocmask) -long Sandbox::sandbox_sigprocmask(int how, const void* set, void* old_set) { - long long tm; - Debug::syscall(&tm, __NR_sigprocmask, "Executing handler"); - - // Access the signal mask by triggering a SEGV and modifying the signal state - // prior to calling rt_sigreturn(). - long res = -ENOSYS; - #if defined(__x86_64__) - #error x86-64 does not support sigprocmask(); use rt_sigprocmask() instead - #elif defined(__i386__) - asm volatile( - "push %%ebx\n" - "movl %2, %%ebx\n" - "int $0\n" - "pop %%ebx\n" - : "=a"(res) - : "0"(__NR_sigprocmask), "ri"((long)how), - "c"((long)set), "d"((long)old_set) - : "esp", "memory"); - #else - #error Unsupported target platform - #endif - - // Update our shadow signal mask, so that we can copy it upon creation of - // new threads. 
- if (res == 0 && set != NULL) { - SecureMem::Args* args = getSecureMem(); - switch (how) { - case SIG_BLOCK: - *(unsigned long long *)&args->signalMask |= *(unsigned long long *)set; - break; - case SIG_UNBLOCK: - *(unsigned long long *)&args->signalMask &= ~*(unsigned long long *)set; - break; - case SIG_SETMASK: - *(unsigned long long *)&args->signalMask = *(unsigned long long *)set; - break; - default: - break; - } - } - - Debug::elapsed(tm, __NR_sigprocmask); - - return res; -} -#endif - -#if defined(__NR_rt_sigprocmask) -long Sandbox::sandbox_rt_sigprocmask(int how, const void* set, void* old_set, - size_t bytes) { - long long tm; - Debug::syscall(&tm, __NR_rt_sigprocmask, "Executing handler"); - - // Access the signal mask by triggering a SEGV and modifying the signal state - // prior to calling rt_sigreturn(). - long res = -ENOSYS; - #if defined(__x86_64__) - asm volatile( - "movq %5, %%r10\n" - "int $0\n" - : "=a"(res) - : "0"(__NR_rt_sigprocmask), "D"((long)how), - "S"((long)set), "d"((long)old_set), "r"((long)bytes) - : "r10", "r11", "rcx", "memory"); - #elif defined(__i386__) - asm volatile( - "push %%ebx\n" - "movl %2, %%ebx\n" - "int $0\n" - "pop %%ebx\n" - : "=a"(res) - : "0"(__NR_rt_sigprocmask), "ri"((long)how), - "c"((long)set), "d"((long)old_set), "S"((long)bytes) - : "esp", "memory"); - #else - #error Unsupported target platform - #endif - - // Update our shadow signal mask, so that we can copy it upon creation of - // new threads. 
- if (res == 0 && set != NULL && bytes >= 8) { - SecureMem::Args* args = getSecureMem(); - switch (how) { - case SIG_BLOCK: - *(unsigned long long *)&args->signalMask |= *(unsigned long long *)set; - break; - case SIG_UNBLOCK: - *(unsigned long long *)&args->signalMask &= ~*(unsigned long long *)set; - break; - case SIG_SETMASK: - *(unsigned long long *)&args->signalMask = *(unsigned long long *)set; - break; - default: - break; - } - } - - Debug::elapsed(tm, __NR_rt_sigprocmask); - - return res; -} -#endif - -} // namespace diff --git a/sandbox/linux/seccomp/socketcall.cc b/sandbox/linux/seccomp/socketcall.cc deleted file mode 100644 index c7b2015..0000000 --- a/sandbox/linux/seccomp/socketcall.cc +++ /dev/null @@ -1,1039 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "debug.h" -#include "sandbox_impl.h" - -namespace playground { - -#if defined(__NR_socket) - -ssize_t Sandbox::sandbox_recvfrom(int sockfd, void* buf, size_t len, int flags, - void* from, socklen_t* fromlen) { - long long tm; - Debug::syscall(&tm, __NR_recvfrom, "Executing handler"); - - SysCalls sys; - if (!from && !flags) { - // recv() with a NULL sender and no flags is the same as read(), which - // is unrestricted in seccomp mode. 
- Debug::message("Replaced recv() with call to read()"); - ssize_t rc = sys.read(sockfd, buf, len); - if (rc < 0) { - Debug::elapsed(tm, __NR_recvfrom); - return -sys.my_errno; - } else { - Debug::elapsed(tm, __NR_recvfrom); - return rc; - } - } - - struct { - int sysnum; - long long cookie; - RecvFrom recvfrom_req; - } __attribute__((packed)) request; - request.sysnum = __NR_recvfrom; - request.cookie = cookie(); - request.recvfrom_req.sockfd = sockfd; - request.recvfrom_req.buf = buf; - request.recvfrom_req.len = len; - request.recvfrom_req.flags = flags; - request.recvfrom_req.from = from; - request.recvfrom_req.fromlen = fromlen; - - long rc; - if (write(sys, processFdPub(), &request, sizeof(request)) != - sizeof(request) || - read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to forward recvfrom() request [sandbox]"); - } - Debug::elapsed(tm, __NR_recvfrom); - return static_cast<ssize_t>(rc); -} - -ssize_t Sandbox::sandbox_recvmsg(int sockfd, struct msghdr* msg, int flags) { - long long tm; - Debug::syscall(&tm, __NR_recvmsg, "Executing handler"); - - // We cannot simplify recvmsg() to recvfrom(), recv() or read(), as we do - // not know whether the caller needs us to set msg->msg_flags. 
- struct { - int sysnum; - long long cookie; - RecvMsg recvmsg_req; - } __attribute__((packed)) request; - request.sysnum = __NR_recvmsg; - request.cookie = cookie(); - request.recvmsg_req.sockfd = sockfd; - request.recvmsg_req.msg = msg; - request.recvmsg_req.flags = flags; - - long rc; - SysCalls sys; - if (write(sys, processFdPub(), &request, sizeof(request)) != - sizeof(request) || - read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to forward recvmsg() request [sandbox]"); - } - Debug::elapsed(tm, __NR_recvmsg); - return static_cast<ssize_t>(rc); -} - -size_t Sandbox::sandbox_sendmsg(int sockfd, const struct msghdr* msg, - int flags) { - long long tm; - Debug::syscall(&tm, __NR_sendmsg, "Executing handler"); - - if (msg->msg_iovlen == 1 && msg->msg_controllen == 0) { - // sendmsg() can sometimes be simplified as sendto() - return sandbox_sendto(sockfd, msg->msg_iov, msg->msg_iovlen, - flags, msg->msg_name, msg->msg_namelen); - } - - struct Request { - int sysnum; - long long cookie; - SendMsg sendmsg_req; - struct msghdr msg; - } __attribute__((packed)); - char data[sizeof(struct Request) + msg->msg_namelen + msg->msg_controllen]; - struct Request *request = reinterpret_cast<struct Request *>(data); - request->sysnum = __NR_sendmsg; - request->cookie = cookie(); - request->sendmsg_req.sockfd = sockfd; - request->sendmsg_req.msg = msg; - request->sendmsg_req.flags = flags; - request->msg = *msg; - memcpy(reinterpret_cast<char *>( - memcpy(request + 1, msg->msg_name, msg->msg_namelen)) + - msg->msg_namelen, - msg->msg_control, msg->msg_controllen); - - long rc; - SysCalls sys; - if (write(sys, processFdPub(), &data, sizeof(data)) != - (ssize_t)sizeof(data) || - read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to forward sendmsg() request [sandbox]"); - } - Debug::elapsed(tm, __NR_sendmsg); - return static_cast<ssize_t>(rc); -} - -ssize_t Sandbox::sandbox_sendto(int sockfd, const void* buf, size_t len, - int flags, 
const void* to, socklen_t tolen) { - long long tm; - Debug::syscall(&tm, __NR_sendto, "Executing handler"); - - SysCalls sys; - if (!to && !flags) { - // sendto() with a NULL recipient and no flags is the same as write(), - // which is unrestricted in seccomp mode. - Debug::message("Replaced sendto() with call to write()"); - ssize_t rc = sys.write(sockfd, buf, len); - if (rc < 0) { - Debug::elapsed(tm, __NR_sendto); - return -sys.my_errno; - } else { - Debug::elapsed(tm, __NR_sendto); - return rc; - } - } - - struct { - int sysnum; - long long cookie; - SendTo sendto_req; - } __attribute__((packed)) request; - request.sysnum = __NR_sendto; - request.cookie = cookie(); - request.sendto_req.sockfd = sockfd; - request.sendto_req.buf = buf; - request.sendto_req.len = len; - request.sendto_req.flags = flags; - request.sendto_req.to = to; - request.sendto_req.tolen = tolen; - - long rc; - if (write(sys, processFdPub(), &request, sizeof(request)) != - sizeof(request) || - read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to forward sendto() request [sandbox]"); - } - Debug::elapsed(tm, __NR_sendto); - return static_cast<ssize_t>(rc); -} - -long Sandbox::sandbox_setsockopt(int sockfd, int level, int optname, - const void* optval, socklen_t optlen) { - long long tm; - Debug::syscall(&tm, __NR_setsockopt, "Executing handler"); - - struct { - int sysnum; - long long cookie; - SetSockOpt setsockopt_req; - } __attribute__((packed)) request; - request.sysnum = __NR_setsockopt; - request.cookie = cookie(); - request.setsockopt_req.sockfd = sockfd; - request.setsockopt_req.level = level; - request.setsockopt_req.optname = optname; - request.setsockopt_req.optval = optval; - request.setsockopt_req.optlen = optlen; - - long rc; - SysCalls sys; - if (write(sys, processFdPub(), &request, sizeof(request)) != - sizeof(request) || - read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to forward setsockopt() request [sandbox]"); - } - 
Debug::elapsed(tm, __NR_setsockopt); - return rc; -} - -long Sandbox::sandbox_getsockopt(int sockfd, int level, int optname, - void* optval, socklen_t* optlen) { - long long tm; - Debug::syscall(&tm, __NR_getsockopt, "Executing handler"); - - struct { - int sysnum; - long long cookie; - GetSockOpt getsockopt_req; - } __attribute__((packed)) request; - request.sysnum = __NR_getsockopt; - request.cookie = cookie(); - request.getsockopt_req.sockfd = sockfd; - request.getsockopt_req.level = level; - request.getsockopt_req.optname = optname; - request.getsockopt_req.optval = optval; - request.getsockopt_req.optlen = optlen; - - long rc; - SysCalls sys; - if (write(sys, processFdPub(), &request, sizeof(request)) != - sizeof(request) || - read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to forward getsockopt() request [sandbox]"); - } - Debug::elapsed(tm, __NR_getsockopt); - return rc; -} - -bool Sandbox::process_recvfrom(int parentMapsFd, int sandboxFd, - int threadFdPub, int threadFd, - SecureMem::Args* mem) { - // Read request - RecvFrom recvfrom_req; - SysCalls sys; - if (read(sys, sandboxFd, &recvfrom_req, sizeof(recvfrom_req)) != - sizeof(recvfrom_req)) { - die("Failed to read parameters for recvfrom() [process]"); - } - - // Unsupported flag encountered. Deny the call. - if (recvfrom_req.flags & - ~(MSG_DONTWAIT|MSG_OOB|MSG_PEEK|MSG_TRUNC|MSG_WAITALL)) { - SecureMem::abandonSystemCall(threadFd, -EINVAL); - return false; - } - - // While we do not anticipate any particular need to receive data on - // unconnected sockets, there is no particular risk in doing so. 
- SecureMem::sendSystemCall(threadFdPub, false, -1, mem, - __NR_recvfrom, recvfrom_req.sockfd, - recvfrom_req.buf, recvfrom_req.len, - recvfrom_req.flags, recvfrom_req.from, - recvfrom_req.fromlen); - return true; -} - -bool Sandbox::process_recvmsg(int parentMapsFd, int sandboxFd, int threadFdPub, - int threadFd, SecureMem::Args* mem) { - // Read request - RecvMsg recvmsg_req; - SysCalls sys; - if (read(sys, sandboxFd, &recvmsg_req, sizeof(recvmsg_req)) != - sizeof(recvmsg_req)) { - die("Failed to read parameters for recvmsg() [process]"); - } - - // Unsupported flag encountered. Deny the call. - if (recvmsg_req.flags & - ~(MSG_DONTWAIT|MSG_OOB|MSG_PEEK|MSG_TRUNC|MSG_WAITALL)) { - SecureMem::abandonSystemCall(threadFd, -EINVAL); - return false; - } - - // Receiving messages is general not security critical. - SecureMem::sendSystemCall(threadFdPub, false, -1, mem, - __NR_recvmsg, recvmsg_req.sockfd, - recvmsg_req.msg, recvmsg_req.flags); - return true; -} - -bool Sandbox::process_sendmsg(int parentMapsFd, int sandboxFd, int threadFdPub, - int threadFd, SecureMem::Args* mem) { - // Read request - struct { - SendMsg sendmsg_req; - struct msghdr msg; - } __attribute__((packed)) data; - SysCalls sys; - if (read(sys, sandboxFd, &data, sizeof(data)) != sizeof(data)) { - die("Failed to read parameters for sendmsg() [process]"); - } - - if (data.msg.msg_namelen > 4096 || data.msg.msg_controllen > 4096) { - die("Unexpected size for socketcall() payload [process]"); - } - char extra[data.msg.msg_namelen + data.msg.msg_controllen]; - if (read(sys, sandboxFd, &extra, sizeof(extra)) != (ssize_t)sizeof(extra)) { - die("Failed to read parameters for sendmsg() [process]"); - } - if (sizeof(struct msghdr) + sizeof(extra) > sizeof(mem->pathname)) { - goto deny; - } - - if (data.msg.msg_namelen || - (data.sendmsg_req.flags & - ~(MSG_CONFIRM|MSG_DONTWAIT|MSG_EOR|MSG_MORE|MSG_NOSIGNAL|MSG_OOB))) { - deny: - SecureMem::abandonSystemCall(threadFd, -EINVAL); - return false; - } - - // The 
trusted process receives file handles when a new untrusted thread - // gets created. We have security checks in place that prevent any - // critical information from being tampered with during thread creation. - // But if we disallowed passing of file handles, this would add an extra - // hurdle for an attacker. - // Unfortunately, for now, this is not possible as Chrome's - // base::SendRecvMsg() needs the ability to pass file handles. - if (data.msg.msg_controllen) { - data.msg.msg_control = extra + data.msg.msg_namelen; - struct cmsghdr *cmsg = CMSG_FIRSTHDR(&data.msg); - do { - if (cmsg->cmsg_level != SOL_SOCKET || - cmsg->cmsg_type != SCM_RIGHTS) { - goto deny; - } - } while ((cmsg = CMSG_NXTHDR(&data.msg, cmsg)) != NULL); - } - - // This must be a locked system call, because we have to ensure that the - // untrusted code does not tamper with the msghdr after we have examined it. - SecureMem::lockSystemCall(parentMapsFd, mem); - if (sizeof(extra) > 0) { - if (data.msg.msg_namelen > 0) { - data.msg.msg_name = mem->pathname + sizeof(struct msghdr); - } - if (data.msg.msg_controllen > 0) { - data.msg.msg_control = mem->pathname + sizeof(struct msghdr) + - data.msg.msg_namelen; - } - memcpy(mem->pathname + sizeof(struct msghdr), extra, sizeof(extra)); - } - memcpy(mem->pathname, &data.msg, sizeof(struct msghdr)); - SecureMem::sendSystemCall(threadFdPub, true, parentMapsFd, mem, - __NR_sendmsg, data.sendmsg_req.sockfd, - mem->pathname - (char*)mem + (char*)mem->self, - data.sendmsg_req.flags); - return true; -} - -bool Sandbox::process_sendto(int parentMapsFd, int sandboxFd, int threadFdPub, - int threadFd, SecureMem::Args* mem) { - // Read request - SendTo sendto_req; - SysCalls sys; - if (read(sys, sandboxFd, &sendto_req, sizeof(sendto_req)) != - sizeof(sendto_req)) { - die("Failed to read parameters for sendto() [process]"); - } - - // The sandbox does not allow sending to arbitrary addresses. 
- if (sendto_req.to) { - SecureMem::abandonSystemCall(threadFd, -EINVAL); - return false; - } - - // Unsupported flag encountered. Deny the call. - if (sendto_req.flags & - ~(MSG_CONFIRM|MSG_DONTWAIT|MSG_EOR|MSG_MORE|MSG_NOSIGNAL|MSG_OOB)) { - SecureMem::abandonSystemCall(threadFd, -EINVAL); - return false; - } - - // Sending data on a connected socket is similar to calling write(). - // Allow it. - SecureMem::sendSystemCall(threadFdPub, false, -1, mem, - __NR_sendto, sendto_req.sockfd, - sendto_req.buf, sendto_req.len, - sendto_req.flags, sendto_req.to, - sendto_req.tolen); - return true; -} - -bool Sandbox::process_setsockopt(int parentMapsFd, int sandboxFd, - int threadFdPub, int threadFd, - SecureMem::Args* mem) { - // Read request - SetSockOpt setsockopt_req; - SysCalls sys; - if (read(sys, sandboxFd, &setsockopt_req, sizeof(setsockopt_req)) != - sizeof(setsockopt_req)) { - die("Failed to read parameters for setsockopt() [process]"); - } - - switch (setsockopt_req.level) { - case SOL_SOCKET: - switch (setsockopt_req.optname) { - case SO_KEEPALIVE: - case SO_LINGER: - case SO_OOBINLINE: - case SO_RCVBUF: - case SO_RCVLOWAT: - case SO_SNDLOWAT: - case SO_RCVTIMEO: - case SO_SNDTIMEO: - case SO_REUSEADDR: - case SO_SNDBUF: - case SO_TIMESTAMP: - SecureMem::sendSystemCall(threadFdPub, false, -1, mem, - __NR_setsockopt, setsockopt_req.sockfd, - setsockopt_req.level, setsockopt_req.optname, - setsockopt_req.optval, setsockopt_req.optlen); - return true; - default: - break; - } - break; - case IPPROTO_TCP: - switch (setsockopt_req.optname) { - case TCP_CORK: - case TCP_DEFER_ACCEPT: - case TCP_INFO: - case TCP_KEEPCNT: - case TCP_KEEPIDLE: - case TCP_KEEPINTVL: - case TCP_LINGER2: - case TCP_MAXSEG: - case TCP_NODELAY: - case TCP_QUICKACK: - case TCP_SYNCNT: - case TCP_WINDOW_CLAMP: - SecureMem::sendSystemCall(threadFdPub, false, -1, mem, - __NR_setsockopt, setsockopt_req.sockfd, - setsockopt_req.level, setsockopt_req.optname, - setsockopt_req.optval, 
setsockopt_req.optlen); - return true; - default: - break; - } - break; - default: - break; - } - SecureMem::abandonSystemCall(threadFd, -EINVAL); - return false; -} - -bool Sandbox::process_getsockopt(int parentMapsFd, int sandboxFd, - int threadFdPub, int threadFd, - SecureMem::Args* mem) { - // Read request - GetSockOpt getsockopt_req; - SysCalls sys; - if (read(sys, sandboxFd, &getsockopt_req, sizeof(getsockopt_req)) != - sizeof(getsockopt_req)) { - die("Failed to read parameters for getsockopt() [process]"); - } - - switch (getsockopt_req.level) { - case SOL_SOCKET: - switch (getsockopt_req.optname) { - case SO_ACCEPTCONN: - case SO_ERROR: - case SO_KEEPALIVE: - case SO_LINGER: - case SO_OOBINLINE: - case SO_RCVBUF: - case SO_RCVLOWAT: - case SO_SNDLOWAT: - case SO_RCVTIMEO: - case SO_SNDTIMEO: - case SO_REUSEADDR: - case SO_SNDBUF: - case SO_TIMESTAMP: - case SO_TYPE: - SecureMem::sendSystemCall(threadFdPub, false, -1, mem, - __NR_getsockopt, getsockopt_req.sockfd, - getsockopt_req.level, getsockopt_req.optname, - getsockopt_req.optval, getsockopt_req.optlen); - return true; - default: - break; - } - break; - case IPPROTO_TCP: - switch (getsockopt_req.optname) { - case TCP_CORK: - case TCP_DEFER_ACCEPT: - case TCP_INFO: - case TCP_KEEPCNT: - case TCP_KEEPIDLE: - case TCP_KEEPINTVL: - case TCP_LINGER2: - case TCP_MAXSEG: - case TCP_NODELAY: - case TCP_QUICKACK: - case TCP_SYNCNT: - case TCP_WINDOW_CLAMP: - SecureMem::sendSystemCall(threadFdPub, false, -1, mem, - __NR_getsockopt, getsockopt_req.sockfd, - getsockopt_req.level, getsockopt_req.optname, - getsockopt_req.optval, getsockopt_req.optlen); - return true; - default: - break; - } - break; - default: - break; - } - SecureMem::abandonSystemCall(threadFd, -EINVAL); - return false; -} - -#endif -#if defined(__NR_socketcall) - -enum { - SYS_SOCKET = 1, - SYS_BIND = 2, - SYS_CONNECT = 3, - SYS_LISTEN = 4, - SYS_ACCEPT = 5, - SYS_GETSOCKNAME = 6, - SYS_GETPEERNAME = 7, - SYS_SOCKETPAIR = 8, - SYS_SEND = 9, - 
SYS_RECV = 10, - SYS_SENDTO = 11, - SYS_RECVFROM = 12, - SYS_SHUTDOWN = 13, - SYS_SETSOCKOPT = 14, - SYS_GETSOCKOPT = 15, - SYS_SENDMSG = 16, - SYS_RECVMSG = 17, - SYS_ACCEPT4 = 18 -}; - -struct Sandbox::SocketCallArgInfo { - size_t len; - off_t addrOff; - off_t lengthOff; -}; -const struct Sandbox::SocketCallArgInfo Sandbox::socketCallArgInfo[] = { - #define STRUCT(s) reinterpret_cast<SocketCall *>(0)->args.s - #define SIZE(s) sizeof(STRUCT(s)) - #define OFF(s, f) offsetof(typeof STRUCT(s), f) - { 0 }, - { SIZE(socket) }, - { SIZE(bind), OFF(bind, addr), OFF(bind, addrlen) }, - { SIZE(connect), OFF(connect, addr), OFF(connect, addrlen) }, - { SIZE(listen) }, - { SIZE(accept) }, - { SIZE(getsockname) }, - { SIZE(getpeername) }, - { SIZE(socketpair) }, - { SIZE(send) }, - { SIZE(recv) }, - { SIZE(sendto), OFF(sendto, to), OFF(sendto, tolen) }, - { SIZE(recvfrom) }, - { SIZE(shutdown) }, - { SIZE(setsockopt), OFF(setsockopt, optval), OFF(setsockopt, optlen) }, - { SIZE(getsockopt) }, - { SIZE(sendmsg) }, - { SIZE(recvmsg) }, - { SIZE(accept4) } - #undef STRUCT - #undef SIZE - #undef OFF -}; - -long Sandbox::sandbox_socketcall(int call, void* args) { - long long tm; - Debug::syscall(&tm, __NR_socketcall, "Executing handler", call); - - // When demultiplexing socketcall(), only accept calls that have a valid - // "call" opcode. - if (call < SYS_SOCKET || call > SYS_ACCEPT4) { - Debug::elapsed(tm, __NR_socketcall, call); - return -ENOSYS; - } - - // Some type of calls include a pointer to an address or name, which cannot - // be accessed by the trusted process, as it lives in a separate address - // space. For these calls, append the extra data to the serialized request. - // This requires some copying of data, as we have to make sure there is - // only a single atomic call to write(). 
- socklen_t numExtraData = 0; - const void* extraDataAddr = NULL; - if (socketCallArgInfo[call].lengthOff) { - memcpy(&numExtraData, - reinterpret_cast<char *>(args) + socketCallArgInfo[call].lengthOff, - sizeof(socklen_t)); - extraDataAddr = reinterpret_cast<char *>(args) + - socketCallArgInfo[call].addrOff; - } - - // sendmsg() and recvmsg() have more complicated requirements for computing - // the amount of extra data that needs to be sent to the trusted process. - if (call == SYS_SENDMSG) { - SendMsg *sendmsg_args = reinterpret_cast<SendMsg *>(args); - if (sendmsg_args->msg->msg_iovlen == 1 && - !sendmsg_args->msg->msg_control) { - // Further down in the code, this sendmsg() call will be simplified to - // a sendto() call. Make sure we already compute the correct value for - // numExtraData, as it is needed when we allocate "data[]" on the stack. - numExtraData = sendmsg_args->msg->msg_namelen; - extraDataAddr = sendmsg_args->msg->msg_name; - } else { - // sendmsg() needs to include some of the extra data so that we can - // inspect it in process_socketcall() - numExtraData = sizeof(*sendmsg_args->msg) + - sendmsg_args->msg->msg_namelen + - sendmsg_args->msg->msg_controllen; - extraDataAddr = NULL; - } - } - if (call == SYS_RECVMSG) { - RecvMsg *recvmsg_args = reinterpret_cast<RecvMsg *>(args); - numExtraData = sizeof(*recvmsg_args->msg); - extraDataAddr = recvmsg_args->msg; - } - - // Set up storage for the request header and copy the data from "args" - // into it. - struct Request { - int sysnum; - long long cookie; - SocketCall socketcall_req; - } __attribute__((packed)) *request; - char data[sizeof(struct Request) + numExtraData]; - request = reinterpret_cast<struct Request *>(data); - memcpy(&request->socketcall_req.args, args, socketCallArgInfo[call].len); - - // Simplify send(), sendto() and sendmsg(), if there are simpler equivalent - // calls. 
This allows us to occasionally replace them with calls to write(), - // which don't have to be forwarded to the trusted process. - SysCalls sys; - if (call == SYS_SENDMSG && - request->socketcall_req.args.sendmsg.msg->msg_iovlen == 1 && - !request->socketcall_req.args.sendmsg.msg->msg_control) { - // Ordering of these assignments is important, as we are reshuffling - // fields inside of a union. - call = SYS_SENDTO; - request->socketcall_req.args.sendto.flags = - request->socketcall_req.args.sendmsg.flags; - request->socketcall_req.args.sendto.to = - request->socketcall_req.args.sendmsg.msg->msg_name; - request->socketcall_req.args.sendto.tolen = - request->socketcall_req.args.sendmsg.msg->msg_namelen; - request->socketcall_req.args.sendto.len = - request->socketcall_req.args.sendmsg.msg->msg_iov->iov_len; - request->socketcall_req.args.sendto.buf = - request->socketcall_req.args.sendmsg.msg->msg_iov->iov_base; - } - if (call == SYS_SENDTO && !request->socketcall_req.args.sendto.to) { - // sendto() with a NULL address is the same as send() - call = SYS_SEND; - numExtraData = 0; - } - if (call == SYS_SEND && !request->socketcall_req.args.send.flags) { - // send() with no flags is the same as write(), which is unrestricted - // in seccomp mode. - Debug::message("Replaced socketcall() with call to write()"); - ssize_t rc = sys.write(request->socketcall_req.args.send.sockfd, - request->socketcall_req.args.send.buf, - request->socketcall_req.args.send.len); - if (rc < 0) { - Debug::elapsed(tm, __NR_socketcall, call); - return -sys.my_errno; - } else { - Debug::elapsed(tm, __NR_socketcall, call); - return rc; - } - } - - // Simplify recv(), and recvfrom(), if there are simpler equivalent calls. - // This allows us to occasionally replace them with calls to read(), which - // don't have to be forwarded to the trusted process. - // We cannot simplify recvmsg() to recvfrom(), recv() or read(), as we do - // not know whether the caller needs us to set msg->msg_flags. 
- if (call == SYS_RECVFROM && !request->socketcall_req.args.recvfrom.from) { - // recvfrom() with a NULL address buffer is the same as recv() - call = SYS_RECV; - } - if (call == SYS_RECV && !request->socketcall_req.args.recv.flags) { - // recv() with no flags is the same as read(), which is unrestricted - // in seccomp mode. - Debug::message("Replaced socketcall() with call to read()"); - ssize_t rc = sys.read(request->socketcall_req.args.recv.sockfd, - request->socketcall_req.args.recv.buf, - request->socketcall_req.args.recv.len); - if (rc < 0) { - Debug::elapsed(tm, __NR_socketcall, call); - return -sys.my_errno; - } else { - Debug::elapsed(tm, __NR_socketcall, call); - return rc; - } - } - - // Fill in the rest of the request header. - request->sysnum = __NR_socketcall; - request->cookie = cookie(); - request->socketcall_req.call = call; - request->socketcall_req.arg_ptr = args; - int padding = sizeof(request->socketcall_req.args) - - socketCallArgInfo[call].len; - if (padding > 0) { - memset((char *)(&request->socketcall_req.args + 1) - padding, 0, padding); - } - if (call == SYS_SENDMSG) { - // for sendmsg() we include the (optional) destination address, and the - // (optional) control data in the payload. - SendMsg *sendmsg_args = reinterpret_cast<SendMsg *>(args); - memcpy(reinterpret_cast<char *>( - memcpy(reinterpret_cast<char *>( - memcpy(request + 1, sendmsg_args->msg, sizeof(*sendmsg_args->msg))) + - sizeof(*sendmsg_args->msg), - sendmsg_args->msg->msg_name, sendmsg_args->msg->msg_namelen)) + - sendmsg_args->msg->msg_namelen, - sendmsg_args->msg->msg_control, sendmsg_args->msg->msg_controllen); - } else if (extraDataAddr) { - memcpy(request + 1, extraDataAddr, numExtraData); - } - - // Send request to trusted process and collect response from trusted thread. 
- long rc; - ssize_t len = sizeof(struct Request) + numExtraData; - if (write(sys, processFdPub(), data, len) != len || - read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to forward socketcall() request [sandbox]"); - } - Debug::elapsed(tm, __NR_socketcall, call); - return rc; -} - -bool Sandbox::process_socketcall(int parentMapsFd, int sandboxFd, - int threadFdPub, int threadFd, - SecureMem::Args* mem) { - // Read request - SocketCall socketcall_req; - SysCalls sys; - if (read(sys, sandboxFd, &socketcall_req, sizeof(socketcall_req)) != - sizeof(socketcall_req)) { - die("Failed to read parameters for socketcall() [process]"); - } - - // sandbox_socketcall() should never send us an unexpected "call" opcode. - // If it did, something went very wrong and we better terminate the process. - if (socketcall_req.call < SYS_SOCKET || socketcall_req.call > SYS_ACCEPT4) { - die("Unexpected socketcall() [process]"); - } - - // Check if this particular operation carries an extra payload. - socklen_t numExtraData = 0; - if (socketCallArgInfo[socketcall_req.call].lengthOff) { - memcpy(&numExtraData, - reinterpret_cast<char *>(&socketcall_req) + - socketCallArgInfo[socketcall_req.call].lengthOff, - sizeof(socklen_t)); - } else if (socketcall_req.call == SYS_SENDMSG) { - numExtraData = sizeof(*socketcall_req.args.sendmsg.msg); - } else if (socketcall_req.call == SYS_RECVMSG) { - numExtraData = sizeof(*socketcall_req.args.recvmsg.msg); - } - - // Verify that the length for the payload is reasonable. We don't want to - // blow up our stack, and excessive (or negative) buffer sizes are almost - // certainly a bug. - if (numExtraData > 4096) { - die("Unexpected size for socketcall() payload [process]"); - } - - // Read the extra payload, if any. 
- char extra[numExtraData]; - if (numExtraData) { - if (read(sys, sandboxFd, extra, numExtraData) != (ssize_t)numExtraData) { - die("Failed to read socketcall() payload [process]"); - } - } - - // sendmsg() has another level of indirection and can carry even more payload - ssize_t numSendmsgExtra = 0; - if (socketcall_req.call == SYS_SENDMSG) { - struct msghdr* msg = reinterpret_cast<struct msghdr*>(extra); - if (msg->msg_namelen > 4096 || msg->msg_controllen > 4096) { - die("Unexpected size for socketcall() payload [process]"); - } - numSendmsgExtra = msg->msg_namelen + msg->msg_controllen; - } - char sendmsgExtra[numSendmsgExtra]; - if (numSendmsgExtra) { - if (read(sys, sandboxFd, sendmsgExtra, numSendmsgExtra) != - numSendmsgExtra) { - die("Failed to read socketcall() payload [process]"); - } - } - - int rc = -EINVAL; - switch (socketcall_req.call) { - case SYS_SOCKET: - // The sandbox does not allow creation of any new sockets. - goto deny; - case SYS_BIND: - // The sandbox does not allow binding an address to a socket. - goto deny; - case SYS_CONNECT: - // The sandbox does not allow connecting a socket. - goto deny; - case SYS_LISTEN: - // The sandbox does not allow a socket to enter listening state. - goto deny; - case SYS_ACCEPT4: - case SYS_ACCEPT: - // If the sandbox obtained a socket that is already in the listening - // state (e.g. because somebody sent it a suitable file descriptor), it - // is permissible to call accept(). - - accept_simple: - // None of the parameters need to be checked, so it is OK to refer - // to the parameter block created by the untrusted code. - SecureMem::sendSystemCall(threadFdPub, false, -1, mem, __NR_socketcall, - socketcall_req.call, socketcall_req.arg_ptr); - return true; - case SYS_GETSOCKNAME: - case SYS_GETPEERNAME: - // Querying the local and the remote name is not considered security - // sensitive for the purposes of the sandbox. 
- goto accept_simple; - case SYS_SOCKETPAIR: - // Socket pairs are connected to each other and not considered - // security sensitive. - goto accept_simple; - case SYS_SENDTO: - if (socketcall_req.args.sendto.to) { - // The sandbox does not allow sending to arbitrary addresses. - goto deny; - } - // Fall through - case SYS_SEND: - if (socketcall_req.args.send.flags & - ~(MSG_CONFIRM|MSG_DONTWAIT|MSG_EOR|MSG_MORE|MSG_NOSIGNAL|MSG_OOB)) { - // Unsupported flag encountered. Deny the call. - goto deny; - } - // Sending data on a connected socket is similar to calling write(). - // Allow it. - - accept_complex: - // The parameter block contains potentially security critical information - // that should not be tampered with after it has been inspected. Copy it - // into the write-protected securely shared memory before telling the - // trusted thread to execute the socket call. - SecureMem::lockSystemCall(parentMapsFd, mem); - memcpy(mem->pathname, &socketcall_req.args, sizeof(socketcall_req.args)); - SecureMem::sendSystemCall(threadFdPub, true, parentMapsFd, mem, - __NR_socketcall, socketcall_req.call, - mem->pathname - (char*)mem + (char*)mem->self); - return true; - case SYS_RECVFROM: - // While we do not anticipate any particular need to receive data on - // unconnected sockets, there is no particular risk in doing so. - // Fall through - case SYS_RECV: - if (socketcall_req.args.recv.flags & - ~(MSG_DONTWAIT|MSG_OOB|MSG_PEEK|MSG_TRUNC|MSG_WAITALL)) { - // Unsupported flag encountered. Deny the call. - goto deny; - } - // Receiving data on a connected socket is similar to calling read(). - // Allow it. - goto accept_complex; - case SYS_SHUTDOWN: - // Shutting down a socket is always OK. 
- goto accept_simple; - case SYS_SETSOCKOPT: - switch (socketcall_req.args.setsockopt.level) { - case SOL_SOCKET: - switch (socketcall_req.args.setsockopt.optname) { - case SO_KEEPALIVE: - case SO_LINGER: - case SO_OOBINLINE: - case SO_RCVBUF: - case SO_RCVLOWAT: - case SO_SNDLOWAT: - case SO_RCVTIMEO: - case SO_SNDTIMEO: - case SO_REUSEADDR: - case SO_SNDBUF: - case SO_TIMESTAMP: - goto accept_complex; - default: - break; - } - break; - case IPPROTO_TCP: - switch (socketcall_req.args.setsockopt.optname) { - case TCP_CORK: - case TCP_DEFER_ACCEPT: - case TCP_INFO: - case TCP_KEEPCNT: - case TCP_KEEPIDLE: - case TCP_KEEPINTVL: - case TCP_LINGER2: - case TCP_MAXSEG: - case TCP_NODELAY: - case TCP_QUICKACK: - case TCP_SYNCNT: - case TCP_WINDOW_CLAMP: - goto accept_complex; - default: - break; - } - break; - default: - break; - } - goto deny; - case SYS_GETSOCKOPT: - switch (socketcall_req.args.getsockopt.level) { - case SOL_SOCKET: - switch (socketcall_req.args.getsockopt.optname) { - case SO_ACCEPTCONN: - case SO_ERROR: - case SO_KEEPALIVE: - case SO_LINGER: - case SO_OOBINLINE: - case SO_RCVBUF: - case SO_RCVLOWAT: - case SO_SNDLOWAT: - case SO_RCVTIMEO: - case SO_SNDTIMEO: - case SO_REUSEADDR: - case SO_SNDBUF: - case SO_TIMESTAMP: - case SO_TYPE: - goto accept_complex; - default: - break; - } - break; - case IPPROTO_TCP: - switch (socketcall_req.args.getsockopt.optname) { - case TCP_CORK: - case TCP_DEFER_ACCEPT: - case TCP_INFO: - case TCP_KEEPCNT: - case TCP_KEEPIDLE: - case TCP_KEEPINTVL: - case TCP_LINGER2: - case TCP_MAXSEG: - case TCP_NODELAY: - case TCP_QUICKACK: - case TCP_SYNCNT: - case TCP_WINDOW_CLAMP: - goto accept_complex; - default: - break; - } - break; - default: - break; - } - goto deny; - case SYS_SENDMSG: { - struct msghdr* msg = reinterpret_cast<struct msghdr*>(extra); - - if (sizeof(socketcall_req.args) + sizeof(*msg) + numSendmsgExtra > - sizeof(mem->pathname)) { - goto deny; - } - - if (msg->msg_namelen || - 
(socketcall_req.args.sendmsg.flags & - ~(MSG_CONFIRM|MSG_DONTWAIT|MSG_EOR|MSG_MORE|MSG_NOSIGNAL|MSG_OOB))){ - goto deny; - } - - // The trusted process receives file handles when a new untrusted thread - // gets created. We have security checks in place that prevent any - // critical information from being tampered with during thread creation. - // But if we disallowed passing of file handles, this would add an extra - // hurdle for an attacker. - // Unfortunately, for now, this is not possible as Chrome's - // base::SendRecvMsg() needs the ability to pass file handles. - if (msg->msg_controllen) { - msg->msg_control = sendmsgExtra + msg->msg_namelen; - struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg); - do { - if (cmsg->cmsg_level != SOL_SOCKET || - cmsg->cmsg_type != SCM_RIGHTS) { - goto deny; - } - } while ((cmsg = CMSG_NXTHDR(msg, cmsg)) != NULL); - } - - // This must be a locked system call, because we have to ensure that - // the untrusted code does not tamper with the msghdr after we have - // examined it. 
- SecureMem::lockSystemCall(parentMapsFd, mem); - socketcall_req.args.sendmsg.msg = - reinterpret_cast<struct msghdr*>(mem->pathname + - sizeof(socketcall_req.args) - - (char*)mem + (char*)mem->self); - memcpy(mem->pathname, &socketcall_req.args, sizeof(socketcall_req.args)); - if (numSendmsgExtra) { - if (msg->msg_namelen > 0) { - msg->msg_name = const_cast<struct msghdr*>( - socketcall_req.args.sendmsg.msg) + 1; - } - if (msg->msg_controllen > 0) { - msg->msg_control = (char *)( - socketcall_req.args.sendmsg.msg + 1) + msg->msg_namelen; - } - memcpy(mem->pathname + sizeof(socketcall_req.args) + sizeof(*msg), - sendmsgExtra, numSendmsgExtra); - } - memcpy(mem->pathname + sizeof(socketcall_req.args), msg, sizeof(*msg)); - SecureMem::sendSystemCall(threadFdPub, true, parentMapsFd, mem, - __NR_socketcall, socketcall_req.call, - mem->pathname - (char*)mem + (char*)mem->self); - return true; - } - case SYS_RECVMSG: - // Receiving messages is general not security critical. - if (socketcall_req.args.recvmsg.flags & - ~(MSG_DONTWAIT|MSG_OOB|MSG_PEEK|MSG_TRUNC|MSG_WAITALL)) { - goto deny; - } - goto accept_complex; - default: - deny: - SecureMem::abandonSystemCall(threadFd, rc); - return false; - } -} - -#endif - -} // namespace diff --git a/sandbox/linux/seccomp/stat.cc b/sandbox/linux/seccomp/stat.cc deleted file mode 100644 index cdf7e4c..0000000 --- a/sandbox/linux/seccomp/stat.cc +++ /dev/null @@ -1,197 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include "debug.h" -#include "sandbox_impl.h" - -namespace playground { - -long Sandbox::sandbox_stat(const char *path, void *buf) { - long long tm; - Debug::syscall(&tm, __NR_stat, "Executing handler"); - size_t len = strlen(path); - struct Request { - int sysnum; - long long cookie; - Stat stat_req; - char pathname[0]; - } __attribute__((packed)) *request; - char data[sizeof(struct Request) + len]; - request = reinterpret_cast<struct Request*>(data); - request->sysnum = __NR_stat; - request->cookie = cookie(); - request->stat_req.sysnum = __NR_stat; - request->stat_req.path_length = len; - request->stat_req.buf = buf; - memcpy(request->pathname, path, len); - - long rc; - SysCalls sys; - if (write(sys, processFdPub(), request, sizeof(data)) != (int)sizeof(data) || - read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to forward stat() request [sandbox]"); - } - Debug::elapsed(tm, __NR_stat); - return rc; -} - -long Sandbox::sandbox_lstat(const char *path, void *buf) { - long long tm; - Debug::syscall(&tm, __NR_lstat, "Executing handler"); - size_t len = strlen(path); - struct Request { - int sysnum; - long long cookie; - Stat stat_req; - char pathname[0]; - } __attribute__((packed)) *request; - char data[sizeof(struct Request) + len]; - request = reinterpret_cast<struct Request*>(data); - request->sysnum = __NR_lstat; - request->cookie = cookie(); - request->stat_req.sysnum = __NR_lstat; - request->stat_req.path_length = len; - request->stat_req.buf = buf; - memcpy(request->pathname, path, len); - - long rc; - SysCalls sys; - if (write(sys, processFdPub(), request, sizeof(data)) != (int)sizeof(data) || - read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to forward lstat() request [sandbox]"); - } - Debug::elapsed(tm, __NR_lstat); - return rc; -} - -#if defined(__NR_stat64) -long Sandbox::sandbox_stat64(const char *path, void *buf) { - long long tm; - Debug::syscall(&tm, __NR_stat64, "Executing handler"); - size_t len 
= strlen(path); - struct Request { - int sysnum; - long long cookie; - Stat stat_req; - char pathname[0]; - } __attribute__((packed)) *request; - char data[sizeof(struct Request) + len]; - request = reinterpret_cast<struct Request*>(data); - request->sysnum = __NR_stat64; - request->cookie = cookie(); - request->stat_req.sysnum = __NR_stat64; - request->stat_req.path_length = len; - request->stat_req.buf = buf; - memcpy(request->pathname, path, len); - - long rc; - SysCalls sys; - if (write(sys, processFdPub(), request, sizeof(data)) != (int)sizeof(data) || - read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to forward stat64() request [sandbox]"); - } - Debug::elapsed(tm, __NR_stat64); - return rc; -} - -long Sandbox::sandbox_lstat64(const char *path, void *buf) { - long long tm; - Debug::syscall(&tm, __NR_lstat64, "Executing handler"); - size_t len = strlen(path); - struct Request { - int sysnum; - long long cookie; - Stat stat_req; - char pathname[0]; - } __attribute__((packed)) *request; - char data[sizeof(struct Request) + len]; - request = reinterpret_cast<struct Request*>(data); - request->sysnum = __NR_lstat64; - request->cookie = cookie(); - request->stat_req.sysnum = __NR_lstat64; - request->stat_req.path_length = len; - request->stat_req.buf = buf; - memcpy(request->pathname, path, len); - - long rc; - SysCalls sys; - if (write(sys, processFdPub(), request, sizeof(data)) != (int)sizeof(data) || - read(sys, threadFdPub(), &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to forward lstat64() request [sandbox]"); - } - Debug::elapsed(tm, __NR_lstat64); - return rc; -} -#endif - -bool Sandbox::process_stat(int parentMapsFd, int sandboxFd, int threadFdPub, - int threadFd, SecureMem::Args* mem) { - // Read request - SysCalls sys; - Stat stat_req; - if (read(sys, sandboxFd, &stat_req, sizeof(stat_req)) != sizeof(stat_req)) { - read_parm_failed: - die("Failed to read parameters for stat() [process]"); - } - int rc = -ENAMETOOLONG; - if 
(stat_req.path_length >= (int)sizeof(mem->pathname)) { - char buf[32]; - while (stat_req.path_length > 0) { - size_t len = stat_req.path_length > sizeof(buf) ? - sizeof(buf) : stat_req.path_length; - ssize_t i = read(sys, sandboxFd, buf, len); - if (i <= 0) { - goto read_parm_failed; - } - stat_req.path_length -= i; - } - if (write(sys, threadFd, &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to return data from stat() [process]"); - } - return false; - } - if (stat_req.sysnum != __NR_stat && stat_req.sysnum != __NR_lstat - #ifdef __NR_stat64 - && stat_req.sysnum != __NR_stat64 - #endif - #ifdef __NR_lstat64 - && stat_req.sysnum != __NR_lstat64 - #endif - ) { - die("Corrupted stat() request"); - } - - if (!g_policy.allow_file_namespace) { - // After locking the mutex, we can no longer abandon the system call. So, - // perform checks before clobbering the securely shared memory. - char tmp[stat_req.path_length]; - if (read(sys, sandboxFd, tmp, stat_req.path_length) != - (ssize_t)stat_req.path_length) { - goto read_parm_failed; - } - Debug::message(("Denying access to \"" + std::string(tmp) + "\"").c_str()); - SecureMem::abandonSystemCall(threadFd, -EACCES); - return false; - } - - SecureMem::lockSystemCall(parentMapsFd, mem); - if (read(sys, sandboxFd, mem->pathname, stat_req.path_length) != - (ssize_t)stat_req.path_length) { - goto read_parm_failed; - } - mem->pathname[stat_req.path_length] = '\000'; - - // TODO(markus): Implement sandboxing policy - Debug::message(("Allowing access to \"" + std::string(mem->pathname) + - "\"").c_str()); - - // Tell trusted thread to stat the file. 
- SecureMem::sendSystemCall(threadFdPub, true, parentMapsFd, mem, - stat_req.sysnum, - mem->pathname - (char*)mem + (char*)mem->self, - stat_req.buf); - return true; -} - -} // namespace diff --git a/sandbox/linux/seccomp/syscall.cc b/sandbox/linux/seccomp/syscall.cc deleted file mode 100644 index 681fec9..0000000 --- a/sandbox/linux/seccomp/syscall.cc +++ /dev/null @@ -1,380 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "debug.h" -#include "sandbox_impl.h" -#include "syscall_table.h" - -namespace playground { - -// TODO(markus): change this into a function that returns the address of the assembly code. If that isn't possible for sandbox_clone, then move that function into a *.S file -asm( - ".pushsection .text, \"ax\", @progbits\n" - - // This is the special wrapper for the clone() system call. The code - // relies on the stack layout of the system call wrapper (c.f. below). It - // passes the stack pointer as an additional argument to sandbox__clone(), - // so that upon starting the child, register values can be restored and - // the child can start executing at the correct IP, instead of trying to - // run in the trusted thread. - "playground$sandbox_clone:" - ".globl playground$sandbox_clone\n" - ".type playground$sandbox_clone, @function\n" - #if defined(__x86_64__) - // Skip the 8 byte return address into the system call wrapper. The - // following bytes are the saved register values that we need to restore - // upon return from clone() in the new thread. - "lea 8(%rsp), %r9\n" - "jmp playground$sandbox__clone\n" - #elif defined(__i386__) - // As i386 passes function arguments on the stack, we need to skip a few - // more values before we can get to the saved registers. 
- "lea 28(%esp), %eax\n" - "mov %eax, 24(%esp)\n" - "jmp playground$sandbox__clone\n" - #else - #error Unsupported target platform - #endif - ".size playground$sandbox_clone, .-playground$sandbox_clone\n" - - - // This is the wrapper which is called by the untrusted code, trying to - // make a system call. - "playground$syscallWrapper:" - ".internal playground$syscallWrapper\n" - ".globl playground$syscallWrapper\n" - ".type playground$syscallWrapper, @function\n" - #if defined(__x86_64__) - // Check for rt_sigreturn(). It needs to be handled specially. - "cmp $15, %rax\n" // NR_rt_sigreturn - "jnz 1f\n" - "add $0x90, %rsp\n" // pop return addresses and red zone - "0:syscall\n" // rt_sigreturn() is unrestricted - "mov $66, %edi\n" // rt_sigreturn() should never return - "mov $231, %eax\n" // NR_exit_group - "jmp 0b\n" - - // Save all registers - "1:push %rbp\n" - "mov %rsp, %rbp\n" - "push %rbx\n" - "push %rcx\n" - "push %rdx\n" - "push %rsi\n" - "push %rdi\n" - "push %r8\n" - "push %r9\n" - "push %r10\n" - "push %r11\n" - "push %r12\n" - "push %r13\n" - "push %r14\n" - "push %r15\n" - - // Convert from syscall calling conventions to C calling conventions. - // System calls have a subtly different register ordering than the user- - // space x86-64 ABI. - "mov %r10, %rcx\n" - - // Check range of system call - "cmp playground$maxSyscall(%rip), %eax\n" - "ja 3f\n" - - // Retrieve function call from system call table (c.f. syscall_table.c). - // We have three different types of entries; zero for denied system calls, - // that should be handled by the defaultSystemCallHandler(); minus one - // for unrestricted system calls that need to be forwarded to the trusted - // thread; and function pointers to specific handler functions. - "mov %rax, %r10\n" - "shl $4, %r10\n" - "lea playground$syscallTable(%rip), %r11\n" - "add %r11, %r10\n" - "mov 0(%r10), %r10\n" - - // Jump to function if non-null and not UNRESTRICTED_SYSCALL, otherwise - // jump to fallback handler. 
- "cmp $1, %r10\n" - "jbe 3f\n" - "call *%r10\n" - "2:" - - // Restore CPU registers, except for %rax which was set by the system call. - "pop %r15\n" - "pop %r14\n" - "pop %r13\n" - "pop %r12\n" - "pop %r11\n" - "pop %r10\n" - "pop %r9\n" - "pop %r8\n" - "pop %rdi\n" - "pop %rsi\n" - "pop %rdx\n" - "pop %rcx\n" - "pop %rbx\n" - "pop %rbp\n" - - // Remove fake return address. This is added in the patching code in - // library.cc and it makes stack traces a little cleaner. - "add $8, %rsp\n" - - // Return to caller - "ret\n" - - "3:" - // If we end up calling a specific handler, we don't need to know the - // system call number. However, in the generic case, we do. Shift - // registers so that the system call number becomes visible as the - // first function argument. - "push %r9\n" - "mov %r8, %r9\n" - "mov %rcx, %r8\n" - "mov %rdx, %rcx\n" - "mov %rsi, %rdx\n" - "mov %rdi, %rsi\n" - "mov %rax, %rdi\n" - - // Call default handler. - "call playground$defaultSystemCallHandler\n" - "pop %r9\n" - "jmp 2b\n" - #elif defined(__i386__) - "cmp $119, %eax\n" // NR_sigreturn - "jnz 1f\n" - "add $0x4, %esp\n" // pop return address - "0:int $0x80\n" // sigreturn() is unrestricted - "mov $66, %ebx\n" // sigreturn() should never return - "mov %ebx, %eax\n" // NR_exit - "jmp 0b\n" - "1:cmp $173, %eax\n" // NR_rt_sigreturn - "jnz 3f\n" - - // Convert rt_sigframe into sigframe, allowing us to call sigreturn(). - // This is possible since the first part of signal stack frames have - // stayed very stable since the earliest kernel versions. While never - // officially documented, lots of user space applications rely on this - // part of the ABI, and kernel developers have been careful to maintain - // backwards compatibility. - // In general, the rt_sigframe includes a lot of extra information that - // the signal handler can look at. Most notably, this means a complete - // siginfo record. 
- // Fortunately though, the kernel doesn't look at any of this extra data - // when returning from a signal handler. So, we can safely convert an - // rt_sigframe to a legacy sigframe, discarding the extra data in the - // process. Interestingly, the legacy signal frame is actually larger than - // the rt signal frame, as it includes a lot more padding. - "sub $0x1C8, %esp\n" // a legacy signal stack is much larger - "mov 0x1CC(%esp), %eax\n" // push signal number - "push %eax\n" - "lea 0x270(%esp), %esi\n" // copy siginfo register values - "lea 0x4(%esp), %edi\n" // into new location - "mov $0x16, %ecx\n" - "cld\n" - "rep movsl\n" - "mov 0x2C8(%esp), %ebx\n" // copy first half of signal mask - "mov %ebx, 0x54(%esp)\n" - "lea 2f, %esi\n" - "push %esi\n" // push restorer function - "lea 0x2D4(%esp), %edi\n" // patch up retcode magic numbers - "movb $2, %cl\n" - "rep movsl\n" - "ret\n" // return to restorer function - "2:pop %eax\n" // remove dummy argument (signo) - "mov $119, %eax\n" // NR_sigreturn - "int $0x80\n" - - - // Preserve all registers - "3:push %ebx\n" - "push %ecx\n" - "push %edx\n" - "push %esi\n" - "push %edi\n" - "push %ebp\n" - - // Convert from syscall calling conventions to C calling conventions - "push %ebp\n" - "push %edi\n" - "push %esi\n" - "push %edx\n" - "push %ecx\n" - "push %ebx\n" - "push %eax\n" - - // Check range of system call - "cmp playground$maxSyscall, %eax\n" - "ja 9f\n" - - // We often have long sequences of calls to gettimeofday(). This is - // needlessly expensive. Coalesce them into a single call. - // - // We keep track of state in TLS storage that we can access through - // the %fs segment register. See trusted_thread.cc for the exact - // memory layout. - // - // TODO(markus): maybe, we should proactively call gettimeofday() and - // clock_gettime(), whenever we talk to the trusted thread? - // or maybe, if we have recently seen requests to compute - // the time. There might be a repeated pattern of those. 
- "cmp $78, %eax\n" // __NR_gettimeofday - "jnz 6f\n" - "cmp %eax, %fs:0x102C-0x58\n" // last system call - "jnz 4f\n" - - // This system call and the last system call prior to this one both are - // calls to gettimeofday(). Try to avoid making the new call and just - // return the same result as in the previous call. - // Just in case the caller is spinning on the result from gettimeofday(), - // every so often, call the actual system call. - "decl %fs:0x1030-0x58\n" // countdown calls to gettimofday() - "jz 4f\n" - - // Atomically read the 64bit word representing last-known timestamp and - // return it to the caller. On x86-32 this is a little more complicated and - // requires the use of the cmpxchg8b instruction. - "mov %ebx, %eax\n" - "mov %ecx, %edx\n" - "lock; cmpxchg8b 100f\n" - "mov %eax, 0(%ebx)\n" - "mov %edx, 4(%ebx)\n" - "xor %eax, %eax\n" - "add $28, %esp\n" - "jmp 8f\n" - - // This is a call to gettimeofday(), but we don't have a valid cached - // result, yet. - "4:mov %eax, %fs:0x102C-0x58\n" // remember syscall number - "movl $500, %fs:0x1030-0x58\n" // make system call, each 500 invocations - "call playground$defaultSystemCallHandler\n" - - // Returned from gettimeofday(). Remember return value, in case the - // application calls us again right away. - // Again, this has to happen atomically and requires cmpxchg8b. - "mov 4(%ebx), %ecx\n" - "mov 0(%ebx), %ebx\n" - "mov 100f, %eax\n" - "mov 101f, %edx\n" - "5:lock; cmpxchg8b 100f\n" - "jnz 5b\n" - "xor %eax, %eax\n" - "jmp 10f\n" - - // Remember the number of the last system call made. We deliberately do - // not remember calls to gettid(), as we have often seen long sequences - // of calls to just gettimeofday() and gettid(). In that situation, we - // would still like to coalesce the gettimeofday() calls. - "6:cmp $224, %eax\n" // __NR_gettid - "jz 7f\n" - "mov %eax, %fs:0x102C-0x58\n" // remember syscall number - - // Retrieve function call from system call table (c.f. syscall_table.c). 
- // We have three different types of entries; zero for denied system calls, - // that should be handled by the defaultSystemCallHandler(); minus one - // for unrestricted system calls that need to be forwarded to the trusted - // thread; and function pointers to specific handler functions. - "7:shl $3, %eax\n" - "lea playground$syscallTable, %ebx\n" - "add %ebx, %eax\n" - "mov 0(%eax), %eax\n" - - // Jump to function if non-null and not UNRESTRICTED_SYSCALL, otherwise - // jump to fallback handler. - "cmp $1, %eax\n" - "jbe 9f\n" - "add $4, %esp\n" - "call *%eax\n" - "add $24, %esp\n" - - // Restore CPU registers, except for %eax which was set by the system call. - "8:pop %ebp\n" - "pop %edi\n" - "pop %esi\n" - "pop %edx\n" - "pop %ecx\n" - "pop %ebx\n" - - // Return to caller - "ret\n" - - // Call default handler. - "9:call playground$defaultSystemCallHandler\n" - "10:add $28, %esp\n" - "jmp 8b\n" - - ".pushsection \".bss\"\n" - ".balign 8\n" -"100:.byte 0, 0, 0, 0\n" -"101:.byte 0, 0, 0, 0\n" - ".popsection\n" - - #else - #error Unsupported target platform - #endif - ".size playground$syscallWrapper, .-playground$syscallWrapper\n" - ".popsection\n" -); - - -void* Sandbox::defaultSystemCallHandler(int syscallNum, void* arg0, void* arg1, - void* arg2, void* arg3, void* arg4, - void* arg5) { - // TODO(markus): The following comment is currently not true, we do intercept these system calls. Try to fix that. - - // We try to avoid intercepting read(), and write(), as these system calls - // are not restricted in Seccomp mode. But depending on the exact - // instruction sequence in libc, we might not be able to reliably - // filter out these system calls at the time when we instrument the code. 
- SysCalls sys; - long rc; - long long tm; - switch (syscallNum) { - case __NR_read: - Debug::syscall(&tm, syscallNum, "Allowing unrestricted system call"); - rc = sys.read((long)arg0, arg1, (size_t)arg2); - break; - case __NR_write: - Debug::syscall(&tm, syscallNum, "Allowing unrestricted system call"); - rc = sys.write((long)arg0, arg1, (size_t)arg2); - break; - default: - if (Debug::isEnabled()) { - // In debug mode, prevent stderr from being closed - if (syscallNum == __NR_close && arg0 == (void *)2) - return 0; - } - - if ((unsigned)syscallNum <= maxSyscall && - syscallTable[syscallNum].handler == UNRESTRICTED_SYSCALL) { - Debug::syscall(&tm, syscallNum, "Allowing unrestricted system call"); - perform_unrestricted: - struct { - int sysnum; - void* unrestricted_req[6]; - } __attribute__((packed)) request = { - syscallNum, { arg0, arg1, arg2, arg3, arg4, arg5 } }; - - int thread = threadFdPub(); - void* rc; - if (write(sys, thread, &request, sizeof(request)) != sizeof(request) || - read(sys, thread, &rc, sizeof(rc)) != sizeof(rc)) { - die("Failed to forward unrestricted system call"); - } - Debug::elapsed(tm, syscallNum); - return rc; - } else if (Debug::isEnabled()) { - Debug::syscall(&tm, syscallNum, - "In production mode, this call would be disallowed"); - goto perform_unrestricted; - } else { - return (void *)-ENOSYS; - } - } - if (rc < 0) { - rc = -sys.my_errno; - } - Debug::elapsed(tm, syscallNum); - return (void *)rc; -} - -} // namespace diff --git a/sandbox/linux/seccomp/syscall.h b/sandbox/linux/seccomp/syscall.h deleted file mode 100644 index 1315e12..0000000 --- a/sandbox/linux/seccomp/syscall.h +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#ifndef SYSCALL_H__ -#define SYSCALL_H__ - -#ifdef __cplusplus -extern "C" { -#endif - -void syscallWrapper() asm("playground$syscallWrapper") -#if defined(__x86_64__) - __attribute__((visibility("internal"))) -#endif -; - -#ifdef __cplusplus -} -#endif - -#endif // SYSCALL_H__ diff --git a/sandbox/linux/seccomp/syscall_table.c b/sandbox/linux/seccomp/syscall_table.c deleted file mode 100644 index c9dd7a4..0000000 --- a/sandbox/linux/seccomp/syscall_table.c +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include <asm/unistd.h> -#include "sandbox_impl.h" -#include "syscall_table.h" - -#if defined(__x86_64__) -#ifndef __NR_set_robust_list -#define __NR_set_robust_list 273 -#endif -#ifndef __NR_accept4 -#define __NR_accept4 288 -#endif -#elif defined(__i386__) -#ifndef __NR_set_robust_list -#define __NR_set_robust_list 311 -#endif -#else -#error Unsupported target platform -#endif - -// TODO(markus): This is an incredibly dirty hack to make the syscallTable -// live in r/o memory. -// Unfortunately, gcc doesn't give us a clean option to do -// this. Ultimately, we should probably write some code that -// parses /usr/include/asm/unistd*.h and generates a *.S file. -// But we then need to figure out how to integrate this code -// with our build system. 
- -const struct SyscallTable syscallTable[] __attribute__(( - section(".rodata, \"a\", @progbits\n#"))) ={ - - #if defined(__NR_accept) - [ __NR_accept ] = { UNRESTRICTED_SYSCALL, 0 }, - [ __NR_accept4 ] = { UNRESTRICTED_SYSCALL, 0 }, - #endif - [ __NR_access ] = { (void*)&sandbox_access, process_access }, - [ __NR_brk ] = { UNRESTRICTED_SYSCALL, 0 }, - [ __NR_clock_gettime ] = { UNRESTRICTED_SYSCALL, 0 }, - [ __NR_clone ] = { (void*)&sandbox_clone, process_clone }, - [ __NR_close ] = { UNRESTRICTED_SYSCALL, 0 }, - [ __NR_dup ] = { UNRESTRICTED_SYSCALL, 0 }, - [ __NR_dup2 ] = { UNRESTRICTED_SYSCALL, 0 }, - [ __NR_epoll_create ] = { UNRESTRICTED_SYSCALL, 0 }, - [ __NR_epoll_ctl ] = { UNRESTRICTED_SYSCALL, 0 }, - [ __NR_epoll_wait ] = { UNRESTRICTED_SYSCALL, 0 }, - [ __NR_exit ] = { (void*)&sandbox_exit, process_exit }, - [ __NR_exit_group ] = { UNRESTRICTED_SYSCALL, 0 }, - [ __NR_fcntl ] = { UNRESTRICTED_SYSCALL, 0 }, - #if defined(__NR_fcntl64) - [ __NR_fcntl64 ] = { UNRESTRICTED_SYSCALL, 0 }, - #endif - [ __NR_fstat ] = { UNRESTRICTED_SYSCALL, 0 }, - #if defined(__NR_fstat64) - [ __NR_fstat64 ] = { UNRESTRICTED_SYSCALL, 0 }, - #endif - [ __NR_futex ] = { UNRESTRICTED_SYSCALL, 0 }, - [ __NR_getdents ] = { UNRESTRICTED_SYSCALL, 0 }, - [ __NR_getdents64 ] = { UNRESTRICTED_SYSCALL, 0 }, - #if defined(__NR_getpeername) - [ __NR_getpeername ] = { UNRESTRICTED_SYSCALL, 0 }, - #endif - [ __NR_getpid ] = { (void*)&sandbox_getpid, 0 }, - #if defined(__NR_getsockname) - [ __NR_getsockname ] = { UNRESTRICTED_SYSCALL, 0 }, - [ __NR_getsockopt ] = { (void*)&sandbox_getsockopt,process_getsockopt }, - #endif - [ __NR_gettid ] = { (void*)&sandbox_gettid, 0 }, - [ __NR_gettimeofday ] = { UNRESTRICTED_SYSCALL, 0 }, - [ __NR_ioctl ] = { (void*)&sandbox_ioctl, process_ioctl }, - #if defined(__NR_ipc) - [ __NR_ipc ] = { (void*)&sandbox_ipc, process_ipc }, - #endif - #if defined(__NR__llseek) - [ __NR__llseek ] = { UNRESTRICTED_SYSCALL, 0 }, - #endif - [ __NR_lseek ] = { 
UNRESTRICTED_SYSCALL, 0 }, - [ __NR_lstat ] = { (void*)&sandbox_lstat, process_stat }, - #if defined(__NR_lstat64) - [ __NR_lstat64 ] = { (void*)&sandbox_lstat64, process_stat }, - #endif - [ __NR_madvise ] = { (void*)&sandbox_madvise, process_madvise }, - #if defined(__NR_mmap2) - [ __NR_mmap2 ] = - #else - [ __NR_mmap ] = - #endif - { (void*)&sandbox_mmap, process_mmap }, - [ __NR_mprotect ] = { (void*)&sandbox_mprotect, process_mprotect }, - [ __NR_munmap ] = { (void*)&sandbox_munmap, process_munmap }, - [ __NR_open ] = { (void*)&sandbox_open, process_open }, - [ __NR_pipe ] = { UNRESTRICTED_SYSCALL, 0 }, - [ __NR_poll ] = { UNRESTRICTED_SYSCALL, 0 }, - #if defined(__NR_recvfrom) - [ __NR_recvfrom ] = { (void*)&sandbox_recvfrom, process_recvfrom }, - [ __NR_recvmsg ] = { (void*)&sandbox_recvmsg, process_recvmsg }, - #endif - #if defined(__NR_rt_sigaction) - [ __NR_rt_sigaction ] = { (void*)&sandbox_rt_sigaction,process_sigaction}, - #endif - #if defined(__NR_rt_sigprocmask) - [ __NR_rt_sigprocmask ] = { (void*)&sandbox_rt_sigprocmask, 0 }, - #endif - #if defined(__NR_sendmsg) - [ __NR_sendmsg ] = { (void*)&sandbox_sendmsg, process_sendmsg }, - [ __NR_sendto ] = { (void*)&sandbox_sendto, process_sendto }, - #endif - [ __NR_set_robust_list ] = { UNRESTRICTED_SYSCALL, 0 }, - #if defined(__NR_setsockopt) - [ __NR_setsockopt ] = { (void*)&sandbox_setsockopt,process_setsockopt }, - #endif - #if defined(__NR_shmat) - [ __NR_shmat ] = { (void*)&sandbox_shmat, process_shmat }, - [ __NR_shmctl ] = { (void*)&sandbox_shmctl, process_shmctl }, - [ __NR_shmdt ] = { (void*)&sandbox_shmdt, process_shmdt }, - [ __NR_shmget ] = { (void*)&sandbox_shmget, process_shmget }, - #endif - #if defined(__NR_shutdown) - [ __NR_shutdown ] = { UNRESTRICTED_SYSCALL, 0 }, - #endif - #if defined(__NR_sigaction) - [ __NR_sigaction ] = { (void*)&sandbox_sigaction,process_sigaction }, - #endif - #if defined(__NR_signal) - [ __NR_signal ] = { (void*)&sandbox_signal, process_sigaction }, - #endif - 
#if defined(__NR_sigprocmask) - [ __NR_sigprocmask ] = { (void*)&sandbox_sigprocmask, 0 }, - #endif - #if defined(__NR_socketpair) - [ __NR_socketpair ] = { UNRESTRICTED_SYSCALL, 0 }, - #endif - #if defined(__NR_socketcall) - [ __NR_socketcall ] = { (void*)&sandbox_socketcall,process_socketcall }, - #endif - [ __NR_stat ] = { (void*)&sandbox_stat, process_stat }, - #if defined(__NR_stat64) - [ __NR_stat64 ] = { (void*)&sandbox_stat64, process_stat }, - #endif - [ __NR_time ] = { UNRESTRICTED_SYSCALL, 0 }, - [ __NR_uname ] = { UNRESTRICTED_SYSCALL, 0 }, -}; -const unsigned maxSyscall __attribute__((section(".rodata"))) = - sizeof(syscallTable)/sizeof(struct SyscallTable); - -const int syscall_mutex_[4096/sizeof(int)] asm("playground$syscall_mutex") - __attribute__((section(".rodata"),aligned(4096) -#if defined(__x86_64__) - ,visibility("internal") -#endif - )) = { 0x80000000 }; diff --git a/sandbox/linux/seccomp/syscall_table.h b/sandbox/linux/seccomp/syscall_table.h deleted file mode 100644 index 5bd6791..0000000 --- a/sandbox/linux/seccomp/syscall_table.h +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#ifndef SYSCALL_TABLE_H__ -#define SYSCALL_TABLE_H__ - -#include <sys/types.h> - -#ifdef __cplusplus -#include "securemem.h" -extern "C" { -namespace playground { -#define SecureMemArgs SecureMem::Args -#else -#define SecureMemArgs void -#define bool int -#endif - #define UNRESTRICTED_SYSCALL ((void *)1) - - struct SyscallTable { - void *handler; - bool (*trustedProcess)(int parentMapsFd, int sandboxFd, int threadFdPub, - int threadFd, SecureMemArgs* mem); - }; - extern const struct SyscallTable syscallTable[] - asm("playground$syscallTable") -#if defined(__x86_64__) - __attribute__((visibility("internal"))) -#endif - ; - extern const unsigned maxSyscall - asm("playground$maxSyscall") -#if defined(__x86_64__) - __attribute__((visibility("internal"))) -#endif - ; -#ifdef __cplusplus -} // namespace -} -#endif - -#endif // SYSCALL_TABLE_H__ diff --git a/sandbox/linux/seccomp/tests/list_tests.py b/sandbox/linux/seccomp/tests/list_tests.py deleted file mode 100644 index 011a52e..0000000 --- a/sandbox/linux/seccomp/tests/list_tests.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (c) 2010 The Chromium Authors. All rights reserved. -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. - -import re -import sys - - -def get_tests(filename): - for line in open(filename): - match = re.match(r"TEST\((\w+)\)", line) - if match is not None: - yield match.group(1) - - -def main(args): - for name in get_tests(args[0]): - print ' { "%s", %s },' % (name, name) - - -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/sandbox/linux/seccomp/tests/test_syscalls.cc b/sandbox/linux/seccomp/tests/test_syscalls.cc deleted file mode 100644 index 3e6acd5..0000000 --- a/sandbox/linux/seccomp/tests/test_syscalls.cc +++ /dev/null @@ -1,758 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include <assert.h> -#include <dirent.h> -#include <pthread.h> -#include <pty.h> -#include <sys/types.h> -#include <sys/wait.h> - -#include "sandbox_impl.h" - -#ifdef DEBUG -#define MSG(fmt, ...) printf(fmt, ##__VA_ARGS__) -#else -#define MSG(fmt, ...) do { } while (0) -#endif - -int g_intended_status_fd = -1; - -// Declares the wait() status that the test subprocess intends to exit with. -void intend_exit_status(int val, bool is_signal) { - if (is_signal) { - val = W_EXITCODE(0, val); - } else { - val = W_EXITCODE(val, 0); - } - if (g_intended_status_fd != -1) { - int sent = write(g_intended_status_fd, &val, sizeof(val)); - assert(sent == sizeof(val)); - } else { - // This prints in cases where we run one test without forking - printf("Intending to exit with status %i...\n", val); - } -} - - -// This is basically a marker to grep for. -#define TEST(name) void name() - -TEST(test_dup) { - StartSeccompSandbox(); - // Test a simple syscall that is marked as UNRESTRICTED_SYSCALL. - int fd = dup(1); - assert(fd >= 0); - int rc = close(fd); - assert(rc == 0); -} - -TEST(test_segfault) { - StartSeccompSandbox(); - // Check that the sandbox's SIGSEGV handler does not stop the - // process from dying cleanly in the event of a real segfault. - intend_exit_status(SIGSEGV, true); - asm("hlt"); -} - -TEST(test_exit) { - StartSeccompSandbox(); - intend_exit_status(123, false); - _exit(123); -} - -// This has an off-by-three error because it counts ".", "..", and the -// FD for the /proc/self/fd directory. This doesn't matter because it -// is only used to check for differences in the number of open FDs. 
-static int count_fds() { - DIR *dir = opendir("/proc/self/fd"); - assert(dir != NULL); - int count = 0; - while (1) { - struct dirent *d = readdir(dir); - if (d == NULL) - break; - count++; - } - int rc = closedir(dir); - assert(rc == 0); - return count; -} - -static void *thread_func(void *x) { - int *ptr = (int *) x; - *ptr = 123; - MSG("In new thread\n"); - return (void *) 456; -} - -TEST(test_thread) { - playground::g_policy.allow_file_namespace = true; // To allow count_fds() - StartSeccompSandbox(); - int fd_count1 = count_fds(); - pthread_t tid; - int x = 999; - void *result; - pthread_create(&tid, NULL, thread_func, &x); - MSG("Waiting for thread\n"); - pthread_join(tid, &result); - assert(result == (void *) 456); - assert(x == 123); - // Check that the process has not leaked FDs. - int fd_count2 = count_fds(); - assert(fd_count2 == fd_count1); -} - -static int clone_func(void *x) { - int *ptr = (int *) x; - *ptr = 124; - MSG("In thread\n"); - // On x86-64, returning from this function calls the __NR_exit_group - // syscall instead of __NR_exit. - syscall(__NR_exit, 100); - // Not reached. 
- return 200; -} - -#if defined(__i386__) -static int get_gs() { - int gs; - asm volatile("mov %%gs, %0" : "=r"(gs)); - return gs; -} -#endif - -static void *get_tls_base() { - void *base; -#if defined(__x86_64__) - asm volatile("mov %%fs:0, %0" : "=r"(base)); -#elif defined(__i386__) - asm volatile("mov %%gs:0, %0" : "=r"(base)); -#else -#error Unsupported target platform -#endif - return base; -} - -TEST(test_clone) { - playground::g_policy.allow_file_namespace = true; // To allow count_fds() - StartSeccompSandbox(); - int fd_count1 = count_fds(); - int stack_size = 0x1000; - char *stack = (char *) malloc(stack_size); - assert(stack != NULL); - int flags = CLONE_VM | CLONE_FS | CLONE_FILES | - CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | - CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID; - int tid = -1; - int x = 999; - - // The sandbox requires us to pass CLONE_SETTLS. Pass settings that - // are enough to copy the parent thread's TLS setup. This allows us - // to invoke libc in the child thread. -#if defined(__x86_64__) - void *tls = get_tls_base(); -#elif defined(__i386__) - struct user_desc tls_desc, *tls = &tls_desc; - tls_desc.entry_number = get_gs() >> 3; - tls_desc.base_addr = (long) get_tls_base(); - tls_desc.limit = 0xfffff; - tls_desc.seg_32bit = 1; - tls_desc.contents = 0; - tls_desc.read_exec_only = 0; - tls_desc.limit_in_pages = 1; - tls_desc.seg_not_present = 0; - tls_desc.useable = 1; -#else -#error Unsupported target platform -#endif - - int rc = clone(clone_func, (void *) (stack + stack_size), flags, &x, - &tid, tls, &tid); - assert(rc > 0); - while (tid == rc) { - syscall(__NR_futex, &tid, FUTEX_WAIT, rc, NULL); - } - assert(tid == 0); - assert(x == 124); - // Check that the process has not leaked FDs. 
- int fd_count2 = count_fds(); - assert(fd_count2 == fd_count1); -} - -static int uncalled_clone_func(void *x) { - printf("In thread func, which shouldn't happen\n"); - return 1; -} - -TEST(test_clone_disallowed_flags) { - StartSeccompSandbox(); - int stack_size = 4096; - char *stack = (char *) malloc(stack_size); - assert(stack != NULL); - /* We omit the flags CLONE_SETTLS, CLONE_PARENT_SETTID and - CLONE_CHILD_CLEARTID, which is disallowed by the sandbox. */ - int flags = CLONE_VM | CLONE_FS | CLONE_FILES | - CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM; - int rc = clone(uncalled_clone_func, (void *) (stack + stack_size), - flags, NULL, NULL, NULL, NULL); - assert(rc == -1); - assert(errno == EPERM); -} - -static void *fp_thread(void *x) { - int val; - asm("movss %%xmm0, %0" : "=m"(val)); - MSG("val=%i\n", val); - return NULL; -} - -TEST(test_fp_regs) { - StartSeccompSandbox(); - int val = 1234; - asm("movss %0, %%xmm0" : "=m"(val)); - pthread_t tid; - pthread_create(&tid, NULL, fp_thread, NULL); - pthread_join(tid, NULL); - MSG("thread done OK\n"); -} - -static long long read_tsc() { - long long rc; - asm volatile( - "rdtsc\n" - "mov %%eax, (%0)\n" - "mov %%edx, 4(%0)\n" - : - : "c"(&rc), "a"(-1), "d"(-1)); - return rc; -} - -TEST(test_rdtsc) { - StartSeccompSandbox(); - // Just check that we can do the instruction. - read_tsc(); -} - -TEST(test_getpid) { - int pid1 = getpid(); - StartSeccompSandbox(); - int pid2 = getpid(); - assert(pid1 == pid2); - // Bypass any caching that glibc's getpid() wrapper might do. - int pid3 = syscall(__NR_getpid); - assert(pid1 == pid3); -} - -TEST(test_gettid) { - // glibc doesn't provide a gettid() wrapper. 
- int tid1 = syscall(__NR_gettid); - assert(tid1 > 0); - StartSeccompSandbox(); - int tid2 = syscall(__NR_gettid); - assert(tid1 == tid2); -} - -static void *map_something() { - void *addr = mmap(NULL, 0x1000, PROT_READ, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - assert(addr != MAP_FAILED); - return addr; -} - -TEST(test_mmap_disallows_remapping) { - void *addr = map_something(); - StartSeccompSandbox(); - // Overwriting a mapping that was created before the sandbox was - // enabled is not allowed. - void *result = mmap(addr, 0x1000, PROT_READ, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); - assert(result == MAP_FAILED); - assert(errno == EINVAL); -} - -TEST(test_mmap_disallows_low_address) { - StartSeccompSandbox(); - // Mapping pages at low addresses is not allowed because this helps - // with exploiting buggy kernels. - void *result = mmap(NULL, 0x1000, PROT_READ, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); - assert(result == MAP_FAILED); - assert(errno == EINVAL); -} - -TEST(test_munmap_allowed) { - StartSeccompSandbox(); - void *addr = map_something(); - int result = munmap(addr, 0x1000); - assert(result == 0); -} - -TEST(test_munmap_disallowed) { - void *addr = map_something(); - StartSeccompSandbox(); - int result = munmap(addr, 0x1000); - assert(result == -1); - assert(errno == EINVAL); -} - -TEST(test_mprotect_allowed) { - StartSeccompSandbox(); - void *addr = map_something(); - int result = mprotect(addr, 0x1000, PROT_READ | PROT_WRITE); - assert(result == 0); -} - -TEST(test_mprotect_disallowed) { - void *addr = map_something(); - StartSeccompSandbox(); - int result = mprotect(addr, 0x1000, PROT_READ | PROT_WRITE); - assert(result == -1); - assert(errno == EINVAL); -} - -static int get_tty_fd() { - int master_fd, tty_fd; - int rc = openpty(&master_fd, &tty_fd, NULL, NULL, NULL); - assert(rc == 0); - return tty_fd; -} - -TEST(test_ioctl_tiocgwinsz_allowed) { - int tty_fd = get_tty_fd(); - StartSeccompSandbox(); - int size[2]; - // Get terminal 
width and height. - int result = ioctl(tty_fd, TIOCGWINSZ, size); - assert(result == 0); -} - -TEST(test_ioctl_disallowed) { - int tty_fd = get_tty_fd(); - StartSeccompSandbox(); - // This ioctl call inserts a character into the tty's input queue, - // which provides a way to send commands to an interactive shell. - char c = 'x'; - int result = ioctl(tty_fd, TIOCSTI, &c); - assert(result == -1); - assert(errno == EINVAL); -} - -TEST(test_socket) { - StartSeccompSandbox(); - int fd = socket(AF_UNIX, SOCK_STREAM, 0); - assert(fd == -1); - // TODO: Make it consistent between i386 and x86-64. - assert(errno == EINVAL || errno == ENOSYS); -} - -TEST(test_open_disabled) { - StartSeccompSandbox(); - int fd = open("/dev/null", O_RDONLY); - assert(fd == -1); - assert(errno == EACCES); - - // Writing to the policy flag does not change this. - playground::g_policy.allow_file_namespace = true; - fd = open("/dev/null", O_RDONLY); - assert(fd == -1); - assert(errno == EACCES); -} - -TEST(test_open_enabled) { - playground::g_policy.allow_file_namespace = true; - StartSeccompSandbox(); - int fd = open("/dev/null", O_RDONLY); - assert(fd >= 0); - int rc = close(fd); - assert(rc == 0); - fd = open("/dev/null", O_WRONLY); - assert(fd == -1); - assert(errno == EACCES); -} - -TEST(test_access_disabled) { - StartSeccompSandbox(); - int rc = access("/dev/null", R_OK); - assert(rc == -1); - assert(errno == EACCES); -} - -TEST(test_access_enabled) { - playground::g_policy.allow_file_namespace = true; - StartSeccompSandbox(); - int rc = access("/dev/null", R_OK); - assert(rc == 0); - rc = access("path-that-does-not-exist", R_OK); - assert(rc == -1); - assert(errno == ENOENT); -} - -TEST(test_stat_disabled) { - StartSeccompSandbox(); - struct stat st; - int rc = stat("/dev/null", &st); - assert(rc == -1); - assert(errno == EACCES); -} - -TEST(test_stat_enabled) { - playground::g_policy.allow_file_namespace = true; - StartSeccompSandbox(); - struct stat st; - int rc = stat("/dev/null", &st); 
- assert(rc == 0); - rc = stat("path-that-does-not-exist", &st); - assert(rc == -1); - assert(errno == ENOENT); -} - -static int g_value; - -static void signal_handler(int sig) { - g_value = 300; - MSG("In signal handler\n"); -} - -static void sigaction_handler(int sig, siginfo_t *a, void *b) { - g_value = 300; - MSG("In sigaction handler\n"); -} - -static void (*g_sig_handler_ptr)(int sig, void *addr) asm("g_sig_handler_ptr"); - -static void non_fatal_sig_handler(int sig, void *addr) { - g_value = 300; - MSG("Caught signal %d at %p\n", sig, addr); -} - -static void fatal_sig_handler(int sig, void *addr) { - // Recursively trigger another segmentation fault while already in the SEGV - // handler. This should terminate the program if SIGSEGV is marked as a - // deferred signal. - // Only do this on the first entry to this function. Otherwise, the signal - // handler was probably marked as SA_NODEFER and we want to continue - // execution. - if (!g_value++) { - MSG("Caught signal %d at %p\n", sig, addr); - if (sig == SIGSEGV) { - asm volatile("hlt"); - } else { - asm volatile("int3"); - } - } -} - -static void (*generic_signal_handler(void)) - (int signo, siginfo_t *info, void *context) { - void (*hdl)(int, siginfo_t *, void *); - asm volatile( - "lea 0f, %0\n" - "jmp 999f\n" - "0:\n" - -#if defined(__x86_64__) - "mov 0xB0(%%rsp), %%rsi\n" // Pass original %rip to signal handler - "cmpb $0xF4, 0(%%rsi)\n" // hlt - "jnz 1f\n" - "addq $1, 0xB0(%%rsp)\n" // Adjust %rip past failing instruction - "1:jmp *g_sig_handler_ptr\n" // Call actual signal handler -#elif defined(__i386__) - // TODO(markus): We currently don't guarantee that signal handlers always - // have the correct "magic" restorer function. If we fix - // this, we should add a test for it (both for SEGV and - // non-SEGV). 
- "cmpw $0, 0xA(%%esp)\n" - "lea 0x40(%%esp), %%eax\n" // %eip at time of exception - "jz 1f\n" - "add $0x9C, %%eax\n" // %eip at time of exception - "1:mov 0(%%eax), %%ecx\n" - "cmpb $0xF4, 0(%%ecx)\n" // hlt - "jnz 2f\n" - "addl $1, 0(%%eax)\n" // Adjust %eip past failing instruction - "2:push %%ecx\n" // Pass original %eip to signal handler - "mov 8(%%esp), %%eax\n" - "push %%eax\n" // Pass signal number to signal handler - "call *g_sig_handler_ptr\n" // Call actual signal handler - "pop %%eax\n" - "pop %%ecx\n" - "ret\n" -#else -#error Unsupported target platform -#endif - -"999:\n" - : "=r"(hdl)); - return hdl; -} - -TEST(test_signal_handler) { - sighandler_t result = signal(SIGTRAP, signal_handler); - assert(result != SIG_ERR); - - StartSeccompSandbox(); - - result = signal(SIGTRAP, signal_handler); - assert(result != SIG_ERR); - - g_value = 200; - asm("int3"); - assert(g_value == 300); -} - -TEST(test_sigaction_handler) { - struct sigaction act; - act.sa_sigaction = sigaction_handler; - sigemptyset(&act.sa_mask); - act.sa_flags = SA_SIGINFO; - int rc = sigaction(SIGTRAP, &act, NULL); - assert(rc == 0); - - StartSeccompSandbox(); - - rc = sigaction(SIGTRAP, &act, NULL); - assert(rc == 0); - - g_value = 200; - asm("int3"); - assert(g_value == 300); -} - -TEST(test_blocked_signal) { - sighandler_t result = signal(SIGTRAP, signal_handler); - assert(result != SIG_ERR); - StartSeccompSandbox(); - - // Initially the signal should not be blocked. - sigset_t sigs; - sigfillset(&sigs); - int rc = sigprocmask(0, NULL, &sigs); - assert(rc == 0); - assert(!sigismember(&sigs, SIGTRAP)); - - sigemptyset(&sigs); - sigaddset(&sigs, SIGTRAP); - rc = sigprocmask(SIG_BLOCK, &sigs, NULL); - assert(rc == 0); - - // Check that we can read back the blocked status. - sigemptyset(&sigs); - rc = sigprocmask(0, NULL, &sigs); - assert(rc == 0); - assert(sigismember(&sigs, SIGTRAP)); - - // Check that the signal handler really is blocked. 
- intend_exit_status(SIGTRAP, true); - asm("int3"); -} - -TEST(test_sigaltstack) { - // The sandbox does not support sigaltstack() yet. Just test that - // it returns an error. - StartSeccompSandbox(); - stack_t st; - st.ss_size = 0x4000; - st.ss_sp = malloc(st.ss_size); - assert(st.ss_sp != NULL); - st.ss_flags = 0; - int rc = sigaltstack(&st, NULL); - assert(rc == -1); - assert(errno == ENOSYS); -} - -TEST(test_sa_flags) { - StartSeccompSandbox(); - int flags[4] = { 0, SA_NODEFER, SA_SIGINFO, SA_SIGINFO | SA_NODEFER }; - for (int i = 0; i < 4; ++i) { - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = generic_signal_handler(); - g_sig_handler_ptr = non_fatal_sig_handler; - sa.sa_flags = flags[i]; - - // Test SEGV handling - g_value = 200; - sigaction(SIGSEGV, &sa, NULL); - asm volatile("hlt"); - assert(g_value == 300); - - // Test non-SEGV handling - g_value = 200; - sigaction(SIGTRAP, &sa, NULL); - asm volatile("int3"); - assert(g_value == 300); - } -} - -TEST(test_segv_defer) { - StartSeccompSandbox(); - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = generic_signal_handler(); - g_sig_handler_ptr = fatal_sig_handler; - - // Test non-deferred SEGV (should continue execution) - sa.sa_flags = SA_NODEFER; - sigaction(SIGSEGV, &sa, NULL); - g_value = 0; - asm volatile("hlt"); - - // Test deferred SEGV (should terminate program) - sa.sa_flags = 0; - sigaction(SIGSEGV, &sa, NULL); - g_value = 0; - intend_exit_status(SIGSEGV, true); - asm volatile("hlt"); -} - -TEST(test_trap_defer) { - StartSeccompSandbox(); - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = generic_signal_handler(); - g_sig_handler_ptr = fatal_sig_handler; - - // Test non-deferred TRAP (should continue execution) - sa.sa_flags = SA_NODEFER; - sigaction(SIGTRAP, &sa, NULL); - g_value = 0; - asm volatile("int3"); - - // Test deferred TRAP (should terminate program) - sa.sa_flags = 0; - sigaction(SIGTRAP, &sa, NULL); - g_value = 0; - 
intend_exit_status(SIGTRAP, true); - asm volatile("int3"); -} - -TEST(test_segv_resethand) { - StartSeccompSandbox(); - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = generic_signal_handler(); - g_sig_handler_ptr = non_fatal_sig_handler; - sa.sa_flags = SA_RESETHAND; - sigaction(SIGSEGV, &sa, NULL); - - // Test first invocation of signal handler (should continue execution) - asm volatile("hlt"); - - // Test second invocation of signal handler (should terminate program) - intend_exit_status(SIGSEGV, true); - asm volatile("hlt"); -} - -TEST(test_trap_resethand) { - StartSeccompSandbox(); - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = generic_signal_handler(); - g_sig_handler_ptr = non_fatal_sig_handler; - sa.sa_flags = SA_RESETHAND; - sigaction(SIGTRAP, &sa, NULL); - - // Test first invocation of signal handler (should continue execution) - asm volatile("int3"); - - // Test second invocation of signal handler (should terminate program) - intend_exit_status(SIGTRAP, true); - asm volatile("int3"); -} - -struct testcase { - const char *test_name; - void (*test_func)(); -}; - -struct testcase all_tests[] = { -#include "test-list.h" - { NULL, NULL }, -}; - -static int run_test_forked(struct testcase *test) { - printf("** %s\n", test->test_name); - int pipe_fds[2]; - int rc = pipe(pipe_fds); - assert(rc == 0); - int pid = fork(); - if (pid == 0) { - rc = close(pipe_fds[0]); - assert(rc == 0); - g_intended_status_fd = pipe_fds[1]; - - test->test_func(); - intend_exit_status(0, false); - _exit(0); - } - rc = close(pipe_fds[1]); - assert(rc == 0); - - int intended_status; - int got = read(pipe_fds[0], &intended_status, sizeof(intended_status)); - bool got_intended_status = got == sizeof(intended_status); - if (!got_intended_status) { - printf("Test runner: Did not receive intended status\n"); - } - - int status; - int pid2 = waitpid(pid, &status, 0); - assert(pid2 == pid); - if (!got_intended_status) { - printf("Test returned 
exit status %i\n", status); - return 1; - } - else if ((status & ~WCOREFLAG) != intended_status) { - printf("Test failed with exit status %i, expected %i\n", - status, intended_status); - return 1; - } - else { - return 0; - } -} - -static int run_test_by_name(const char *name) { - struct testcase *test; - for (test = all_tests; test->test_name != NULL; test++) { - if (strcmp(name, test->test_name) == 0) { - printf("Running test %s...\n", name); - test->test_func(); - printf("OK\n"); - return 0; - } - } - fprintf(stderr, "Test '%s' not found\n", name); - return 1; -} - -int main(int argc, char **argv) { - setvbuf(stdout, NULL, _IONBF, 0); - setvbuf(stderr, NULL, _IONBF, 0); - if (argc == 2) { - // Run one test without forking, to aid debugging. - return run_test_by_name(argv[1]); - } - else if (argc > 2) { - // TODO: run multiple tests. - fprintf(stderr, "Too many arguments\n"); - return 1; - } - else { - // Run all tests. - struct testcase *test; - int failures = 0; - for (test = all_tests; test->test_name != NULL; test++) { - failures += run_test_forked(test); - } - if (failures == 0) { - printf("OK\n"); - return 0; - } - else { - printf("%i FAILURE(S)\n", failures); - return 1; - } - } -} diff --git a/sandbox/linux/seccomp/timestats.cc b/sandbox/linux/seccomp/timestats.cc deleted file mode 100644 index 5d9b66a..0000000 --- a/sandbox/linux/seccomp/timestats.cc +++ /dev/null @@ -1,191 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Helper program to analyze the time that Chrome's renderers spend in system -// calls. Start Chrome like this: -// -// SECCOMP_SANDBOX_DEBUGGING=1 chrome --enable-seccomp-sandbox 2>&1 | timestats -// -// The program prints CPU time (0-100%) spent within system calls. This gives -// a general idea of where it is worthwhile to spend effort optimizing Chrome. 
-// -// Caveats: -// - there currently is no way to estimate what the overhead is for running -// inside of the sandbox vs. running without a sandbox. -// - we currently use a very simple heuristic to decide whether a system call -// is blocking or not. Blocking system calls should not be included in the -// computations. But it is quite possible for the numbers to be somewhat -// wrong, because the heuristic failed. -// - in order to collect this data, we have to turn on sandbox debugging. -// There is a measurable performance penalty to doing so. Production numbers -// are strictly better than the numbers reported by this tool. -#include <set> -#include <vector> - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/time.h> -#include <time.h> - -static const int kAvgWindowSizeMs = 500; -static const int kPeakWindowSizeMs = 2*1000; - -// Class containing information on a single system call. Most notably, it -// contains the time when the system call happened, and the time that it -// took to complete. -class Datum { - friend class Data; - public: - Datum(const char* name, double ms) - : name_(name), - ms_(ms) { - struct timeval tv; - gettimeofday(&tv, NULL); - timestamp_ = tv.tv_sec*1000.0 + tv.tv_usec/1000.0; - } - virtual ~Datum() { } - - double operator-(const Datum& b) { - return timestamp_ - b.timestamp_; - } - - protected: - const char* name_; - double ms_; - double timestamp_; -}; - -// Class containing data on the most recent system calls. It maintains -// sliding averages for total CPU time used, and it also maintains a peak -// CPU usage. The peak usage is usually updated slower than the average -// usage, as that makes it easier to inspect visually. 
-class Data { - public: - Data() { } - virtual ~Data() { } - - void addData(const char* name, double ms) { - average_.push_back(Datum(name, ms)); - peak_.push_back(Datum(name, ms)); - - // Prune entries outside of the window - std::vector<Datum>::iterator iter; - for (iter = average_.begin(); - *average_.rbegin() - *iter > kAvgWindowSizeMs; - ++iter) { - } - average_.erase(average_.begin(), iter); - - for (iter = peak_.begin(); - *peak_.rbegin() - *iter > kPeakWindowSizeMs; - ++iter){ - } - peak_.erase(peak_.begin(), iter); - - // Add the total usage of all system calls inside of the window - double total = 0; - for (iter = average_.begin(); iter != average_.end(); ++iter) { - total += iter->ms_; - } - - // Compute the peak CPU usage during the last window - double peak = 0; - double max = 0; - std::vector<Datum>::iterator tail = peak_.begin(); - for (iter = tail; iter != peak_.end(); ++iter) { - while (*iter - *tail > kAvgWindowSizeMs) { - peak -= tail->ms_; - ++tail; - } - peak += iter->ms_; - if (peak > max) { - max = peak; - } - } - - // Print the average CPU usage in the last window - char buf[80]; - total *= 100.0/kAvgWindowSizeMs; - max *= 100.0/kAvgWindowSizeMs; - sprintf(buf, "%6.2f%% (peak=%6.2f%%) ", total, max); - - // Animate the actual usage, displaying both average and peak values - int len = strlen(buf); - int space = sizeof(buf) - len - 1; - int mark = (total * space + 50)/100; - int bar = (max * space + 50)/100; - for (int i = 0; i < mark; ++i) { - buf[len++] = '*'; - } - if (mark == bar) { - if (bar) { - len--; - } - } else { - for (int i = 0; i < bar - mark - 1; ++i) { - buf[len++] = ' '; - } - } - buf[len++] = '|'; - while (len < static_cast<int>(sizeof(buf))) { - buf[len++] = ' '; - } - strcpy(buf + len, "\r"); - fwrite(buf, len + 1, 1, stdout); - fflush(stdout); - } - - private: - std::vector<Datum> average_; - std::vector<Datum> peak_; -}; -static Data data; - - -int main(int argc, char *argv[]) { - char buf[80]; - bool expensive = false; - 
while (fgets(buf, sizeof(buf), stdin)) { - // Allow longer delays for expensive system calls - if (strstr(buf, "This is an expensive system call")) { - expensive = true; - continue; - } - - // Parse the string and extract the elapsed time - const char elapsed[] = "Elapsed time: "; - char* ms_string = strstr(buf, elapsed); - char* endptr; - double ms; - char* colon = strchr(buf, ':'); - - // If this string doesn't match, then it must be some other type of - // message. Just ignore it. - // It is quite likely that we will regularly encounter debug messages - // that either should be parsed by a completely different tool, or - // messages that were intended for humans to read. - if (!ms_string || - ((ms = strtod(ms_string + sizeof(elapsed) - 1, &endptr)), - endptr == ms_string) || - !colon) { - continue; - } - - // Filter out system calls that were probably just blocking - // TODO(markus): automatically compute the cut-off for blocking calls - if (!expensive && ms > 0.05) { - continue; - } - expensive = false; - - // Extract the name of the system call - *colon = '\000'; - - // Add the data point and update the display - data.addData(buf, ms); - } - puts(""); - return 0; -} diff --git a/sandbox/linux/seccomp/tls.h b/sandbox/linux/seccomp/tls.h deleted file mode 100644 index 7ec5a28..0000000 --- a/sandbox/linux/seccomp/tls.h +++ /dev/null @@ -1,155 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#ifndef TLS_H__ -#define TLS_H__ - -#include <asm/ldt.h> -#include <stdlib.h> -#include <sys/mman.h> -#include <sys/prctl.h> - -namespace playground { - -class TLS { - private: - class SysCalls { - public: - #define SYS_CPLUSPLUS - #define SYS_ERRNO my_errno - #define SYS_INLINE inline - #define SYS_PREFIX -1 - #undef SYS_LINUX_SYSCALL_SUPPORT_H - #include "linux_syscall_support.h" - SysCalls() : my_errno(0) { } - int my_errno; - }; - - public: - static void *allocateTLS() { - SysCalls sys; - #if defined(__x86_64__) - void *addr = sys.mmap(0, 4096, PROT_READ|PROT_WRITE, - MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); - if (sys.arch_prctl(ARCH_SET_GS, addr) < 0) { - return NULL; - } - #elif defined(__i386__) - void *addr = sys.mmap2(0, 4096, PROT_READ|PROT_WRITE, - MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); - struct user_desc u; - u.entry_number = (typeof u.entry_number)-1; - u.base_addr = (int)addr; - u.limit = 0xfffff; - u.seg_32bit = 1; - u.contents = 0; - u.read_exec_only = 0; - u.limit_in_pages = 1; - u.seg_not_present = 0; - u.useable = 1; - if (sys.set_thread_area(&u) < 0) { - return NULL; - } - asm volatile( - "movw %w0, %%fs" - : - : "q"(8*u.entry_number+3)); - #else - #error Unsupported target platform - #endif - return addr; - } - - static void freeTLS() { - SysCalls sys; - void *addr; - #if defined(__x86_64__) - sys.arch_prctl(ARCH_GET_GS, &addr); - #elif defined(__i386__) - struct user_desc u; - sys.get_thread_area(&u); - addr = (void *)u.base_addr; - #else - #error Unsupported target platform - #endif - sys.munmap(addr, 4096); - } - - template<class T> static inline bool setTLSValue(int idx, T val) { - #if defined(__x86_64__) - if (idx < 0 || idx >= 4096/8) { - return false; - } - asm volatile( - "movq %0, %%gs:(%1)\n" - : - : "q"((void *)val), "q"(8ll * idx)); - #elif defined(__i386__) - if (idx < 0 || idx >= 4096/8) { - return false; - } - if (sizeof(T) == 8) { - asm volatile( - "movl %0, %%fs:(%1)\n" - : - : "r"((unsigned)val), "r"(8 * idx)); - asm volatile( - 
"movl %0, %%fs:(%1)\n" - : - : "r"((unsigned)((unsigned long long)val >> 32)), "r"(8 * idx + 4)); - } else { - asm volatile( - "movl %0, %%fs:(%1)\n" - : - : "r"(val), "r"(8 * idx)); - } - #else - #error Unsupported target platform - #endif - return true; - } - - template<class T> static inline T getTLSValue(int idx) { - #if defined(__x86_64__) - long long rc; - if (idx < 0 || idx >= 4096/8) { - return 0; - } - asm volatile( - "movq %%gs:(%1), %0\n" - : "=q"(rc) - : "q"(8ll * idx)); - return (T)rc; - #elif defined(__i386__) - if (idx < 0 || idx >= 4096/8) { - return 0; - } - if (sizeof(T) == 8) { - unsigned lo, hi; - asm volatile( - "movl %%fs:(%1), %0\n" - : "=r"(lo) - : "r"(8 * idx)); - asm volatile( - "movl %%fs:(%1), %0\n" - : "=r"(hi) - : "r"(8 * idx + 4)); - return (T)((unsigned long long)lo + ((unsigned long long)hi << 32)); - } else { - long rc; - asm volatile( - "movl %%fs:(%1), %0\n" - : "=r"(rc) - : "r"(8 * idx)); - return (T)rc; - } - #else - #error Unsupported target platform - #endif - } - -}; - -} // namespace -#endif diff --git a/sandbox/linux/seccomp/trusted_process.cc b/sandbox/linux/seccomp/trusted_process.cc deleted file mode 100644 index 5c62b0f..0000000 --- a/sandbox/linux/seccomp/trusted_process.cc +++ /dev/null @@ -1,268 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include <dirent.h> -#include <map> - -#include "debug.h" -#include "sandbox_impl.h" -#include "syscall_table.h" - -namespace playground { - -struct SandboxPolicy g_policy; - -struct Thread { - int fdPub, fd; - SecureMem::Args* mem; -}; - -SecureMem::Args* Sandbox::getNewSecureMem() { - if (!secureMemPool_.empty()) { - SecureMem::Args* rc = secureMemPool_.back(); - secureMemPool_.pop_back(); - memset(rc->scratchPage, 0, sizeof(rc->scratchPage)); - return rc; - } - return NULL; -} - -void Sandbox::trustedProcess(int parentMapsFd, int processFdPub, int sandboxFd, - int cloneFd, SecureMem::Args* secureArena) { - // The trusted process doesn't have access to TLS. Zero out the segment - // registers so that we can later test that we are in the trusted process. - #if defined(__x86_64__) - asm volatile("mov %0, %%gs\n" : : "r"(0)); - #elif defined(__i386__) - asm volatile("mov %0, %%fs\n" : : "r"(0)); - #else - #error Unsupported target platform - #endif - - std::map<long long, struct Thread> threads; - SysCalls sys; - long long cookie = 0; - - // The very first entry in the secure memory arena has been assigned to the - // initial thread. The remaining entries are available for allocation. - SecureMem::Args* startAddress = secureArena; - SecureMem::Args* nextThread = startAddress; - for (int i = 0; i < kMaxThreads-1; i++) { - secureMemPool_.push_back(++startAddress); - } - -newThreadCreated: - // Receive information from newly created thread - Thread *newThread = &threads[++cookie]; - memset(newThread, 0, sizeof(Thread)); - struct { - SecureMem::Args* self; - int tid; - int fdPub; - } __attribute__((packed)) data; - - size_t dataLen = sizeof(data); - if (!getFd(cloneFd, &newThread->fdPub, &newThread->fd, &data, &dataLen) || - dataLen != sizeof(data)) { - // We get here either because the sandbox got corrupted, or because our - // parent process has terminated. 
- if (newThread->fdPub || dataLen) { - die("Failed to receive new thread information"); - } - die(); - } - if (data.self != nextThread) { - // The only potentially security critical information received from the - // newly created thread is "self". The "tid" is for informational purposes - // (and for use in the new thread's TLS), and "fdPub" is uncritical as all - // file descriptors are considered untrusted. - // Thus, we only use "self" for a sanity check, but don't actually trust - // it beyond that. - die("Received corrupted thread information"); - } - newThread->mem = nextThread; - - // Set up TLS area and let thread know that the data is now ready - nextThread->cookie = cookie; - nextThread->threadId = data.tid; - nextThread->threadFdPub = data.fdPub; - write(sys, newThread->fd, "", 1); - - // Dispatch system calls that have been forwarded from the trusted thread(s). - for (;;) { - struct { - unsigned int sysnum; - long long cookie; - } __attribute__((packed)) header; - - int rc; - if ((rc = read(sys, sandboxFd, &header, sizeof(header))) !=sizeof(header)){ - if (rc) { - die("Failed to read system call number and thread id"); - } - die(); - } - std::map<long long, struct Thread>::iterator iter = - threads.find(header.cookie); - if (iter == threads.end()) { - die("Received request from unknown thread"); - } - struct Thread* currentThread = &iter->second; - if (header.sysnum > maxSyscall || - !syscallTable[header.sysnum].trustedProcess) { - die("Trusted process encountered unexpected system call"); - } - - // Dispatch system call to handler function. Treat both exit() and clone() - // specially. 
- if (syscallTable[header.sysnum].trustedProcess(parentMapsFd, - sandboxFd, - currentThread->fdPub, - currentThread->fd, - currentThread->mem) && - header.sysnum == __NR_clone) { - nextThread = currentThread->mem->newSecureMem; - goto newThreadCreated; - } else if (header.sysnum == __NR_exit) { - NOINTR_SYS(sys.close(iter->second.fdPub)); - NOINTR_SYS(sys.close(iter->second.fd)); - SecureMem::Args* secureMem = currentThread->mem; - threads.erase(iter); - secureMemPool_.push_back(secureMem); - } - } -} - -int Sandbox::initializeProtectedMap(int fd) { - int mapsFd; - if (!getFd(fd, &mapsFd, NULL, NULL, NULL)) { - maps_failure: - die("Cannot access /proc/self/maps"); - } - - // Read the memory mappings as they were before the sandbox takes effect. - // These mappings cannot be changed by the sandboxed process. - char line[80]; - FILE *fp = fdopen(mapsFd, "r"); - for (bool truncated = false;;) { - if (fgets(line, sizeof(line), fp) == NULL) { - if (feof(fp) || errno != EINTR) { - break; - } - continue; - } - if (!truncated) { - unsigned long start, stop; - char *ptr = line; - errno = 0; - start = strtoul(ptr, &ptr, 16); - if (errno || *ptr++ != '-') { - parse_failure: - die("Failed to parse /proc/self/maps"); - } - stop = strtoul(ptr, &ptr, 16); - if (errno || *ptr++ != ' ') { - goto parse_failure; - } - protectedMap_[reinterpret_cast<void *>(start)] = stop - start; - } - truncated = strchr(line, '\n') == NULL; - } - - // Prevent low address memory allocations. Some buggy kernels allow those - if (protectedMap_[0] < (64 << 10)) { - protectedMap_[0] = 64 << 10; - } - - // Let the sandbox know that we are done parsing the memory map. - SysCalls sys; - if (write(sys, fd, &mapsFd, sizeof(mapsFd)) != sizeof(mapsFd)) { - goto maps_failure; - } - - return mapsFd; -} - -SecureMem::Args* Sandbox::createTrustedProcess(int processFdPub, int sandboxFd, - int cloneFdPub, int cloneFd) { - // Allocate memory that will be used by an arena for storing the secure - // memory. 
While we allow this memory area to be empty at times (e.g. when - // not all threads are in use), we make sure that it never gets overwritten - // by user-allocated memory. This happens in initializeProtectedMap() and - // snapshotMemoryMappings(). - SecureMem::Args* secureArena = reinterpret_cast<SecureMem::Args*>( - mmap(NULL, 8192*kMaxThreads, PROT_READ|PROT_WRITE, - MAP_SHARED|MAP_ANONYMOUS, -1, 0)); - if (secureArena == MAP_FAILED) { - die("Failed to allocate secure memory arena"); - } - - // Set up the mutex to be accessible from the trusted process and from - // children of the trusted thread(s) - if (mmap(&syscall_mutex_, 4096, PROT_READ|PROT_WRITE, - MAP_SHARED|MAP_ANONYMOUS|MAP_FIXED, -1, 0) != &syscall_mutex_) { - die("Failed to initialize secure mutex"); - } - syscall_mutex_ = 0x80000000; - - - // Create a trusted process that can evaluate system call parameters and - // decide whether a system call should execute. This process runs outside of - // the seccomp sandbox. It communicates with the sandbox'd process through - // a socketpair() and through securely shared memory. - pid_t pid = fork(); - if (pid < 0) { - die("Failed to create trusted process"); - } - if (!pid) { - // Close all file handles except for sandboxFd, cloneFd, and stdio - DIR *dir = opendir("/proc/self/fd"); - if (dir == 0) { - // If we don't know the list of our open file handles, just try closing - // all valid ones. 
- for (int fd = sysconf(_SC_OPEN_MAX); --fd > 2; ) { - if (fd != sandboxFd && fd != cloneFd) { - close(fd); - } - } - } else { - // If available, if is much more efficient to just close the file - // handles that show up in /proc/self/fd/ - struct dirent de, *res; - while (!readdir_r(dir, &de, &res) && res) { - if (res->d_name[0] < '0') - continue; - int fd = atoi(res->d_name); - if (fd > 2 && - fd != sandboxFd && fd != cloneFd && fd != dirfd(dir)) { - close(fd); - } - } - closedir(dir); - } - - // Initialize secure memory used for threads - for (int i = 0; i < kMaxThreads; i++) { - SecureMem::Args* args = secureArena + i; - args->self = args; - #ifndef NDEBUG - args->allowAllSystemCalls= Debug::isEnabled(); - #endif - } - - int parentMapsFd = initializeProtectedMap(sandboxFd); - trustedProcess(parentMapsFd, processFdPub, sandboxFd, - cloneFd, secureArena); - die(); - } - - // We are still in the untrusted code. Deny access to restricted resources. - mprotect(secureArena, 8192*kMaxThreads, PROT_NONE); - mprotect(&syscall_mutex_, 4096, PROT_NONE); - close(sandboxFd); - - return secureArena; -} - -} // namespace diff --git a/sandbox/linux/seccomp/trusted_thread.cc b/sandbox/linux/seccomp/trusted_thread.cc deleted file mode 100644 index 6d6a3f5..0000000 --- a/sandbox/linux/seccomp/trusted_thread.cc +++ /dev/null @@ -1,1483 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include "sandbox_impl.h" -#include "syscall_table.h" - -namespace playground { - -void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub, - SecureMem::Args* secureMem) { - SecureMem::Args args = { { { { { 0 } } } } }; - args.self = &args; - args.newSecureMem = secureMem; - args.processFdPub = processFdPub; - args.cloneFdPub = cloneFdPub; -#if defined(__x86_64__) - asm volatile( - "push %%rbx\n" - "push %%rbp\n" - "mov %0, %%rbp\n" // %rbp = args - "xor %%rbx, %%rbx\n" // initial sequence number - "lea 999f(%%rip), %%r15\n" // continue in same thread - - // Signal handlers are process-wide. This means that for security - // reasons, we cannot allow that the trusted thread ever executes any - // signal handlers. - // We prevent the execution of signal handlers by setting a signal - // mask that blocks all signals. In addition, we make sure that the - // stack pointer is invalid. - // We cannot reset the signal mask until after we have enabled - // Seccomp mode. Our sigprocmask() wrapper would normally do this by - // raising a signal, modifying the signal mask in the kernel-generated - // signal frame, and then calling sigreturn(). This presents a bit of - // a Catch-22, as all signals are masked and we can therefore not - // raise any signal that would allow us to generate the signal stack - // frame. - // Instead, we have to create the signal stack frame prior to entering - // Seccomp mode. This incidentally also helps us to restore the - // signal mask to the same value that it had prior to entering the - // sandbox. - // The signal wrapper for clone() is the second entry point into this - // code (by means of sending an IPC to its trusted thread). It goes - // through the same steps of creating a signal stack frame on the - // newly created thread's stacks prior to cloning. See clone.cc for - // details. 
- "mov $56+0xF000, %%eax\n" // __NR_clone + 0xF000 - "mov %%rsp, %%rcx\n" - "int $0\n" // push a signal stack frame (see clone.cc) - "mov %%rcx, 0xA0(%%rsp)\n" // pop stack upon call to sigreturn() - "mov %%rsp, %%r9\n" - "mov $2, %%rdi\n" // how = SIG_SETMASK - "pushq $-1\n" - "mov %%rsp, %%rsi\n" // set = full mask - "xor %%rdx, %%rdx\n" // old_set = NULL - "mov $8, %%r10\n" // mask all 64 signals - "mov $14, %%eax\n" // NR_rt_sigprocmask - "syscall\n" - "xor %%rsp, %%rsp\n" // invalidate the stack in all trusted code - "jmp 20f\n" // create trusted thread - - // TODO(markus): Coalesce the read() operations by reading into a bigger - // buffer. - - // Parameters: - // *%fs: secure memory region - // the page following this one contains the scratch space - // %r13: thread's side of threadFd - // %r15: processFdPub - - // Local variables: - // %rbx: sequence number for trusted calls - - // Temporary variables: - // %r8: child stack - // %r9: system call number, child stack - // %rbp: secure memory of previous thread - - // Layout of secure shared memory region (c.f. securemem.h): - // 0x00: pointer to the secure shared memory region (i.e. 
self) - // 0x08: sequence number; must match %rbx - // 0x10: call type; must match %eax, iff %eax == -1 || %eax == -2 - // 0x18: system call number; passed to syscall in %rax - // 0x20: first argument; passed to syscall in %rdi - // 0x28: second argument; passed to syscall in %rsi - // 0x30: third argument; passed to syscall in %rdx - // 0x38: fourth argument; passed to syscall in %r10 - // 0x40: fifth argument; passed to syscall in %r8 - // 0x48: sixth argument; passed to syscall in %r9 - // 0x50: stored return address for clone() system call - // 0x58: stored %rbp value for clone() system call - // 0x60: stored %rbx value for clone() system call - // 0x68: stored %rcx value for clone() system call - // 0x70: stored %rdx value for clone() system call - // 0x78: stored %rsi value for clone() system call - // 0x80: stored %rdi value for clone() system call - // 0x88: stored %r8 value for clone() system call - // 0x90: stored %r9 value for clone() system call - // 0x98: stored %r10 value for clone() system call - // 0xA0: stored %r11 value for clone() system call - // 0xA8: stored %r12 value for clone() system call - // 0xB0: stored %r13 value for clone() system call - // 0xB8: stored %r14 value for clone() system call - // 0xC0: stored %r15 value for clone() system call - // 0xC8: new shared memory for clone() - // 0xD0: processFdPub for talking to trusted process - // 0xD4: cloneFdPub for talking to trusted process - // 0xD8: set to non-zero, if in debugging mode - // 0xDC: most recent SHM id returned by shmget(IPC_PRIVATE) - // 0xE0: cookie assigned to us by the trusted process (TLS_COOKIE) - // 0xE8: thread id (TLS_TID) - // 0xF0: threadFdPub (TLS_THREAD_FD) - // 0x200-0x1000: securely passed verified file name(s) - - // Layout of (untrusted) scratch space: - // 0x00: syscall number; passed in %rax - // 0x04: first argument; passed in %rdi - // 0x0C: second argument; passed in %rsi - // 0x14: third argument; passed in %rdx - // 0x1C: fourth argument; passed in 
%r10 - // 0x24: fifth argument; passed in %r8 - // 0x2C: sixth argument; passed in %r9 - // 0x34: return value - // 0x3C: RDTSCP result (%eax) - // 0x40: RDTSCP result (%edx) - // 0x44: RDTSCP result (%ecx) - // 0x48: last system call (not used on x86-64) - // 0x4C: number of consecutive calls to a time fnc (not used on x86-64) - // 0x50: nesting level of system calls (for debugging purposes only) - // 0x54: signal mask - // 0x5C: in SEGV handler - - // We use the %fs register for accessing the secure read-only page, and - // the untrusted scratch space immediately following it. The segment - // register and the local descriptor table is set up by passing - // appropriate arguments to clone(). - - "0:xor %%rsp, %%rsp\n" - "mov $2, %%ebx\n" // %rbx = initial sequence number - - // Read request from untrusted thread, or from trusted process. In either - // case, the data that we read has to be considered untrusted. - // read(threadFd, &scratch, 4) - "1:xor %%rax, %%rax\n" // NR_read - "mov %%r13, %%rdi\n" // fd = threadFd - "mov %%fs:0x0, %%rsi\n" // secure_mem - "add $0x1000, %%rsi\n" // buf = &scratch - "mov $4, %%edx\n" // len = 4 - "2:syscall\n" - "cmp $-4, %%rax\n" // EINTR - "jz 2b\n" - "cmp %%rdx, %%rax\n" - "jnz 25f\n" // exit process - - // Retrieve system call number. It is crucial that we only dereference - // %fs:0x1000 exactly once. Afterwards, memory becomes untrusted and - // we must use the value that we have read the first time. 
- "mov 0(%%rsi), %%eax\n" - - // If syscall number is -1, execute an unlocked system call from the - // secure memory area - "cmp $-1, %%eax\n" - "jnz 5f\n" - "3:cmp %%rbx, %%fs:0x8\n" - "jne 25f\n" // exit process - "cmp %%fs:0x10, %%eax\n" - "jne 25f\n" // exit process - "mov %%fs:0x18, %%rax\n" - "mov %%fs:0x20, %%rdi\n" - "mov %%fs:0x28, %%rsi\n" - "mov %%fs:0x30, %%rdx\n" - "mov %%fs:0x38, %%r10\n" - "mov %%fs:0x40, %%r8\n" - "mov %%fs:0x48, %%r9\n" - "cmp %%rbx, %%fs:0x8\n" - "jne 25f\n" // exit process - "add $2, %%rbx\n" - - // shmget() gets some special treatment. Whenever we return from this - // system call, we remember the most recently returned SysV shm id. - "cmp $29, %%eax\n" // NR_shmget - "jnz 4f\n" - "syscall\n" - "mov %%rax, %%r8\n" - "mov $56, %%eax\n" // NR_clone - "mov $17, %%edi\n" // flags = SIGCHLD - "mov $1, %%esi\n" // stack = 1 - "syscall\n" - "test %%rax, %%rax\n" - "js 25f\n" // exit process - "mov %%rax, %%rdi\n" - "jnz 8f\n" // wait for child, then return result - "mov %%fs:0x0, %%rdi\n" // start = secure_mem - "mov $4096, %%esi\n" // len = 4096 - "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE - "mov $10, %%eax\n" // NR_mprotect - "syscall\n" - "mov %%r8d, 0xDC(%%rdi)\n" // set most recently returned SysV shm id - "xor %%rdi, %%rdi\n" - - // When debugging messages are enabled, warn about expensive system calls - #ifndef NDEBUG - "cmpw $0, %%fs:0xD8\n" // debug mode - "jz 27f\n" - "mov $1, %%eax\n" // NR_write - "mov $2, %%edi\n" // fd = stderr - "lea 101f(%%rip), %%rsi\n" // "This is an expensive system call" - "mov $102f-101f, %%edx\n" // len = strlen(msg) - "syscall\n" - "xor %%rdi, %%rdi\n" - #endif - - "jmp 27f\n" // exit program, no message - "4:syscall\n" - "jmp 15f\n" // return result - - // If syscall number is -2, execute locked system call from the - // secure memory area - "5:jg 12f\n" - "cmp $-2, %%eax\n" - "jnz 9f\n" - "cmp %%rbx, %%fs:0x8\n" - "jne 25f\n" // exit process - "cmp %%eax, %%fs:0x10\n" - "jne 25f\n" // 
exit process - - // When debugging messages are enabled, warn about expensive system calls - #ifndef NDEBUG - "cmpw $0, %%fs:0xD8\n" // debug mode - "jz 6f\n" - "mov $1, %%eax\n" // NR_write - "mov $2, %%edi\n" // fd = stderr - "lea 101f(%%rip), %%rsi\n" // "This is an expensive system call" - "mov $102f-101f, %%edx\n" // len = strlen(msg) - "syscall\n" - "6:" - #endif - - "mov %%fs:0x18, %%rax\n" - "mov %%fs:0x20, %%rdi\n" - "mov %%fs:0x28, %%rsi\n" - "mov %%fs:0x30, %%rdx\n" - "mov %%fs:0x38, %%r10\n" - "mov %%fs:0x40, %%r8\n" - "mov %%fs:0x48, %%r9\n" - "cmp %%rbx, %%fs:0x8\n" - "jne 25f\n" // exit process - - // clone() has unusual calling conventions and must be handled specially - "cmp $56, %%rax\n" // NR_clone - "jz 19f\n" - - // exit() terminates trusted thread - "cmp $60, %%eax\n" // NR_exit - "jz 18f\n" - - // Perform requested system call - "syscall\n" - - // Unlock mutex - "7:cmp %%rbx, %%fs:0x8\n" - "jne 25f\n" // exit process - "add $2, %%rbx\n" - "mov %%rax, %%r8\n" - "mov $56, %%eax\n" // NR_clone - "mov $17, %%rdi\n" // flags = SIGCHLD - "mov $1, %%rsi\n" // stack = 1 - "syscall\n" - "test %%rax, %%rax\n" - "js 25f\n" // exit process - "jz 22f\n" // unlock and exit - "mov %%rax, %%rdi\n" - "8:xor %%rsi, %%rsi\n" - "xor %%rdx, %%rdx\n" - "xor %%r10, %%r10\n" - "mov $61, %%eax\n" // NR_wait4 - "syscall\n" - "cmp $-4, %%eax\n" // EINTR - "jz 8b\n" - "mov %%r8, %%rax\n" - "jmp 15f\n" // return result - - // If syscall number is -3, read the time stamp counter - "9:cmp $-3, %%eax\n" - "jnz 10f\n" - "rdtsc\n" // sets %edx:%eax - "xor %%rcx, %%rcx\n" - "jmp 11f\n" - "10:cmp $-4, %%eax\n" - "jnz 12f\n" - "rdtscp\n" // sets %edx:%eax and %ecx - "11:add $0x3C, %%rsi\n" - "mov %%eax, 0(%%rsi)\n" - "mov %%edx, 4(%%rsi)\n" - "mov %%ecx, 8(%%rsi)\n" - "mov $12, %%edx\n" - "jmp 16f\n" // return result - - // Check in syscallTable whether this system call is unrestricted - "12:mov %%rax, %%r9\n" - #ifndef NDEBUG - "cmpw $0, %%fs:0xD8\n" // debug mode - "jnz 13f\n" 
- #endif - "cmp playground$maxSyscall(%%rip), %%eax\n" - "ja 25f\n" // exit process - "shl $4, %%rax\n" - "lea playground$syscallTable(%%rip), %%rdi\n" - "add %%rdi, %%rax\n" - "mov 0(%%rax), %%rax\n" - "cmp $1, %%rax\n" - "jne 25f\n" // exit process - - // Default behavior for unrestricted system calls is to just execute - // them. Read the remaining arguments first. - "13:mov %%rsi, %%r8\n" - "xor %%rax, %%rax\n" // NR_read - "mov %%r13, %%rdi\n" // fd = threadFd - "add $4, %%rsi\n" // buf = &scratch + 4 - "mov $48, %%edx\n" // len = 6*sizeof(void *) - "14:syscall\n" - "cmp $-4, %%rax\n" // EINTR - "jz 14b\n" - "cmp %%rdx, %%rax\n" - "jnz 25f\n" // exit process - "mov %%r9, %%rax\n" - "mov 0x04(%%r8), %%rdi\n" - "mov 0x0C(%%r8), %%rsi\n" - "mov 0x14(%%r8), %%rdx\n" - "mov 0x1C(%%r8), %%r10\n" - "mov 0x2C(%%r8), %%r9\n" - "mov 0x24(%%r8), %%r8\n" - "cmp $231, %%rax\n" // NR_exit_group - "jz 27f\n" // exit program, no message - "syscall\n" - - // Return result of system call to sandboxed thread - "15:mov %%fs:0x0, %%rsi\n" // secure_mem - "add $0x1034, %%rsi\n" // buf = &scratch + 52 - "mov %%rax, (%%rsi)\n" - "mov $8, %%edx\n" // len = 8 - "16:mov %%r13, %%rdi\n" // fd = threadFd - "mov $1, %%eax\n" // NR_write - "17:syscall\n" - "cmp %%rdx, %%rax\n" - "jz 1b\n" - "cmp $-4, %%rax\n" // EINTR - "jz 17b\n" - "jmp 25f\n" // exit process - - // NR_exit: - // Exit trusted thread after cleaning up resources - "18:mov %%fs:0x0, %%rsi\n" // secure_mem - "mov 0xF0(%%rsi), %%rdi\n" // fd = threadFdPub - "mov $3, %%eax\n" // NR_close - "syscall\n" - "mov %%rsi, %%rdi\n" // start = secure_mem - "mov $8192, %%esi\n" // length = 8192 - "xor %%rdx, %%rdx\n" // prot = PROT_NONE - "mov $10, %%eax\n" // NR_mprotect - "syscall\n" - "mov %%r13, %%rdi\n" // fd = threadFd - "mov $3, %%eax\n" // NR_close - "syscall\n" - "mov $56, %%eax\n" // NR_clone - "mov $17, %%rdi\n" // flags = SIGCHLD - "mov $1, %%rsi\n" // stack = 1 - "syscall\n" - "mov %%rax, %%rdi\n" - "test %%rax, %%rax\n" - 
"js 27f\n" // exit process - "jne 21f\n" // reap helper, exit thread - "jmp 22f\n" // unlock mutex - - // NR_clone: - // Original trusted thread calls clone() to create new nascent - // thread. This thread is (typically) fully privileged and shares all - // resources with the caller (i.e. the previous trusted thread), - // and by extension it shares all resources with the sandbox'd - // threads. - "19:mov %%fs:0x0, %%rbp\n" // %rbp = old_shared_mem - "mov %%rsi, %%r15\n" // remember child stack - "mov $1, %%rsi\n" // stack = 1 - "syscall\n" // calls NR_clone - "cmp $-4095, %%rax\n" // return codes -1..-4095 are errno values - "jae 7b\n" // unlock mutex, return result - "add $2, %%rbx\n" - "test %%rax, %%rax\n" - "jne 15b\n" // return result - - // In nascent thread, now. - "sub $2, %%rbx\n" - - // We want to maintain an invalid %rsp whenever we access untrusted - // memory. This ensures that even if an attacker can trick us into - // triggering a SIGSEGV, we will never successfully execute a signal - // handler. - // Signal handlers are inherently dangerous, as an attacker could trick - // us into returning to the wrong address by adjusting the signal stack - // right before the handler returns. - // N.B. While POSIX is curiously silent about this, it appears that on - // Linux, alternate signal stacks are a per-thread property. That is - // good. It means that this security mechanism works, even if the - // sandboxed thread manages to set up an alternate signal stack. - // - // TODO(markus): We currently do not support emulating calls to - // sys_clone() with a zero (i.e. copy) stack parameter. See clone.cc - // for a discussion on how to fix this, if this ever becomes necessary. 
- "mov %%r15, %%r9\n" // %r9 = child_stack - "xor %%r15, %%r15\n" // Request to return from clone() when done - - // Get thread id of nascent thread - "20:mov $186, %%eax\n" // NR_gettid - "syscall\n" - "mov %%rax, %%r14\n" - - // Nascent thread creates socketpair() for sending requests to - // trusted thread. - // We can create the filehandles on the child's stack. Filehandles are - // always treated as untrusted. - // socketpair(AF_UNIX, SOCK_STREAM, 0, fds) - "sub $0x10, %%r9\n" - "mov %%r15, 8(%%r9)\n" // preserve return address on child stack - "mov $53, %%eax\n" // NR_socketpair - "mov $1, %%edi\n" // domain = AF_UNIX - "mov $1, %%esi\n" // type = SOCK_STREAM - "xor %%rdx, %%rdx\n" // protocol = 0 - "mov %%r9, %%r10\n" // sv = child_stack - "syscall\n" - "test %%rax, %%rax\n" - "jz 28f\n" - - // If things went wrong, we don't have an (easy) way of signaling - // the parent. For our purposes, it is sufficient to fail with a - // fatal error. - "jmp 25f\n" // exit process - "21:xor %%rsi, %%rsi\n" - "xor %%rdx, %%rdx\n" - "xor %%r10, %%r10\n" - "mov $61, %%eax\n" // NR_wait4 - "syscall\n" - "cmp $-4, %%eax\n" // EINTR - "jz 21b\n" - "jmp 23f\n" // exit thread (no message) - "22:lea playground$syscall_mutex(%%rip), %%rdi\n" - "mov $4096, %%esi\n" - "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE - "mov $10, %%eax\n" // NR_mprotect - "syscall\n" - "lock; addl $0x80000000, (%%rdi)\n" - "jz 23f\n" // exit thread - "mov $1, %%edx\n" - "mov %%rdx, %%rsi\n" // FUTEX_WAKE - "mov $202, %%eax\n" // NR_futex - "syscall\n" - "23:mov $60, %%eax\n" // NR_exit - "mov $1, %%edi\n" // status = 1 - "24:syscall\n" - "25:mov $1, %%eax\n" // NR_write - "mov $2, %%edi\n" // fd = stderr - "lea 100f(%%rip), %%rsi\n" // "Sandbox violation detected" - "mov $101f-100f, %%edx\n" // len = strlen(msg) - "syscall\n" - "26:mov $1, %%edi\n" - "27:mov $231, %%eax\n" // NR_exit_group - "jmp 24b\n" - - // The first page is mapped read-only for use as securely shared memory - "28:mov 
0xC8(%%rbp), %%r12\n" // %r12 = secure shared memory - "cmp %%rbx, 8(%%rbp)\n" - "jne 25b\n" // exit process - "mov $10, %%eax\n" // NR_mprotect - "mov %%r12, %%rdi\n" // addr = secure_mem - "mov $4096, %%esi\n" // len = 4096 - "mov $1, %%edx\n" // prot = PROT_READ - "syscall\n" - - // The second page is used as scratch space by the trusted thread. - // Make it writable. - "mov $10, %%eax\n" // NR_mprotect - "add $4096, %%rdi\n" // addr = secure_mem + 4096 - "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE - "syscall\n" - - // Call clone() to create new trusted thread(). - // clone(CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD| - // CLONE_SYSVSEM|CLONE_UNTRACED|CLONE_SETTLS, stack, NULL, NULL, - // tls) - "mov 4(%%r9), %%r13d\n" // %r13 = threadFd (on child's stack) - "mov $56, %%eax\n" // NR_clone - "mov $0x8D0F00, %%edi\n" // flags = VM|FS|FILES|SIGH|THR|SYSV|UTR|TLS - "mov $1, %%rsi\n" // stack = 1 - "mov %%r12, %%r8\n" // tls = new_secure_mem - "mov 0xD0(%%rbp), %%r15d\n" // %r15 = processFdPub - "cmp %%rbx, 8(%%rbp)\n" - "jne 25b\n" // exit process - "syscall\n" - "test %%rax, %%rax\n" - "js 25b\n" // exit process - "jz 0b\n" // invoke trustedThreadFnc() - - // Copy the caller's signal mask - "mov 0x1054(%%rbp), %%rax\n" - "mov %%rax, 0x1054(%%r12)\n" - - // Done creating trusted thread. We can now get ready to return to caller - "mov %%r9, %%r8\n" // %r8 = child_stack - "mov 0(%%r9), %%r9d\n" // %r9 = threadFdPub - - // Set up thread local storage with information on how to talk to - // trusted thread and trusted process. - "lea 0xE0(%%r12), %%rsi\n" // args = &secure_mem.TLS; - "mov $158, %%eax\n" // NR_arch_prctl - "mov $0x1001, %%edi\n" // option = ARCH_SET_GS - "syscall\n" - "cmp $-4095, %%rax\n" // return codes -1..-4095 are errno values - "jae 25b\n" // exit process - - // Check whether this is the initial thread, or a newly created one. - // At startup we run the same code as when we create a new thread. 
At - // the very top of this function, you will find that we push 999(%rip) - // on the stack. That is the signal that we should return on the same - // stack rather than return to where clone was called. - "mov 8(%%r8), %%r15\n" - "add $0x10, %%r8\n" - "test %%r15, %%r15\n" - "jne 29f\n" - - // Returning from clone() into the newly created thread is special. We - // cannot unroll the stack, as we just set up a new stack for this - // thread. We have to explicitly restore CPU registers to the values - // that they had when the program originally called clone(). - // We patch the register values in the signal stack frame so that we - // can ask sigreturn() to restore all registers for us. - "sub $0x8, %%r8\n" - "mov 0x50(%%rbp), %%rax\n" - "mov %%rax, 0x00(%%r8)\n" // return address - "xor %%rax, %%rax\n" - "mov %%rax, 0x98(%%r8)\n" // %rax = 0 - "mov 0x58(%%rbp), %%rax\n" - "mov %%rax, 0x80(%%r8)\n" // %rbp - "mov 0x60(%%rbp), %%rax\n" - "mov %%rax, 0x88(%%r8)\n" // %rbx - "mov 0x68(%%rbp), %%rax\n" - "mov %%rax, 0xA0(%%r8)\n" // %rcx - "mov 0x70(%%rbp), %%rax\n" - "mov %%rax, 0x90(%%r8)\n" // %rdx - "mov 0x78(%%rbp), %%rax\n" - "mov %%rax, 0x78(%%r8)\n" // %rsi - "mov 0x80(%%rbp), %%rax\n" - "mov %%rax, 0x70(%%r8)\n" // %rdi - "mov 0x88(%%rbp), %%rax\n" - "mov %%rax, 0x30(%%r8)\n" // %r8 - "mov 0x90(%%rbp), %%rax\n" - "mov %%rax, 0x38(%%r8)\n" // %r9 - "mov 0x98(%%rbp), %%rax\n" - "mov %%rax, 0x40(%%r8)\n" // %r10 - "mov 0xA0(%%rbp), %%rax\n" - "mov %%rax, 0x48(%%r8)\n" // %r11 - "mov 0xA8(%%rbp), %%rax\n" - "mov %%rax, 0x50(%%r8)\n" // %r12 - "mov 0xB0(%%rbp), %%rax\n" - "mov %%rax, 0x58(%%r8)\n" // %r13 - "mov 0xB8(%%rbp), %%rax\n" - "mov %%rax, 0x60(%%r8)\n" // %r14 - "mov 0xC0(%%rbp), %%rax\n" - "mov %%rax, 0x68(%%r8)\n" // %r15 - "cmp %%rbx, 8(%%rbp)\n" - "jne 25b\n" // exit process - - // Nascent thread launches a helper that doesn't share any of our - // resources, except for pages mapped as MAP_SHARED. 
- // clone(SIGCHLD, stack=1) - "29:mov $56, %%eax\n" // NR_clone - "mov $17, %%rdi\n" // flags = SIGCHLD - "mov $1, %%rsi\n" // stack = 1 - "syscall\n" - "test %%rax, %%rax\n" - "js 25b\n" // exit process - "jne 31f\n" - - // Use sendmsg() to send to the trusted process the file handles for - // communicating with the new trusted thread. We also send the address - // of the secure memory area (for sanity checks) and the thread id. - "mov 0xD4(%%rbp), %%edi\n" // transport = Sandbox::cloneFdPub() - "cmp %%rbx, 8(%%rbp)\n" - "jne 25b\n" // exit process - - // 0x00 msg: - // 0x00 msg_name ($0) - // 0x08 msg_namelen ($0) - // 0x10 msg_iov (%r8 + 0x44) - // 0x18 msg_iovlen ($1) - // 0x20 msg_control (%r8 + 0x54) - // 0x28 msg_controllen ($0x18) - // 0x30 data: - // 0x30 msg_flags/err ($0) - // 0x34 secure_mem (%r12) - // 0x3C threadId (%r14d) - // 0x40 threadFdPub (%r9d) - // 0x44 iov: - // 0x44 iov_base (%r8 + 0x30) - // 0x4C iov_len ($0x14) - // 0x54 cmsg: - // 0x54 cmsg_len ($0x18) - // 0x5C cmsg_level ($1, SOL_SOCKET) - // 0x60 cmsg_type ($1, SCM_RIGHTS) - // 0x64 threadFdPub (%r9d) - // 0x68 threadFd (%r13d) - // 0x6C - "sub $0x6C, %%r8\n" - "xor %%rdx, %%rdx\n" // flags = 0 - "mov %%rdx, 0x00(%%r8)\n" // msg_name - "mov %%edx, 0x08(%%r8)\n" // msg_namelen - "mov %%edx, 0x30(%%r8)\n" // msg_flags - "mov $1, %%r11d\n" - "mov %%r11, 0x18(%%r8)\n" // msg_iovlen - "mov %%r11d, 0x5C(%%r8)\n" // cmsg_level - "mov %%r11d, 0x60(%%r8)\n" // cmsg_type - "lea 0x30(%%r8), %%r11\n" - "mov %%r11, 0x44(%%r8)\n" // iov_base - "add $0x14, %%r11\n" - "mov %%r11, 0x10(%%r8)\n" // msg_iov - "add $0x10, %%r11\n" - "mov %%r11, 0x20(%%r8)\n" // msg_control - "mov $0x14, %%r11d\n" - "mov %%r11, 0x4C(%%r8)\n" // iov_len - "add $4, %%r11d\n" - "mov %%r11, 0x28(%%r8)\n" // msg_controllen - "mov %%r11, 0x54(%%r8)\n" // cmsg_len - "mov %%r12, 0x34(%%r8)\n" // secure_mem - "mov %%r14d, 0x3C(%%r8)\n" // threadId - "mov %%r9d, 0x40(%%r8)\n" // threadFdPub - "mov %%r9d, 0x64(%%r8)\n" // 
threadFdPub - "mov %%r13d, 0x68(%%r8)\n" // threadFd - "mov $46, %%eax\n" // NR_sendmsg - "mov %%r8, %%rsi\n" // msg - "syscall\n" - - // Release syscall_mutex_. This signals the trusted process that - // it can write into the original thread's secure memory again. - "mov $10, %%eax\n" // NR_mprotect - "lea playground$syscall_mutex(%%rip), %%rdi\n" - "mov $4096, %%esi\n" - "mov $3, %%edx\n" // PROT_READ | PROT_WRITE - "syscall\n" - "cmp %%rbx, 8(%%rbp)\n" - "jne 25b\n" // exit process - "lock; addl $0x80000000, (%%rdi)\n" - "jz 30f\n" // exit process (no error message) - "mov $1, %%edx\n" - "mov %%rdx, %%rsi\n" // FUTEX_WAKE - "mov $202, %%eax\n" // NR_futex - "syscall\n" - "30:xor %%rdi, %%rdi\n" - "jmp 27b\n" // exit process (no error message) - - // Reap helper - "31:mov %%rax, %%rdi\n" - "32:lea -4(%%r8), %%rsi\n" - "xor %%rdx, %%rdx\n" - "xor %%r10, %%r10\n" - "mov $61, %%eax\n" // NR_wait4 - "syscall\n" - "cmp $-4, %%eax\n" // EINTR - "jz 32b\n" - "mov -4(%%r8), %%eax\n" - "test %%rax, %%rax\n" - "jnz 26b\n" // exit process (no error message) - - // Release privileges by entering seccomp mode. - "mov $157, %%eax\n" // NR_prctl - "mov $22, %%edi\n" // PR_SET_SECCOMP - "mov $1, %%esi\n" - "syscall\n" - "test %%rax, %%rax\n" - "jnz 25b\n" // exit process - - // We can finally start using the stack. Signal handlers no longer pose - // a threat to us. - "mov %%r8, %%rsp\n" - - // Back in the newly created sandboxed thread, wait for trusted process - // to receive request. It is possible for an attacker to make us - // continue even before the trusted process is done. This is OK. It'll - // result in us putting stale values into the new thread's TLS. But that - // data is considered untrusted anyway. 
- "push %%rax\n" - "mov $1, %%edx\n" // len = 1 - "mov %%rsp, %%rsi\n" // buf = %rsp - "mov %%r9, %%rdi\n" // fd = threadFdPub - "33:xor %%rax, %%rax\n" // NR_read - "syscall\n" - "cmp $-4, %%rax\n" // EINTR - "jz 33b\n" - "cmp %%rdx, %%rax\n" - "jne 25b\n" // exit process - "pop %%rax\n" - - // Return to caller. We are in the new thread, now. - "test %%r15, %%r15\n" - "jnz 34f\n" // Returning to createTrustedThread() - - // Returning to the place where clone() had been called. We rely on - // using rt_sigreturn() for restoring our registers. The caller already - // created a signal stack frame, and we patched the register values - // with the ones that were in effect prior to calling sandbox_clone(). - "pop %%r15\n" - "34:mov %%r15, 0xA8(%%rsp)\n" // compute new %rip - "mov $15, %%eax\n" // NR_rt_sigreturn - "syscall\n" - - ".pushsection \".rodata\"\n" - "100:.ascii \"Sandbox violation detected, program aborted\\n\"\n" - "101:.ascii \"WARNING! This is an expensive system call\\n\"\n" - "102:\n" - ".popsection\n" - - "999:pop %%rbp\n" - "pop %%rbx\n" - : - : "g"(&args) - : "rax", "rcx", "rdx", "rdi", "rsi", "r8", "r9", "r10", "r11", "r12", - "r13", "r14", "r15", "rsp", "memory" -#elif defined(__i386__) - struct user_desc u; - u.entry_number = (typeof u.entry_number)-1; - u.base_addr = 0; - u.limit = 0xfffff; - u.seg_32bit = 1; - u.contents = 0; - u.read_exec_only = 0; - u.limit_in_pages = 1; - u.seg_not_present = 0; - u.useable = 1; - SysCalls sys; - if (sys.set_thread_area(&u) < 0) { - die("Cannot set up thread local storage"); - } - asm volatile("movw %w0, %%fs" - : - : "q"(8*u.entry_number+3)); - asm volatile( - "push %%ebx\n" - "push %%ebp\n" - - // Signal handlers are process-wide. This means that for security - // reasons, we cannot allow that the trusted thread ever executes any - // signal handlers. - // We prevent the execution of signal handlers by setting a signal - // mask that blocks all signals. 
In addition, we make sure that the - // stack pointer is invalid. - // We cannot reset the signal mask until after we have enabled - // Seccomp mode. Our sigprocmask() wrapper would normally do this by - // raising a signal, modifying the signal mask in the kernel-generated - // signal frame, and then calling sigreturn(). This presents a bit of - // a Catch-22, as all signals are masked and we can therefore not - // raise any signal that would allow us to generate the signal stack - // frame. - // Instead, we have to create the signal stack frame prior to entering - // Seccomp mode. This incidentally also helps us to restore the - // signal mask to the same value that it had prior to entering the - // sandbox. - // The signal wrapper for clone() is the second entry point into this - // code (by means of sending an IPC to its trusted thread). It goes - // through the same steps of creating a signal stack frame on the - // newly created thread's stacks prior to cloning. See clone.cc for - // details. - "mov %0, %%edi\n" // create signal stack before accessing MMX - "mov $120+0xF000, %%eax\n" // __NR_clone + 0xF000 - "mov %%esp, %%ebp\n" - "int $0\n" // push a signal stack frame (see clone.cc) - "mov %%ebp, 0x1C(%%esp)\n" // pop stack upon call to sigreturn() - "mov %%esp, %%ebp\n" - "mov $2, %%ebx\n" // how = SIG_SETMASK - "pushl $-1\n" - "pushl $-1\n" - "mov %%esp, %%ecx\n" // set = full mask - "xor %%edx, %%edx\n" // old_set = NULL - "mov $8, %%esi\n" // mask all 64 signals - "mov $175, %%eax\n" // NR_rt_sigprocmask - "int $0x80\n" - "mov $126, %%eax\n" // NR_sigprocmask - "int $0x80\n" - "xor %%esp, %%esp\n" // invalidate the stack in all trusted code - "movd %%edi, %%mm6\n" // %mm6 = args - "lea 999f, %%edi\n" // continue in same thread - "movd %%edi, %%mm3\n" - "xor %%edi, %%edi\n" // initial sequence number - "movd %%edi, %%mm2\n" - "jmp 20f\n" // create trusted thread - - // TODO(markus): Coalesce the read() operations by reading into a bigger - // buffer. 
- - // Parameters: - // %mm0: thread's side of threadFd - // %mm1: processFdPub - // %mm3: return address after creation of new trusted thread - // %mm5: secure memory region - // the page following this one contains the scratch space - - // Local variables: - // %mm2: sequence number for trusted calls - // %mm4: thread id - - // Temporary variables: - // %ebp: system call number - // %mm6: secure memory of previous thread - // %mm7: temporary variable for spilling data - - // Layout of secure shared memory region (c.f. securemem.h): - // 0x00: pointer to the secure shared memory region (i.e. self) - // 0x04: sequence number; must match %mm2 - // 0x08: call type; must match %eax, iff %eax == -1 || %eax == -2 - // 0x0C: system call number; passed to syscall in %eax - // 0x10: first argument; passed to syscall in %ebx - // 0x14: second argument; passed to syscall in %ecx - // 0x18: third argument; passed to syscall in %edx - // 0x1C: fourth argument; passed to syscall in %esi - // 0x20: fifth argument; passed to syscall in %edi - // 0x24: sixth argument; passed to syscall in %ebp - // 0x28: stored return address for clone() system call - // 0x2C: stored %ebp value for clone() system call - // 0x30: stored %edi value for clone() system call - // 0x34: stored %esi value for clone() system call - // 0x38: stored %edx value for clone() system call - // 0x3C: stored %ecx value for clone() system call - // 0x40: stored %ebx value for clone() system call - // 0x44: new shared memory for clone() - // 0x48: processFdPub for talking to trusted process - // 0x4C: cloneFdPub for talking to trusted process - // 0x50: set to non-zero, if in debugging mode - // 0x54: most recent SHM id returned by shmget(IPC_PRIVATE) - // 0x58: cookie assigned to us by the trusted process (TLS_COOKIE) - // 0x60: thread id (TLS_TID) - // 0x68: threadFdPub (TLS_THREAD_FD) - // 0x200-0x1000: securely passed verified file name(s) - - // Layout of (untrusted) scratch space: - // 0x00: syscall number; 
passed in %eax - // 0x04: first argument; passed in %ebx - // 0x08: second argument; passed in %ecx - // 0x0C: third argument; passed in %edx - // 0x10: fourth argument; passed in %esi - // 0x14: fifth argument; passed in %edi - // 0x18: sixth argument; passed in %ebp - // 0x1C: return value - // 0x20: RDTSCP result (%eax) - // 0x24: RDTSCP result (%edx) - // 0x28: RDTSCP result (%ecx) - // 0x2C: last system call (updated in syscall.cc) - // 0x30: number of consecutive calls to a time fnc. (e.g. gettimeofday) - // 0x34: nesting level of system calls (for debugging purposes only) - // 0x38: signal mask - // 0x40: in SEGV handler - - "0:xor %%esp, %%esp\n" - "mov $2, %%eax\n" // %mm2 = initial sequence number - "movd %%eax, %%mm2\n" - - // Read request from untrusted thread, or from trusted process. In either - // case, the data that we read has to be considered untrusted. - // read(threadFd, &scratch, 4) - "1:mov $3, %%eax\n" // NR_read - "movd %%mm0, %%ebx\n" // fd = threadFd - "movd %%mm5, %%ecx\n" // secure_mem - "add $0x1000, %%ecx\n" // buf = &scratch - "mov $4, %%edx\n" // len = 4 - "2:int $0x80\n" - "cmp $-4, %%eax\n" // EINTR - "jz 2b\n" - "cmp %%edx, %%eax\n" - "jnz 25f\n" // exit process - - // Retrieve system call number. It is crucial that we only dereference - // 0x1000(%mm5) exactly once. Afterwards, memory becomes untrusted and - // we must use the value that we have read the first time. 
- "mov 0(%%ecx), %%eax\n" - - // If syscall number is -1, execute an unlocked system call from the - // secure memory area - "cmp $-1, %%eax\n" - "jnz 5f\n" - "3:movd %%mm2, %%ebp\n" - "cmp %%ebp, 0x4-0x1000(%%ecx)\n" - "jne 25f\n" // exit process - "cmp 0x08-0x1000(%%ecx), %%eax\n" - "jne 25f\n" // exit process - "mov 0x0C-0x1000(%%ecx), %%eax\n" - "mov 0x10-0x1000(%%ecx), %%ebx\n" - "mov 0x18-0x1000(%%ecx), %%edx\n" - "mov 0x1C-0x1000(%%ecx), %%esi\n" - "mov 0x20-0x1000(%%ecx), %%edi\n" - "mov 0x24-0x1000(%%ecx), %%ebp\n" - "mov 0x14-0x1000(%%ecx), %%ecx\n" - "movd %%edi, %%mm4\n" - "movd %%ebp, %%mm7\n" - "movd %%mm2, %%ebp\n" - "movd %%mm5, %%edi\n" - "cmp %%ebp, 4(%%edi)\n" - "jne 25f\n" // exit process - "add $2, %%ebp\n" - "movd %%ebp, %%mm2\n" - "movd %%mm4, %%edi\n" - "movd %%mm7, %%ebp\n" - - // shmget() gets some special treatment. Whenever we return from this - // system call, we remember the most recently returned SysV shm id. - "cmp $117, %%eax\n" // NR_ipc - "jnz 4f\n" - "cmp $23, %%ebx\n" // shmget() - "jnz 4f\n" - "int $0x80\n" - "mov %%eax, %%ebp\n" - "mov $120, %%eax\n" // NR_clone - "mov $17, %%ebx\n" // flags = SIGCHLD - "mov $1, %%ecx\n" // stack = 1 - "int $0x80\n" - "test %%eax, %%eax\n" - "js 25f\n" // exit process - "mov %%eax, %%ebx\n" - "jnz 8f\n" // wait for child, then return result - "movd %%mm5, %%ebx\n" // start = secure_mem - "mov $4096, %%ecx\n" // len = 4096 - "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE - "mov $125, %%eax\n" // NR_mprotect - "int $0x80\n" - "mov %%ebp, 0x54(%%ebx)\n" // set most recently returned SysV shm id - "xor %%ebx, %%ebx\n" - - // When debugging messages are enabled, warn about expensive system calls - #ifndef NDEBUG - "movd %%mm5, %%ecx\n" - "cmpw $0, 0x50(%%ecx)\n" // debug mode - "jz 27f\n" - "mov $4, %%eax\n" // NR_write - "mov $2, %%ebx\n" // fd = stderr - "lea 101f, %%ecx\n" // "This is an expensive system call" - "mov $102f-101f, %%edx\n" // len = strlen(msg) - "int $0x80\n" - "xor %%ebx, 
%%ebx\n" - #endif - - "jmp 27f\n" // exit program, no message - "4:int $0x80\n" - "jmp 15f\n" // return result - - // If syscall number is -2, execute locked system call from the - // secure memory area - "5:jg 12f\n" - "cmp $-2, %%eax\n" - "jnz 9f\n" - "movd %%mm2, %%ebp\n" - "cmp %%ebp, 0x4-0x1000(%%ecx)\n" - "jne 25f\n" // exit process - "cmp %%eax, 0x8-0x1000(%%ecx)\n" - "jne 25f\n" // exit process - - // When debugging messages are enabled, warn about expensive system calls - #ifndef NDEBUG - "cmpw $0, 0x50-0x1000(%%ecx)\n" - "jz 6f\n" // debug mode - "mov %%ecx, %%ebp\n" - "mov $4, %%eax\n" // NR_write - "mov $2, %%ebx\n" // fd = stderr - "lea 101f, %%ecx\n" // "This is an expensive system call" - "mov $102f-101f, %%edx\n" // len = strlen(msg) - "int $0x80\n" - "mov %%ebp, %%ecx\n" - "6:" - #endif - - "mov 0x0C-0x1000(%%ecx), %%eax\n" - "mov 0x10-0x1000(%%ecx), %%ebx\n" - "mov 0x18-0x1000(%%ecx), %%edx\n" - "mov 0x1C-0x1000(%%ecx), %%esi\n" - "mov 0x20-0x1000(%%ecx), %%edi\n" - "mov 0x24-0x1000(%%ecx), %%ebp\n" - "mov 0x14-0x1000(%%ecx), %%ecx\n" - "movd %%edi, %%mm4\n" - "movd %%ebp, %%mm7\n" - "movd %%mm2, %%ebp\n" - "movd %%mm5, %%edi\n" - "cmp %%ebp, 4(%%edi)\n" - "jne 25f\n" // exit process - - // clone() has unusual calling conventions and must be handled specially - "cmp $120, %%eax\n" // NR_clone - "jz 19f\n" - - // exit() terminates trusted thread - "cmp $1, %%eax\n" // NR_exit - "jz 18f\n" - - // Perform requested system call - "movd %%mm4, %%edi\n" - "movd %%mm7, %%ebp\n" - "int $0x80\n" - - // Unlock mutex - "7:movd %%mm2, %%ebp\n" - "movd %%mm5, %%edi\n" - "cmp %%ebp, 4(%%edi)\n" - "jne 25f\n" // exit process - "add $2, %%ebp\n" - "movd %%ebp, %%mm2\n" - "mov %%eax, %%ebp\n" - "mov $120, %%eax\n" // NR_clone - "mov $17, %%ebx\n" // flags = SIGCHLD - "mov $1, %%ecx\n" // stack = 1 - "int $0x80\n" - "test %%eax, %%eax\n" - "js 25f\n" // exit process - "jz 22f\n" // unlock and exit - "mov %%eax, %%ebx\n" - "8:xor %%ecx, %%ecx\n" - "xor %%edx, 
%%edx\n" - "mov $7, %%eax\n" // NR_waitpid - "int $0x80\n" - "cmp $-4, %%eax\n" // EINTR - "jz 8b\n" - "mov %%ebp, %%eax\n" - "jmp 15f\n" // return result - - // If syscall number is -3, read the time stamp counter - "9:cmp $-3, %%eax\n" - "jnz 10f\n" - "rdtsc\n" // sets %edx:%eax - "xor %%ecx, %%ecx\n" - "jmp 11f\n" - "10:cmp $-4, %%eax\n" - "jnz 12f\n" - "rdtscp\n" // sets %edx:%eax and %ecx - "11:movd %%mm5, %%ebx\n" - "add $0x1020, %%ebx\n" - "mov %%eax, 0(%%ebx)\n" - "mov %%edx, 4(%%ebx)\n" - "mov %%ecx, 8(%%ebx)\n" - "mov %%ebx, %%ecx\n" - "mov $12, %%edx\n" - "jmp 16f\n" // return result - - // Check in syscallTable whether this system call is unrestricted - "12:mov %%eax, %%ebp\n" - #ifndef NDEBUG - "cmpw $0, 0x50-0x1000(%%ecx)\n" - "jnz 13f\n" // debug mode - #endif - "cmp playground$maxSyscall, %%eax\n" - "ja 25f\n" // exit process - "shl $3, %%eax\n" - "add $playground$syscallTable, %%eax\n" - "mov 0(%%eax), %%eax\n" - "cmp $1, %%eax\n" - "jne 25f\n" // exit process - - // Default behavior for unrestricted system calls is to just execute - // them. Read the remaining arguments first. 
- "13:mov $3, %%eax\n" // NR_read - "movd %%mm0, %%ebx\n" // fd = threadFd - "add $4, %%ecx\n" // buf = &scratch + 4 - "mov $24, %%edx\n" // len = 6*sizeof(void *) - "14:int $0x80\n" - "cmp $-4, %%eax\n" // EINTR - "jz 14b\n" - "cmp %%edx, %%eax\n" - "jnz 25f\n" // exit process - "mov %%ebp, %%eax\n" - "mov 0x00(%%ecx), %%ebx\n" - "mov 0x08(%%ecx), %%edx\n" - "mov 0x0C(%%ecx), %%esi\n" - "mov 0x10(%%ecx), %%edi\n" - "mov 0x14(%%ecx), %%ebp\n" - "mov 0x04(%%ecx), %%ecx\n" - "cmp $252, %%eax\n" // NR_exit_group - "jz 27f\n" // exit program, no message - "int $0x80\n" - - // Return result of system call to sandboxed thread - "15:movd %%mm5, %%ecx\n" // secure_mem - "add $0x101C, %%ecx\n" // buf = &scratch + 28 - "mov %%eax, (%%ecx)\n" - "mov $4, %%edx\n" // len = 4 - "16:movd %%mm0, %%ebx\n" // fd = threadFd - "mov $4, %%eax\n" // NR_write - "17:int $0x80\n" - "cmp %%edx, %%eax\n" - "jz 1b\n" - "cmp $-4, %%eax\n" // EINTR - "jz 17b\n" - "jmp 25f\n" // exit process - - // NR_exit: - // Exit trusted thread after cleaning up resources - "18:mov %%edi, %%ecx\n" // secure_mem - "mov 0x68(%%ecx), %%ebx\n" // fd = threadFdPub - "mov $6, %%eax\n" // NR_close - "int $0x80\n" - "mov %%ecx, %%ebx\n" // start = secure_mem - "mov $8192, %%ecx\n" // length = 8192 - "xor %%edx, %%edx\n" // prot = PROT_NONE - "mov $125, %%eax\n" // NR_mprotect - "int $0x80\n" - "movd %%mm0, %%ebx\n" // fd = threadFd - "mov $6, %%eax\n" // NR_close - "int $0x80\n" - "mov $120, %%eax\n" // NR_clone - "mov $17, %%ebx\n" // flags = SIGCHLD - "mov $1, %%ecx\n" // stack = 1 - "int $0x80\n" - "mov %%eax, %%ebx\n" - "test %%eax, %%eax\n" - "js 25f\n" // exit process - "jne 21f\n" // reap helper, exit thread - "jmp 22f\n" // unlock mutex - - // NR_clone: - // Original trusted thread calls clone() to create new nascent - // thread. This thread is (typically) fully privileged and shares all - // resources with the caller (i.e. 
the previous trusted thread), - // and by extension it shares all resources with the sandbox'd - // threads. - "19:movd %%edi, %%mm6\n" // %mm6 = old_shared_mem - "movd %%mm4, %%edi\n" // child_tidptr - "mov %%ecx, %%ebp\n" // remember child stack - "mov $1, %%ecx\n" // stack = 1 - "int $0x80\n" // calls NR_clone - "cmp $-4095, %%eax\n" // return codes -1..-4095 are errno values - "jae 7b\n" // unlock mutex, return result - "movd %%mm2, %%edi\n" - "add $2, %%edi\n" - "movd %%edi, %%mm2\n" - "test %%eax, %%eax\n" - "jne 15b\n" // return result - - // In nascent thread, now. - "sub $2, %%edi\n" - "movd %%edi, %%mm2\n" - - // We want to maintain an invalid %esp whenever we access untrusted - // memory. This ensures that even if an attacker can trick us into - // triggering a SIGSEGV, we will never successfully execute a signal - // handler. - // Signal handlers are inherently dangerous, as an attacker could trick - // us into returning to the wrong address by adjusting the signal stack - // right before the handler returns. - // N.B. While POSIX is curiously silent about this, it appears that on - // Linux, alternate signal stacks are a per-thread property. That is - // good. It means that this security mechanism works, even if the - // sandboxed thread manages to set up an alternate signal stack. - // - // TODO(markus): We currently do not support emulating calls to - // sys_clone() with a zero (i.e. copy) stack parameter. See clone.cc - // for a discussion on how to fix this, if this ever becomes necessary. - "movd %%eax, %%mm3\n" // Request to return from clone() when done - - // Get thread id of nascent thread - "20:mov $224, %%eax\n" // NR_gettid - "int $0x80\n" - "movd %%eax, %%mm4\n" - - // Nascent thread creates socketpair() for sending requests to - // trusted thread. - // We can create the filehandles on the child's stack. Filehandles are - // always treated as untrusted. 
- // socketpair(AF_UNIX, SOCK_STREAM, 0, fds) - "mov $102, %%eax\n" // NR_socketcall - "mov $8, %%ebx\n" // socketpair - "sub $8, %%ebp\n" // sv = child_stack - "mov %%ebp, -0x04(%%ebp)\n" - "movl $0, -0x08(%%ebp)\n" // protocol = 0 - "movl $1, -0x0C(%%ebp)\n" // type = SOCK_STREAM - "movl $1, -0x10(%%ebp)\n" // domain = AF_UNIX - "lea -0x10(%%ebp), %%ecx\n" - "int $0x80\n" - "test %%eax, %%eax\n" - "jz 28f\n" - - // If things went wrong, we don't have an (easy) way of signaling - // the parent. For our purposes, it is sufficient to fail with a - // fatal error. - "jmp 25f\n" // exit process - "21:xor %%ecx, %%ecx\n" - "xor %%edx, %%edx\n" - "mov $7, %%eax\n" // NR_waitpid - "int $0x80\n" - "cmp $-4, %%eax\n" // EINTR - "jz 21b\n" - "jmp 23f\n" // exit thread (no message) - "22:lea playground$syscall_mutex, %%ebx\n" - "mov $4096, %%ecx\n" - "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE - "mov $125, %%eax\n" // NR_mprotect - "int $0x80\n" - "lock; addl $0x80000000, (%%ebx)\n" - "jz 23f\n" // exit thread - "mov $1, %%edx\n" - "mov %%edx, %%ecx\n" // FUTEX_WAKE - "mov $240, %%eax\n" // NR_futex - "int $0x80\n" - "23:mov $1, %%eax\n" // NR_exit - "mov $1, %%ebx\n" // status = 1 - "24:int $0x80\n" - "25:mov $4, %%eax\n" // NR_write - "mov $2, %%ebx\n" // fd = stderr - "lea 100f, %%ecx\n" // "Sandbox violation detected" - "mov $101f-100f, %%edx\n" // len = strlen(msg) - "int $0x80\n" - "26:mov $1, %%ebx\n" - "27:mov $252, %%eax\n" // NR_exit_group - "jmp 24b\n" - - // The first page is mapped read-only for use as securely shared memory - "28:movd %%mm6, %%edi\n" // %edi = old_shared_mem - "mov 0x44(%%edi), %%ebx\n" // addr = secure_mem - "movd %%ebx, %%mm5\n" // %mm5 = secure_mem - "movd %%mm2, %%esi\n" - "cmp %%esi, 4(%%edi)\n" - "jne 25b\n" // exit process - "mov $125, %%eax\n" // NR_mprotect - "mov $4096, %%ecx\n" // len = 4096 - "mov $1, %%edx\n" // prot = PROT_READ - "int $0x80\n" - - // The second page is used as scratch space by the trusted thread. 
- // Make it writable. - "mov $125, %%eax\n" // NR_mprotect - "add $4096, %%ebx\n" // addr = secure_mem + 4096 - "mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE - "int $0x80\n" - - // Call clone() to create new trusted thread(). - // clone(CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD| - // CLONE_SYSVSEM|CLONE_UNTRACED, stack, NULL, NULL, NULL) - "mov 4(%%ebp), %%eax\n" // threadFd (on child's stack) - "movd %%eax, %%mm0\n" // %mm0 = threadFd - "mov $120, %%eax\n" // NR_clone - "mov $0x850F00, %%ebx\n" // flags = VM|FS|FILES|SIGH|THR|SYSV|UTR - "mov $1, %%ecx\n" // stack = 1 - "movd 0x48(%%edi), %%mm1\n" // %mm1 = processFdPub - "cmp %%esi, 4(%%edi)\n" - "jne 25b\n" // exit process - "int $0x80\n" - "test %%eax, %%eax\n" - "js 25b\n" // exit process - "jz 0b\n" // invoke trustedThreadFnc() - - // Set up thread local storage - "mov $0x51, %%eax\n" // seg_32bit, limit_in_pages, useable - "mov %%eax, -0x04(%%ebp)\n" - "mov $0xFFFFF, %%eax\n" // limit - "mov %%eax, -0x08(%%ebp)\n" - "movd %%mm5, %%eax\n" - "add $0x58, %%eax\n" - "mov %%eax, -0x0C(%%ebp)\n" // base_addr = &secure_mem.TLS - "mov %%fs, %%eax\n" - "shr $3, %%eax\n" - "mov %%eax, -0x10(%%ebp)\n" // entry_number - "mov $243, %%eax\n" // NR_set_thread_area - "lea -0x10(%%ebp), %%ebx\n" - "int $0x80\n" - "test %%eax, %%eax\n" - "jnz 25b\n" // exit process - - // Copy the caller's signal mask - "movd %%mm5, %%edx\n" - "mov 0x1038(%%edi), %%eax\n" - "mov %%eax, 0x1038(%%edx)\n" - "mov 0x103C(%%edi), %%eax\n" - "mov %%eax, 0x103C(%%edx)\n" - - // Done creating trusted thread. We can now get ready to return to caller - "mov 0(%%ebp), %%esi\n" // %esi = threadFdPub - "add $8, %%ebp\n" - - // Check whether this is the initial thread, or a newly created one. - // At startup we run the same code as when we create a new thread. At - // the very top of this function, you will find that we store 999f - // in %%mm3. 
That is the signal that we should return on the same - // stack rather than return to where clone was called. - "movd %%mm3, %%eax\n" - "movd %%mm2, %%edx\n" - "test %%eax, %%eax\n" - "jne 29f\n" - - // Returning from clone() into the newly created thread is special. We - // cannot unroll the stack, as we just set up a new stack for this - // thread. We have to explicitly restore CPU registers to the values - // that they had when the program originally called clone(). - // We patch the register values in the signal stack frame so that we - // can ask sigreturn() to restore all registers for us. - "sub $0x4, %%ebp\n" - "mov 0x28(%%edi), %%eax\n" - "mov %%eax, 0x00(%%ebp)\n" // return address - "xor %%eax, %%eax\n" - "mov %%eax, 0x30(%%ebp)\n" // %eax = 0 - "mov 0x2C(%%edi), %%eax\n" - "mov %%eax, 0x1C(%%ebp)\n" // %ebp - "mov 0x30(%%edi), %%eax\n" - "mov %%eax, 0x14(%%ebp)\n" // %edi - "mov 0x34(%%edi), %%eax\n" - "mov %%eax, 0x18(%%ebp)\n" // %esi - "mov 0x38(%%edi), %%eax\n" - "mov %%eax, 0x28(%%ebp)\n" // %edx - "mov 0x3C(%%edi), %%eax\n" - "mov %%eax, 0x2C(%%ebp)\n" // %ecx - "mov 0x40(%%edi), %%eax\n" - "mov %%eax, 0x24(%%ebp)\n" // %ebx - "cmp %%edx, 4(%%edi)\n" - "jne 25b\n" // exit process - - // Nascent thread launches a helper that doesn't share any of our - // resources, except for pages mapped as MAP_SHARED. - // clone(SIGCHLD, stack=1) - "29:mov $120, %%eax\n" // NR_clone - "mov $17, %%ebx\n" // flags = SIGCHLD - "mov $1, %%ecx\n" // stack = 1 - "int $0x80\n" - "test %%eax, %%eax\n" - "js 25b\n" // exit process - "jne 31f\n" - - // Use sendmsg() to send to the trusted process the file handles for - // communicating with the new trusted thread. We also send the address - // of the secure memory area (for sanity checks) and the thread id. 
- "cmp %%edx, 4(%%edi)\n" - "jne 25b\n" // exit process - - // 0x00 socketcall: - // 0x00 socket (0x4C(%edi)) - // 0x04 msg (%ecx + 0x0C) - // 0x08 flags ($0) - // 0x0C msg: - // 0x0C msg_name ($0) - // 0x10 msg_namelen ($0) - // 0x14 msg_iov (%ecx + 0x34) - // 0x18 msg_iovlen ($1) - // 0x1C msg_control (%ecx + 0x3C) - // 0x20 msg_controllen ($0x14) - // 0x24 data: - // 0x24 msg_flags/err ($0) - // 0x28 secure_mem (%mm5) - // 0x2C threadId (%mm4) - // 0x30 threadFdPub (%esi) - // 0x34 iov: - // 0x34 iov_base (%ecx + 0x24) - // 0x38 iov_len ($0x10) - // 0x3C cmsg: - // 0x3C cmsg_len ($0x14) - // 0x40 cmsg_level ($1, SOL_SOCKET) - // 0x44 cmsg_type ($1, SCM_RIGHTS) - // 0x48 threadFdPub (%esi) - // 0x4C threadFd (%mm0) - // 0x50 - "lea -0x50(%%ebp), %%ecx\n" - "xor %%eax, %%eax\n" - "mov %%eax, 0x08(%%ecx)\n" // flags - "mov %%eax, 0x0C(%%ecx)\n" // msg_name - "mov %%eax, 0x10(%%ecx)\n" // msg_namelen - "mov %%eax, 0x24(%%ecx)\n" // msg_flags - "inc %%eax\n" - "mov %%eax, 0x18(%%ecx)\n" // msg_iovlen - "mov %%eax, 0x40(%%ecx)\n" // cmsg_level - "mov %%eax, 0x44(%%ecx)\n" // cmsg_type - "movl $0x10, 0x38(%%ecx)\n" // iov_len - "mov $0x14, %%eax\n" - "mov %%eax, 0x20(%%ecx)\n" // msg_controllen - "mov %%eax, 0x3C(%%ecx)\n" // cmsg_len - "mov 0x4C(%%edi), %%eax\n" // cloneFdPub - "mov %%eax, 0x00(%%ecx)\n" // socket - "lea 0x0C(%%ecx), %%eax\n" - "mov %%eax, 0x04(%%ecx)\n" // msg - "add $0x18, %%eax\n" - "mov %%eax, 0x34(%%ecx)\n" // iov_base - "add $0x10, %%eax\n" - "mov %%eax, 0x14(%%ecx)\n" // msg_iov - "add $8, %%eax\n" - "mov %%eax, 0x1C(%%ecx)\n" // msg_control - "mov %%esi, 0x30(%%ecx)\n" // threadFdPub - "mov %%esi, 0x48(%%ecx)\n" // threadFdPub - "movd %%mm5, %%eax\n" - "mov %%eax, 0x28(%%ecx)\n" // secure_mem - "movd %%mm4, %%eax\n" - "mov %%eax, 0x2C(%%ecx)\n" // threadId - "movd %%mm0, %%eax\n" - "mov %%eax, 0x4C(%%ecx)\n" // threadFd - "mov $16, %%ebx\n" // sendmsg() - "mov $102, %%eax\n" // NR_socketcall - "int $0x80\n" - - // Release syscall_mutex_. 
This signals the trusted process that - // it can write into the original thread's secure memory again. - "mov $125, %%eax\n" // NR_mprotect - "lea playground$syscall_mutex, %%ebx\n" - "mov $4096, %%ecx\n" - "mov $3, %%edx\n" // PROT_READ | PROT_WRITE - "int $0x80\n" - "movd %%mm2, %%edx\n" - "cmp %%edx, 0x4(%%edi)\n" - "jnz 25b\n" // exit process - "lock; addl $0x80000000, (%%ebx)\n" - "jz 30f\n" // exit process (no error message) - "mov $1, %%edx\n" - "mov %%edx, %%ecx\n" // FUTEX_WAKE - "mov $240, %%eax\n" // NR_futex - "int $0x80\n" - "30:xor %%ebx, %%ebx\n" - "jmp 27b\n" // exit process (no error message) - - // Reap helper - "31:mov %%eax, %%ebx\n" - "32:lea -4(%%ebp), %%ecx\n" - "xor %%edx, %%edx\n" - "mov $7, %%eax\n" // NR_waitpid - "int $0x80\n" - "cmp $-4, %%eax\n" // EINTR - "jz 32b\n" - "mov -4(%%ebp), %%eax\n" - "test %%eax, %%eax\n" - "jnz 26b\n" // exit process (no error message) - - // Release privileges by entering seccomp mode. - "33:mov $172, %%eax\n" // NR_prctl - "mov $22, %%ebx\n" // PR_SET_SECCOMP - "mov $1, %%ecx\n" - "int $0x80\n" - "test %%eax, %%eax\n" - "jnz 25b\n" // exit process - - // We can finally start using the stack. Signal handlers no longer pose - // a threat to us. - "mov %%ebp, %%esp\n" - - // Back in the newly created sandboxed thread, wait for trusted process - // to receive request. It is possible for an attacker to make us - // continue even before the trusted process is done. This is OK. It'll - // result in us putting stale values into the new thread's TLS. But that - // data is considered untrusted anyway. - "push %%eax\n" - "mov $1, %%edx\n" // len = 1 - "mov %%esp, %%ecx\n" // buf = %esp - "mov %%esi, %%ebx\n" // fd = threadFdPub - "34:mov $3, %%eax\n" // NR_read - "int $0x80\n" - "cmp $-4, %%eax\n" // EINTR - "jz 34b\n" - "cmp %%edx, %%eax\n" - "jne 25b\n" // exit process - "pop %%eax\n" - - // Return to caller. We are in the new thread, now. 
- "movd %%mm3, %%ebx\n" - "test %%ebx, %%ebx\n" - "jnz 35f\n" // Returning to createTrustedThread() - - // Returning to the place where clone() had been called. We rely on - // using sigreturn() for restoring our registers. The caller already - // created a signal stack frame, and we patched the register values - // with the ones that were in effect prior to calling sandbox_clone(). - "pop %%ebx\n" - "35:mov %%ebx, 0x38(%%esp)\n" // compute new %eip - "mov $119, %%eax\n" // NR_sigreturn - "int $0x80\n" - - ".pushsection \".rodata\"\n" - "100:.ascii \"Sandbox violation detected, program aborted\\n\"\n" - "101:.ascii \"WARNING! This is an expensive system call\\n\"\n" - "102:\n" - ".popsection\n" - - "999:pop %%ebp\n" - "pop %%ebx\n" - : - : "g"(&args) - : "eax", "ecx", "edx", "edi", "esi", "esp", "memory" -#else -#error Unsupported target platform -#endif -); -} - -} // namespace diff --git a/sandbox/linux/seccomp/x86_decode.cc b/sandbox/linux/seccomp/x86_decode.cc deleted file mode 100644 index 1b55139..0000000 --- a/sandbox/linux/seccomp/x86_decode.cc +++ /dev/null @@ -1,310 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
// ---------------------------------------------------------------------------
// sandbox/linux/seccomp/x86_decode.h
//
// The declarations from the header are reproduced first so that the decoder
// below is self-contained; this replaces the former #include "x86_decode.h".
// ---------------------------------------------------------------------------

// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef X86_DECODE_H__
#define X86_DECODE_H__
namespace playground {

// Bit positions of the individual flags inside an x86-64 REX prefix byte.
enum {
  REX_B = 0x01,
  REX_X = 0x02,
  REX_R = 0x04,
  REX_W = 0x08
};

// Decodes the single x86 instruction that starts at *ip and advances *ip to
// the first byte after it.
//
//   ip          in/out: address of the instruction; updated to point past it.
//   is64bit     true to decode as 64-bit code (REX prefixes are recognised,
//               default address width is 8), false for 32-bit code.
//   has_prefix  optional out: true if at least one prefix byte (legacy or REX)
//               preceded the opcode.
//   rex_ptr     optional out: address of the most recently seen REX prefix
//               byte (64-bit mode only), or 0 if none was seen.
//   mod_rm_ptr  optional out: address of the ModR/M byte, or 0 if the
//               instruction was not decoded as having one.
//   sib_ptr     optional out: address of the SIB byte, or 0 if there is none.
//   is_group    optional out: true if the opcode belongs to one of the opcode
//               groups whose meaning is selected by ModR/M bits 3..5.
//
// Returns the opcode: the opcode byte itself for one-byte opcodes, or
// 0x0F00 | second_byte for two-byte (0x0F-escaped) opcodes.
//
// Opcodes that the tables do not describe are returned with only the prefix
// and opcode bytes consumed; callers are expected to tolerate the decoder
// resynchronising a few bytes later.
unsigned short next_inst(const char **ip, bool is64bit, bool *has_prefix = 0,
                         char **rex_ptr = 0, char **mod_rm_ptr = 0,
                         char **sib_ptr = 0, bool *is_group = 0);

}  // namespace
#endif  // X86_DECODE_H__

// ---------------------------------------------------------------------------
// sandbox/linux/seccomp/x86_decode.cc
// ---------------------------------------------------------------------------

namespace playground {

// NOTE: the decoder is plain table-driven byte parsing and does not depend on
// the architecture of the machine it runs on, so it is defined
// unconditionally (matching the unconditional declaration above).
unsigned short next_inst(const char **ip, bool is64bit, bool *has_prefix,
                         char **rex_ptr, char **mod_rm_ptr, char **sib_ptr,
                         bool *is_group) {
  // Bit layout of the entries in opcode_types[] and group_table[].
  // A value of zero marks an opcode that is not decoded. Bit 0 is set on
  // every non-zero entry that is not a group reference (presumably a plain
  // "known opcode" marker -- it is never tested below).
  enum {
    BYTE_OP     = (1<<1),                    // 0x02  immediate is one byte
    IMM         = (1<<2),                    // 0x04  immediate operand
    IMM_BYTE    = (2<<2),                    // 0x08  8 bit immediate operand
    MEM_ABS     = (3<<2),                    // 0x0C  absolute memory offset
    MODE_MASK   = (7<<2),                    // 0x1C  mask for the three above
    MOD_RM      = (1<<5),                    // 0x20  ModR/M byte follows
    STACK       = (1<<6),                    // 0x40  64 bit operand in 64 bit mode
    GROUP       = (1<<7),                    // 0x80  entry indexes group_table[]
    GROUP_MASK  = 0x7F,
  };

  // Indexed by opcode byte for one-byte opcodes (0..255) and by
  // 256 + second byte for 0x0F-escaped two-byte opcodes.
  static const unsigned char opcode_types[512] = {
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x01, // 0x00 - 0x07
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x00, // 0x08 - 0x0F
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x01, // 0x10 - 0x17
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x01, // 0x18 - 0x1F
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x20 - 0x27
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x28 - 0x2F
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x30 - 0x37
    0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x38 - 0x3F
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x40 - 0x47
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x48 - 0x4F
    0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, // 0x50 - 0x57
    0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, // 0x58 - 0x5F
    0x01, 0x01, 0x21, 0x21, 0x00, 0x00, 0x00, 0x00, // 0x60 - 0x67
    0x45, 0x25, 0x49, 0x29, 0x03, 0x01, 0x03, 0x01, // 0x68 - 0x6F
    0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, // 0x70 - 0x77
    0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, // 0x78 - 0x7F
    0x27, 0x25, 0x27, 0x29, 0x23, 0x21, 0x23, 0x21, // 0x80 - 0x87
    0x23, 0x21, 0x23, 0x21, 0x21, 0x21, 0x21, 0x80, // 0x88 - 0x8F
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x90 - 0x97
    0x01, 0x01, 0x05, 0x01, 0x41, 0x41, 0x01, 0x01, // 0x98 - 0x9F
    0x0F, 0x0D, 0x0F, 0x0D, 0x03, 0x01, 0x03, 0x01, // 0xA0 - 0xA7
    0x09, 0x05, 0x03, 0x01, 0x03, 0x01, 0x03, 0x01, // 0xA8 - 0xAF
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, // 0xB0 - 0xB7
    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0xB8 - 0xBF
    0x27, 0x29, 0x01, 0x01, 0x21, 0x21, 0x27, 0x25, // 0xC0 - 0xC7
    0x01, 0x01, 0x01, 0x01, 0x01, 0x09, 0x01, 0x01, // 0xC8 - 0xCF
    0x23, 0x21, 0x23, 0x21, 0x09, 0x09, 0x01, 0x01, // 0xD0 - 0xD7
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xD8 - 0xDF
    0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, // 0xE0 - 0xE7
    0x05, 0x05, 0x05, 0x09, 0x03, 0x01, 0x03, 0x01, // 0xE8 - 0xEF
    0x00, 0x01, 0x00, 0x00, 0x01, 0x01, 0x88, 0x90, // 0xF0 - 0xF7
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x98, 0xA0, // 0xF8 - 0xFF
    0x00, 0xA8, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, // 0xF00 - 0xF07
    0x01, 0x01, 0x00, 0x01, 0x00, 0x21, 0x01, 0x00, // 0xF08 - 0xF0F
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF10 - 0xF17
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF18 - 0xF1F
    0x21, 0x21, 0x21, 0x21, 0x00, 0x00, 0x00, 0x00, // 0xF20 - 0xF27
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF28 - 0xF2F
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, // 0xF30 - 0xF37
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xF38 - 0xF3F
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF40 - 0xF47
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF48 - 0xF4F
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF50 - 0xF57
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF58 - 0xF5F
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xF60 - 0xF67
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xF68 - 0xF6F
    0x21, 0x00, 0x00, 0x00, 0x21, 0x21, 0x21, 0x00, // 0xF70 - 0xF77
    0x21, 0x21, 0x00, 0x00, 0x21, 0x21, 0x21, 0x21, // 0xF78 - 0xF7F
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xF80 - 0xF87
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xF88 - 0xF8F
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF90 - 0xF97
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF98 - 0xF9F
    0x01, 0x01, 0x01, 0x21, 0x29, 0x21, 0x00, 0x00, // 0xFA0 - 0xFA7
    0x01, 0x01, 0x01, 0x21, 0x29, 0x21, 0x21, 0x21, // 0xFA8 - 0xFAF
    0x23, 0x21, 0x00, 0x21, 0x00, 0x00, 0x23, 0x21, // 0xFB0 - 0xFB7
    0x21, 0x00, 0x29, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFB8 - 0xFBF
    0x21, 0x21, 0x00, 0x21, 0x00, 0x00, 0x00, 0x21, // 0xFC0 - 0xFC7
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xFC8 - 0xFCF
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFD0 - 0xFD7
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFD8 - 0xFDF
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFE0 - 0xFE7
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFE8 - 0xFEF
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xFF0 - 0xFF7
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xFF8 - 0xFFF
  };

  // Indexed by (opcode_types entry & GROUP_MASK) + ModR/M bits 3..5; the
  // entries use the same bit layout as opcode_types[].
  static const unsigned char group_table[56] = {
    0x61, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Group 1A
    0x27, 0x27, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, // Group 3 (Byte)
    0x25, 0x25, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // Group 3
    0x23, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Group 4
    0x21, 0x21, 0x61, 0x21, 0x61, 0x21, 0x61, 0x00, // Group 5
    0x00, 0x00, 0x21, 0x21, 0x21, 0x00, 0x21, 0x23, // Group 7
    0x21, 0x00, 0x00, 0x21, 0x21, 0x00, 0x21, 0x00, // Group 7 (Alternate)
  };

  const unsigned char *insn_ptr = reinterpret_cast<const unsigned char *>(*ip);
  int operand_width = 4;
  int address_width = 4;
  if (is64bit) {
    address_width = 8;
  }
  unsigned char byte, rex = 0;
  bool found_prefix = false;
  if (rex_ptr) {
    *rex_ptr = 0;
  }
  if (mod_rm_ptr) {
    *mod_rm_ptr = 0;
  }
  if (sib_ptr) {
    *sib_ptr = 0;
  }

  // Consume prefix bytes. The loop is left (via goto) with "byte" holding the
  // first opcode byte and insn_ptr pointing just past it.
  for (;; ++insn_ptr) {
    switch (byte = *insn_ptr) {
      case 0x66: // Operand width prefix
        operand_width ^= 6;             // toggles 4 <-> 2
        break;
      case 0x67: // Address width prefix
        address_width ^= is64bit ? 12 : 6;  // toggles 8 <-> 4, or 4 <-> 2
        break;
      case 0x26: // Segment selector prefixes
      case 0x2e:
      case 0x36:
      case 0x3e:
      case 0x64:
      case 0x65:
      case 0xF0:
      case 0xF2:
      case 0xF3:
        break;
      case 0x40: case 0x41: case 0x42: case 0x43: // 64 bit REX prefixes
      case 0x44: case 0x45: case 0x46: case 0x47:
      case 0x48: case 0x49: case 0x4A: case 0x4B:
      case 0x4C: case 0x4D: case 0x4E: case 0x4F:
        if (is64bit) {
          if (rex_ptr) {
            *rex_ptr =
                const_cast<char *>(reinterpret_cast<const char *>(insn_ptr));
          }
          rex = byte;
          found_prefix = true;
          continue;
        }
        // In 32 bit code these bytes are ordinary opcodes.
        // fall through
      default:
        ++insn_ptr;
        goto no_more_prefixes;
    }
    // A legacy prefix was consumed. Any REX byte recorded before it is
    // discarded, as only a REX directly preceding the opcode is kept.
    rex = 0;
    found_prefix = true;
  }
no_more_prefixes:
  if (has_prefix) {
    *has_prefix = found_prefix;
  }
  if (rex & REX_W) {
    operand_width = 8;
  }
  unsigned char type;
  unsigned short insn = byte;
  unsigned int idx = 0;
  if (byte == 0x0F) {
    // Two-byte opcode: use the second half of opcode_types[].
    byte = *insn_ptr++;
    insn = (insn << 8) | byte;
    idx = 256;
  }
  type = opcode_types[idx + byte];
  bool found_group = false;
  unsigned char mod_rm = 0;
  unsigned char sib = 0;
  if (type & GROUP) {
    // The real instruction type depends on ModR/M bits 3..5. Only peek at
    // the ModR/M byte here; it is consumed below if the group entry has
    // MOD_RM set.
    found_group = true;
    mod_rm = *insn_ptr;
    if (mod_rm_ptr) {
      *mod_rm_ptr =
          const_cast<char *>(reinterpret_cast<const char *>(insn_ptr));
    }
    unsigned char group = (type & GROUP_MASK) + ((mod_rm >> 3) & 0x7);
    if ((type & GROUP_MASK) == 40 && (mod_rm >> 6) == 3) {
      // Group 7 uses an alternate table row for register-direct operands.
      group += 8;
    }
    type = group_table[group];
  }
  if (!type) {
    // We know that we still don't decode some of the more obscure
    // instructions, but for all practical purposes that doesn't matter.
    // Compilers are unlikely to output them, and even if we encounter
    // hand-coded assembly, we will soon synchronize to the instruction
    // stream again.
    //
    // std::cerr << "Unsupported instruction at 0x" << std::hex <<
    //   std::uppercase << reinterpret_cast<long>(*ip) << " [ ";
    // for (const unsigned char *ptr =
    //          reinterpret_cast<const unsigned char *>(*ip);
    //      ptr < insn_ptr; ) {
    //   std::cerr << std::hex << std::uppercase << std::setw(2) <<
    //     std::setfill('0') << (unsigned int)*ptr++ << ' ';
    // }
    // std::cerr << "]" << std::endl;
  } else {
    if (is64bit && (type & STACK)) {
      operand_width = 8;
    }
    if (type & MOD_RM) {
      if (mod_rm_ptr) {
        *mod_rm_ptr =
            const_cast<char *>(reinterpret_cast<const char *>(insn_ptr));
      }
      mod_rm = *insn_ptr++;
      int mod = (mod_rm >> 6) & 0x3;
      int rm  = 8*(rex & REX_B) + (mod_rm & 0x7);
      if (mod != 3) {
        if (address_width == 2) {
          // 16 bit addressing: no SIB byte; displacement is 0, 1 or 2 bytes.
          switch (mod) {
            case 0:
              if (rm != 6 /* SI */) {
                break;
              }
              // fall through
            case 2:
              insn_ptr++;
              // fall through
            case 1:
              insn_ptr++;
              break;
          }
        } else {
          if ((rm & 0x7) == 4) {
            if (sib_ptr) {
              *sib_ptr =
                  const_cast<char *>(reinterpret_cast<const char *>(insn_ptr));
            }
            sib = *insn_ptr++;
            if (!mod && (sib & 0x7) == 5 /* BP */) {
              insn_ptr += 4;
            }
          }
          switch (mod) {
            case 0:
              // mod == 0 with r/m == 101b always carries a 32 bit
              // displacement. REX.B is not decoded for this encoding, so
              // only the low three bits of "rm" may be compared (the same
              // way the SIB test above masks them).
              if ((rm & 0x7) != 5 /* BP */) {
                break;
              }
              // fall through
            case 2:
              insn_ptr += 3;
              // fall through
            case 1:
              insn_ptr++;
              break;
          }
        }
      }
    }
    // Operands that are not described by the type tables.
    switch (insn) {
      case 0xC8: // ENTER
        insn_ptr++;
        // fall through
      case 0x9A: // CALL (far)
      case 0xC2: // RET (near)
      case 0xCA: // LRET
      case 0xEA: // JMP (far)
        insn_ptr += 2;
        break;
      case 0xF80: case 0xF81: case 0xF82: case 0xF83: // Jcc (rel)
      case 0xF84: case 0xF85: case 0xF86: case 0xF87:
      case 0xF88: case 0xF89: case 0xF8A: case 0xF8B:
      case 0xF8C: case 0xF8D: case 0xF8E: case 0xF8F:
        // The displacement never grows beyond 32 bits, even when REX.W
        // selected a 64 bit operand size (same rule as for IMM below).
        insn_ptr += (operand_width == 8) ? 4 : operand_width;
        break;
    }
    switch (type & MODE_MASK) {
      case IMM:
        if (!(type & BYTE_OP)) {
          switch (insn) {
            case 0xB8: case 0xB9: case 0xBA: case 0xBB:
            case 0xBC: case 0xBD: case 0xBE: case 0xBF:
              // Allow MOV to/from 64bit addresses
              insn_ptr += operand_width;
              break;
            default:
              insn_ptr += (operand_width == 8) ? 4 : operand_width;
              break;
          }
          break;
        }
        // fall through
      case IMM_BYTE:
        insn_ptr++;
        break;
      case MEM_ABS:
        insn_ptr += address_width;
        break;
    }
  }
  if (is_group) {
    *is_group = found_group;
  }
  *ip = reinterpret_cast<const char *>(insn_ptr);
  return insn;
}

}  // namespace