diff options
-rw-r--r-- | sandbox/linux/sandbox_linux.gypi | 3 | ||||
-rw-r--r-- | sandbox/linux/seccomp-bpf/Makefile | 2 | ||||
-rw-r--r-- | sandbox/linux/seccomp-bpf/codegen.cc | 36 | ||||
-rw-r--r-- | sandbox/linux/seccomp-bpf/codegen.h | 9 | ||||
-rw-r--r-- | sandbox/linux/seccomp-bpf/die.cc | 22 | ||||
-rw-r--r-- | sandbox/linux/seccomp-bpf/die.h | 12 | ||||
-rw-r--r-- | sandbox/linux/seccomp-bpf/errorcode.cc | 4 | ||||
-rw-r--r-- | sandbox/linux/seccomp-bpf/errorcode.h | 3 | ||||
-rw-r--r-- | sandbox/linux/seccomp-bpf/sandbox_bpf.cc | 336 | ||||
-rw-r--r-- | sandbox/linux/seccomp-bpf/sandbox_bpf.h | 71 | ||||
-rw-r--r-- | sandbox/linux/seccomp-bpf/sandbox_bpf_unittest.cc | 159 | ||||
-rw-r--r-- | sandbox/linux/seccomp-bpf/syscall.cc | 282 | ||||
-rw-r--r-- | sandbox/linux/seccomp-bpf/syscall.h | 23 | ||||
-rw-r--r-- | sandbox/linux/seccomp-bpf/syscall_unittest.cc | 113 |
14 files changed, 67 insertions, 1008 deletions
diff --git a/sandbox/linux/sandbox_linux.gypi b/sandbox/linux/sandbox_linux.gypi index 535fb89..c02cd31 100644 --- a/sandbox/linux/sandbox_linux.gypi +++ b/sandbox/linux/sandbox_linux.gypi @@ -58,7 +58,6 @@ 'seccomp-bpf/errorcode_unittest.cc', 'seccomp-bpf/sandbox_bpf_unittest.cc', 'seccomp-bpf/syscall_iterator_unittest.cc', - 'seccomp-bpf/syscall_unittest.cc', ], }], ], @@ -78,8 +77,6 @@ 'seccomp-bpf/instruction.h', 'seccomp-bpf/sandbox_bpf.cc', 'seccomp-bpf/sandbox_bpf.h', - 'seccomp-bpf/syscall.cc', - 'seccomp-bpf/syscall.h', 'seccomp-bpf/syscall_iterator.cc', 'seccomp-bpf/syscall_iterator.h', 'seccomp-bpf/verifier.cc', diff --git a/sandbox/linux/seccomp-bpf/Makefile b/sandbox/linux/seccomp-bpf/Makefile index 6d644b8..a697198 100644 --- a/sandbox/linux/seccomp-bpf/Makefile +++ b/sandbox/linux/seccomp-bpf/Makefile @@ -2,7 +2,7 @@ DEF_CFLAGS = -g -O3 -Wall -Werror -Wextra -Wno-missing-field-initializers -fPIC DEF_CPPFLAGS = -D_GNU_SOURCE -DSECCOMP_BPF_STANDALONE -DSECCOMP_BPF_VALGRIND_HACKS -include valgrind/valgrind.h -iquote ../../.. DEF_LDFLAGS = -g -lpthread DEPFLAGS = -MMD -MF .$@.d -MODS := demo sandbox_bpf basicblock codegen die errorcode syscall syscall_iterator util verifier +MODS := demo sandbox_bpf die codegen errorcode syscall_iterator util verifier OBJS64 := $(shell echo ${MODS} | xargs -n 1 | sed -e 's/$$/.o64/') OBJS32 := $(shell echo ${MODS} | xargs -n 1 | sed -e 's/$$/.o32/') ALL_OBJS = $(OBJS32) $(OBJS64) diff --git a/sandbox/linux/seccomp-bpf/codegen.cc b/sandbox/linux/seccomp-bpf/codegen.cc index 649793c..8b36315 100644 --- a/sandbox/linux/seccomp-bpf/codegen.cc +++ b/sandbox/linux/seccomp-bpf/codegen.cc @@ -5,31 +5,6 @@ #include "sandbox/linux/seccomp-bpf/codegen.h" -namespace { - -// Helper function for Traverse(). -void TraverseRecursively(std::set<playground2::Instruction *> *visited, - playground2::Instruction *instruction) { - if (visited->find(instruction) == visited->end()) { - visited->insert(instruction); - switch (BPF_CLASS(instruction->code)) { - case BPF_JMP: - if (BPF_OP(instruction->code) != BPF_JA) { - TraverseRecursively(visited, instruction->jf_ptr); - } - TraverseRecursively(visited, instruction->jt_ptr); - break; - case BPF_RET: - break; - default: - TraverseRecursively(visited, instruction->next); - break; - } - } -} - -} // namespace - namespace playground2 { CodeGen::CodeGen() @@ -170,17 +145,6 @@ void CodeGen::JoinInstructions(Instruction *head, Instruction *tail) { return; } -void CodeGen::Traverse(Instruction *instruction, - void (*fnc)(Instruction *, void *), void *aux) { - std::set<Instruction *> visited; - TraverseRecursively(&visited, instruction); - for (std::set<Instruction *>::const_iterator iter = visited.begin(); - iter != visited.end(); - ++iter) { - fnc(*iter, aux); - } -} - void CodeGen::FindBranchTargets(const Instruction& instructions, BranchTargets *branch_targets) { // Follow all possible paths through the "instructions" graph and compute diff --git a/sandbox/linux/seccomp-bpf/codegen.h b/sandbox/linux/seccomp-bpf/codegen.h index 88521c2..b7d1d39 100644 --- a/sandbox/linux/seccomp-bpf/codegen.h +++ b/sandbox/linux/seccomp-bpf/codegen.h @@ -77,15 +77,6 @@ class CodeGen { // or if a (conditional) jump still has an unsatisfied target. void JoinInstructions(Instruction *head, Instruction *tail); - // Traverse the graph of instructions and visit each instruction once. - // Traversal order is implementation-defined. It is acceptable to make - // changes to the graph from within the callback function. These changes - // do not affect traversal. - // The "fnc" function gets called with both the instruction and the opaque - // "aux" pointer. - void Traverse(Instruction *, void (*fnc)(Instruction *, void *aux), - void *aux); - // Compiles the graph of instructions into a BPF program that can be passed // to the kernel. Please note that this function modifies the graph in place // and must therefore only be called once per graph. diff --git a/sandbox/linux/seccomp-bpf/die.cc b/sandbox/linux/seccomp-bpf/die.cc index 92ffa2a..b141424 100644 --- a/sandbox/linux/seccomp-bpf/die.cc +++ b/sandbox/linux/seccomp-bpf/die.cc @@ -5,7 +5,6 @@ #include <string> #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" -#include "sandbox/linux/seccomp-bpf/syscall.h" namespace playground2 { @@ -16,7 +15,7 @@ void Die::ExitGroup() { // Especially, since we are dealing with system call filters. Continuing // execution would be very bad in most cases where ExitGroup() gets called. // So, we'll try a few other strategies too. - SandboxSyscall(__NR_exit_group, 1); + syscall(__NR_exit_group, 1); // We have no idea what our run-time environment looks like. So, signal // handlers might or might not do the right thing. Try to reset settings @@ -24,7 +23,7 @@ void Die::ExitGroup() { // succeeded in doing so. Nonetheless, triggering a fatal signal could help // us terminate. signal(SIGSEGV, SIG_DFL); - SandboxSyscall(__NR_prctl, PR_SET_DUMPABLE, (void *)0, (void *)0, (void *)0); + syscall(__NR_prctl, PR_SET_DUMPABLE, (void *)0, (void *)0, (void *)0); if (*(volatile char *)0) { } // If there is no way for us to ask for the program to exit, the next @@ -33,7 +32,7 @@ void Die::ExitGroup() { // We in fact retry the system call inside of our loop so that it will // stand out when somebody tries to diagnose the problem by using "strace". for (;;) { - SandboxSyscall(__NR_exit_group, 1); + syscall(__NR_exit_group, 1); } } @@ -50,16 +49,6 @@ void Die::SandboxDie(const char *msg, const char *file, int line) { ExitGroup(); } -void Die::SandboxInfo(const char *msg, const char *file, int line) { - if (!suppress_info_) { - #if defined(SECCOMP_BPF_STANDALONE) - Die::LogToStderr(msg, file, line); - #else - logging::LogMessage(file, line, logging::LOG_INFO).stream() << msg; - #endif - } -} - void Die::LogToStderr(const char *msg, const char *file, int line) { if (msg) { char buf[40]; @@ -68,11 +57,10 @@ void Die::LogToStderr(const char *msg, const char *file, int line) { // No need to loop. Short write()s are unlikely and if they happen we // probably prefer them over a loop that blocks. - if (HANDLE_EINTR(SandboxSyscall(__NR_write, 2, s.c_str(), s.length()))) { } + if (HANDLE_EINTR(write(2, s.c_str(), s.length()))) { } } } -bool Die::simple_exit_ = false; -bool Die::suppress_info_ = false; +bool Die::simple_exit_ = false; } // namespace diff --git a/sandbox/linux/seccomp-bpf/die.h b/sandbox/linux/seccomp-bpf/die.h index c0ad8fd..608afde 100644 --- a/sandbox/linux/seccomp-bpf/die.h +++ b/sandbox/linux/seccomp-bpf/die.h @@ -13,9 +13,6 @@ class Die { // exits with a fatal error. #define SANDBOX_DIE(m) Die::SandboxDie(m, __FILE__, __LINE__) - // Adds an informational message to the log file or stderr as appropriate. - #define SANDBOX_INFO(m) Die::SandboxInfo(m, __FILE__, __LINE__) - // Terminate the program, even if the current sandbox policy prevents some // of the more commonly used functions used for exiting. // Most users would want to call SANDBOX_DIE() instead, as it logs extra @@ -28,10 +25,6 @@ class Die { static void SandboxDie(const char *msg, const char *file, int line) __attribute__((noreturn)); - // This method gets called by SANDBOX_INFO(). There is normally no reason - // to call it directly unless you are defining your own logging macro. - static void SandboxInfo(const char *msg, const char *file, int line); - // Writes a message to stderr. Used as a fall-back choice, if we don't have // any other way to report an error. static void LogToStderr(const char *msg, const char *file, int line); @@ -43,13 +36,8 @@ class Die { // unit tests or in the supportsSeccompSandbox() method). static void EnableSimpleExit() { simple_exit_ = true; } - // Sometimes we need to disable all informational messages (e.g. from within - // unittests). - static void SuppressInfoMessages(bool flag) { suppress_info_ = flag; } - private: static bool simple_exit_; - static bool suppress_info_; DISALLOW_IMPLICIT_CONSTRUCTORS(Die); }; diff --git a/sandbox/linux/seccomp-bpf/errorcode.cc b/sandbox/linux/seccomp-bpf/errorcode.cc index 4d21b792..cc79cb6 100644 --- a/sandbox/linux/seccomp-bpf/errorcode.cc +++ b/sandbox/linux/seccomp-bpf/errorcode.cc @@ -22,12 +22,10 @@ ErrorCode::ErrorCode(int err) { } } -ErrorCode::ErrorCode(ErrorCode::TrapFnc fnc, const void *aux, bool safe, - uint16_t id) +ErrorCode::ErrorCode(ErrorCode::TrapFnc fnc, const void *aux, uint16_t id) : error_type_(ET_TRAP), fnc_(fnc), aux_(const_cast<void *>(aux)), - safe_(safe), err_(SECCOMP_RET_TRAP + id) { } diff --git a/sandbox/linux/seccomp-bpf/errorcode.h b/sandbox/linux/seccomp-bpf/errorcode.h index d2661db..2b941ee 100644 --- a/sandbox/linux/seccomp-bpf/errorcode.h +++ b/sandbox/linux/seccomp-bpf/errorcode.h @@ -94,7 +94,7 @@ class ErrorCode { // If we are wrapping a callback, we must assign a unique id. This id is // how the kernel tells us which one of our different SECCOMP_RET_TRAP // cases has been triggered. - ErrorCode(TrapFnc fnc, const void *aux, bool safe, uint16_t id); + ErrorCode(TrapFnc fnc, const void *aux, uint16_t id); // Some system calls require inspection of arguments. This constructor // allows us to specify additional constraints. @@ -108,7 +108,6 @@ class ErrorCode { struct { TrapFnc fnc_; // Callback function and arg, if trap was void *aux_; // triggered by the kernel's BPF filter. - bool safe_; // Keep sandbox active while calling fnc_() }; // Fields needed when inspecting additional arguments. diff --git a/sandbox/linux/seccomp-bpf/sandbox_bpf.cc b/sandbox/linux/seccomp-bpf/sandbox_bpf.cc index 60ebd50..eb03995 100644 --- a/sandbox/linux/seccomp-bpf/sandbox_bpf.cc +++ b/sandbox/linux/seccomp-bpf/sandbox_bpf.cc @@ -2,27 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include <endian.h> -#if __BYTE_ORDER == __BIG_ENDIAN -// The BPF "struct seccomp_data" layout has to deal with storing 64bit -// values that need to be inspected by a virtual machine that only ever -// operates on 32bit values. The kernel developers decided how values -// should be split into two 32bit words to achieve this goal. But at this -// time, there is no existing BPF implementation in the kernel that uses -// 64bit big endian values. So, all we have to go by is the consensus -// from a discussion on LKLM. Actual implementations, if and when they -// happen, might very well differ. -// If this code is ever going to be used with such a kernel, you should -// disable the "#error" and carefully test the code (e.g. run the unit -// tests). If things don't work, search for all occurrences of __BYTE_ORDER -// and verify that the proposed implementation agrees with what the kernel -// actually does. -#error Big endian operation is untested and expected to be broken -#endif - #include "sandbox/linux/seccomp-bpf/codegen.h" #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" -#include "sandbox/linux/seccomp-bpf/syscall.h" #include "sandbox/linux/seccomp-bpf/syscall_iterator.h" #include "sandbox/linux/seccomp-bpf/verifier.h" @@ -37,30 +18,6 @@ void WriteFailedStderrSetupMessage(int out_fd) { } } -// We need to tell whether we are performing a "normal" callback, or -// whether we were called recursively from within a UnsafeTrap() callback. -// This is a little tricky to do, because we need to somehow get access to -// per-thread data from within a signal context. Normal TLS storage is not -// safely accessible at this time. We could roll our own, but that involves -// a lot of complexity. Instead, we co-opt one bit in the signal mask. -// If BUS is blocked, we assume that we have been called recursively. -// There is a possibility for collision with other code that needs to do -// this, but in practice the risks are low. -// If SIGBUS turns out to be a problem, we could instead co-opt one of the -// realtime signals. There are plenty of them. Unfortunately, there is no -// way to mark a signal as allocated. So, the potential for collision is -// possibly even worse. -bool GetIsInSigHandler(const ucontext_t *ctx) { - return sigismember(&ctx->uc_sigmask, SIGBUS); -} - -void SetIsInSigHandler() { - sigset_t mask; - sigemptyset(&mask); - sigaddset(&mask, SIGBUS); - sigprocmask(SIG_BLOCK, &mask, NULL); -} - } // namespace // The kernel gives us a sandbox, we turn it into a playground :-) @@ -362,48 +319,6 @@ void Sandbox::policySanityChecks(EvaluateSyscall syscallEvaluator, return; } -void Sandbox::CheckForUnsafeErrorCodes(Instruction *insn, void *aux) { - if (BPF_CLASS(insn->code) == BPF_RET && - insn->k > SECCOMP_RET_TRAP && - insn->k - SECCOMP_RET_TRAP <= trapArraySize_) { - const ErrorCode& err = trapArray_[insn->k - SECCOMP_RET_TRAP - 1]; - if (!err.safe_) { - bool *is_unsafe = static_cast<bool *>(aux); - *is_unsafe = true; - } - } -} - -void Sandbox::RedirectToUserspace(Instruction *insn, void *aux) { - // When inside an UnsafeTrap() callback, we want to allow all system calls. - // This means, we must conditionally disable the sandbox -- and that's not - // something that kernel-side BPF filters can do, as they cannot inspect - // any state other than the syscall arguments. - // But if we redirect all error handlers to user-space, then we can easily - // make this decision. - // The performance penalty for this extra round-trip to user-space is not - // actually that bad, as we only ever pay it for denied system calls; and a - // typical program has very few of these. - if (BPF_CLASS(insn->code) == BPF_RET && - (insn->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) { - insn->k = Trap(ReturnErrno, - reinterpret_cast<void *>(insn->k & SECCOMP_RET_DATA)).err(); - } -} - -ErrorCode Sandbox::RedirectToUserspaceEvalWrapper(int sysnum, void *aux) { - // We need to replicate the behavior of RedirectToUserspace(), so that our - // Verifier can still work correctly. - Evaluators *evaluators = reinterpret_cast<Evaluators *>(aux); - const std::pair<EvaluateSyscall, void *>& evaluator = *evaluators->begin(); - ErrorCode err = evaluator.first(sysnum, evaluator.second); - if ((err.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) { - return Trap(ReturnErrno, - reinterpret_cast<void *>(err.err() & SECCOMP_RET_DATA)); - } - return err; -} - void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, void *aux) { if (status_ == STATUS_ENABLED) { SANDBOX_DIE("Cannot change policy after sandbox has started"); @@ -422,8 +337,8 @@ void Sandbox::installFilter(bool quiet) { // Set new SIGSYS handler struct sigaction sa; memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = sigSys; - sa.sa_flags = SA_SIGINFO | SA_NODEFER; + sa.sa_sigaction = &sigSys; + sa.sa_flags = SA_SIGINFO; if (sigaction(SIGSYS, &sa, NULL) < 0) { goto filter_failed; } @@ -454,13 +369,33 @@ void Sandbox::installFilter(bool quiet) { Instruction *head = gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, offsetof(struct arch_seccomp_data, arch), - tail = gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, - NULL, + tail = + // Grab the system call number, so that we can implement jump tables. + gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, + offsetof(struct arch_seccomp_data, nr)), gen->MakeInstruction(BPF_RET+BPF_K, Kill( "Invalid audit architecture in BPF filter").err_))); + // On Intel architectures, verify that system call numbers are in the + // expected number range. The older i386 and x86-64 APIs clear bit 30 + // on all system calls. The newer x32 API always sets bit 30. +#if defined(__i386__) || defined(__x86_64__) + Instruction *invalidX32 = + gen->MakeInstruction(BPF_RET+BPF_K, + Kill("Illegal mixing of system call ABIs").err_); + Instruction *checkX32 = +#if defined(__x86_64__) && defined(__ILP32__) + gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, invalidX32); +#else + gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, invalidX32, 0); +#endif + gen->JoinInstructions(tail, checkX32); + tail = checkX32; +#endif + + { // Evaluate all possible system calls and group their ErrorCodes into // ranges of identical codes. @@ -471,109 +406,6 @@ void Sandbox::installFilter(bool quiet) { Instruction *jumptable = assembleJumpTable(gen, ranges.begin(), ranges.end()); - // If there is at least one UnsafeTrap() in our program, the entire sandbox - // is unsafe. We need to modify the program so that all non- - // SECCOMP_RET_ALLOW ErrorCodes are handled in user-space. This will then - // allow us to temporarily disable sandboxing rules inside of callbacks to - // UnsafeTrap(). - has_unsafe_traps_ = false; - gen->Traverse(jumptable, CheckForUnsafeErrorCodes, &has_unsafe_traps_); - - // Grab the system call number, so that we can implement jump tables. - Instruction *load_nr = - gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, - offsetof(struct arch_seccomp_data, nr)); - - // If our BPF program has unsafe jumps, enable support for them. This - // test happens very early in the BPF filter program. Even before we - // consider looking at system call numbers. - // As support for unsafe jumps essentially defeats all the security - // measures that the sandbox provides, we print a big warning message -- - // and of course, we make sure to only ever enable this feature if it - // is actually requested by the sandbox policy. - if (has_unsafe_traps_) { - if (SandboxSyscall(-1) == -1 && errno == ENOSYS) { - SANDBOX_DIE("Support for UnsafeTrap() has not yet been ported to this " - "architecture"); - } - - EvaluateSyscall evaluateSyscall = evaluators_.begin()->first; - void *aux = evaluators_.begin()->second; - if (!evaluateSyscall(__NR_rt_sigprocmask, aux). - Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) || - !evaluateSyscall(__NR_rt_sigreturn, aux). - Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) -#if defined(__NR_sigprocmask) - || !evaluateSyscall(__NR_sigprocmask, aux). - Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) -#endif -#if defined(__NR_sigreturn) - || !evaluateSyscall(__NR_sigreturn, aux). - Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) -#endif - ) { - SANDBOX_DIE("Invalid seccomp policy; if using UnsafeTrap(), you must " - "unconditionally allow sigreturn() and sigprocmask()"); - } - - SANDBOX_INFO("WARNING! Disabling sandbox for debugging purposes"); - gen->Traverse(jumptable, RedirectToUserspace, NULL); - - // Allow system calls, if they originate from our magic return address - // (which we can query by calling SandboxSyscall(-1)). - uintptr_t syscall_entry_point = - static_cast<uintptr_t>(SandboxSyscall(-1)); - uint32_t low = static_cast<uint32_t>(syscall_entry_point); -#if __SIZEOF_POINTER__ > 4 - uint32_t hi = static_cast<uint32_t>(syscall_entry_point >> 32); -#endif - - // BPF cannot do native 64bit comparisons. On 64bit architectures, we - // have to compare both 32bit halfs of the instruction pointer. If they - // match what we expect, we return ERR_ALLOWED. If either or both don't - // match, we continue evalutating the rest of the sandbox policy. - Instruction *escape_hatch = - gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, - offsetof(struct arch_seccomp_data, - instruction_pointer) + - (__SIZEOF_POINTER__ > 4 && - __BYTE_ORDER == __BIG_ENDIAN ? 4 : 0), - gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, low, -#if __SIZEOF_POINTER__ > 4 - gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, - offsetof(struct arch_seccomp_data, - instruction_pointer) + - (__BYTE_ORDER == __BIG_ENDIAN ? 0 : 4), - gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, hi, -#endif - gen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(ErrorCode::ERR_ALLOWED)), -#if __SIZEOF_POINTER__ > 4 - load_nr)), -#endif - load_nr)); - gen->JoinInstructions(tail, escape_hatch); - } else { - gen->JoinInstructions(tail, load_nr); - } - tail = load_nr; - - // On Intel architectures, verify that system call numbers are in the - // expected number range. The older i386 and x86-64 APIs clear bit 30 - // on all system calls. The newer x32 API always sets bit 30. -#if defined(__i386__) || defined(__x86_64__) - Instruction *invalidX32 = - gen->MakeInstruction(BPF_RET+BPF_K, - Kill("Illegal mixing of system call ABIs").err_); - Instruction *checkX32 = -#if defined(__x86_64__) && defined(__ILP32__) - gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, invalidX32); -#else - gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, invalidX32, 0); -#endif - gen->JoinInstructions(tail, checkX32); - tail = checkX32; -#endif - // Append jump table to our pre-amble gen->JoinInstructions(tail, jumptable); } @@ -587,22 +419,9 @@ void Sandbox::installFilter(bool quiet) { // correctly. Otherwise, there is an internal error in our BPF compiler. // There is really nothing the caller can do until the bug is fixed. #ifndef NDEBUG - { - // If we previously rewrote the BPF program so that it calls user-space - // whenever we return an "errno" value from the filter, then we have to - // wrap our system call evaluator to perform the same operation. Otherwise, - // the verifier would also report a mismatch in return codes. - Evaluators redirected_evaluators; - redirected_evaluators.push_back( - std::make_pair(RedirectToUserspaceEvalWrapper, &evaluators_)); - - const char *err = NULL; - if (!Verifier::VerifyBPF( - *program, - has_unsafe_traps_ ? redirected_evaluators : evaluators_, - &err)) { - SANDBOX_DIE(err); - } + const char *err = NULL; + if (!Verifier::VerifyBPF(*program, evaluators_, &err)) { + SANDBOX_DIE(err); } #endif @@ -625,6 +444,7 @@ void Sandbox::installFilter(bool quiet) { // Release memory that is no longer needed evaluators_.clear(); + errMap_.clear(); #if defined(SECCOMP_BPF_VALGRIND_HACKS) // Valgrind is really not happy about our sandbox. Disable it when running @@ -741,43 +561,27 @@ void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) { goto sigsys_err; } - intptr_t rc; - if (has_unsafe_traps_ && GetIsInSigHandler(ctx)) { - errno = old_errno; - if (sigsys.nr == __NR_clone) { - SANDBOX_DIE("Cannot call clone() from an UnsafeTrap() handler"); - } - rc = SandboxSyscall(sigsys.nr, - SECCOMP_PARM1(ctx), SECCOMP_PARM2(ctx), - SECCOMP_PARM3(ctx), SECCOMP_PARM4(ctx), - SECCOMP_PARM5(ctx), SECCOMP_PARM6(ctx)); - } else { - const ErrorCode& err = trapArray_[info->si_errno - 1]; - if (!err.safe_) { - SetIsInSigHandler(); + // Copy the seccomp-specific data into a arch_seccomp_data structure. This + // is what we are showing to TrapFnc callbacks that the system call evaluator + // registered with the sandbox. + struct arch_seccomp_data data = { + sigsys.nr, + SECCOMP_ARCH, + reinterpret_cast<uint64_t>(sigsys.ip), + { + static_cast<uint64_t>(SECCOMP_PARM1(ctx)), + static_cast<uint64_t>(SECCOMP_PARM2(ctx)), + static_cast<uint64_t>(SECCOMP_PARM3(ctx)), + static_cast<uint64_t>(SECCOMP_PARM4(ctx)), + static_cast<uint64_t>(SECCOMP_PARM5(ctx)), + static_cast<uint64_t>(SECCOMP_PARM6(ctx)) } + }; - // Copy the seccomp-specific data into a arch_seccomp_data structure. This - // is what we are showing to TrapFnc callbacks that the system call - // evaluator registered with the sandbox. - struct arch_seccomp_data data = { - sigsys.nr, - SECCOMP_ARCH, - reinterpret_cast<uint64_t>(sigsys.ip), - { - static_cast<uint64_t>(SECCOMP_PARM1(ctx)), - static_cast<uint64_t>(SECCOMP_PARM2(ctx)), - static_cast<uint64_t>(SECCOMP_PARM3(ctx)), - static_cast<uint64_t>(SECCOMP_PARM4(ctx)), - static_cast<uint64_t>(SECCOMP_PARM5(ctx)), - static_cast<uint64_t>(SECCOMP_PARM6(ctx)) - } - }; - - // Now call the TrapFnc callback associated with this particular instance - // of SECCOMP_RET_TRAP. - rc = err.fnc_(data, err.aux_); - } + // Now call the TrapFnc callback associated with this particular instance + // of SECCOMP_RET_TRAP. + const ErrorCode& err = trapArray_[info->si_errno - 1]; + intptr_t rc = err.fnc_(data, err.aux_); // Update the CPU register that stores the return code of the system call // that we just handled, and restore "errno" to the value that it had @@ -788,21 +592,10 @@ void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) { return; } -bool Sandbox::TrapKey::operator<(const Sandbox::TrapKey& o) const { - if (fnc != o.fnc) { - return fnc < o.fnc; - } else if (aux != o.aux) { - return aux < o.aux; - } else { - return safe < o.safe; - } -} - -ErrorCode Sandbox::MakeTrap(ErrorCode::TrapFnc fnc, const void *aux, - bool safe) { +ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) { // Each unique pair of TrapFnc and auxiliary data make up a distinct instance // of a SECCOMP_RET_TRAP. - TrapKey key(fnc, aux, safe); + std::pair<ErrorCode::TrapFnc, const void *> key(fnc, aux); TrapIds::const_iterator iter = trapIds_.find(key); uint16_t id; if (iter != trapIds_.end()) { @@ -825,7 +618,7 @@ ErrorCode Sandbox::MakeTrap(ErrorCode::TrapFnc fnc, const void *aux, } id = traps_->size() + 1; - traps_->push_back(ErrorCode(fnc, aux, safe, id)); + traps_->push_back(ErrorCode(fnc, aux, id)); trapIds_[key] = id; // We want to access the traps_ vector from our signal handler. But @@ -836,33 +629,10 @@ ErrorCode Sandbox::MakeTrap(ErrorCode::TrapFnc fnc, const void *aux, // signal handler, where we can safely do so. trapArray_ = &(*traps_)[0]; trapArraySize_ = id; - return traps_->back(); } - return ErrorCode(fnc, aux, safe, id); -} - -ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) { - return MakeTrap(fnc, aux, true /* Safe Trap */); -} - -ErrorCode Sandbox::UnsafeTrap(ErrorCode::TrapFnc fnc, const void *aux) { - return MakeTrap(fnc, aux, false /* Unsafe Trap */); -} - -intptr_t Sandbox::ForwardSyscall(const struct arch_seccomp_data& args) { - return SandboxSyscall(args.nr, - args.args[0], args.args[1], args.args[2], - args.args[3], args.args[4], args.args[5]); -} - -intptr_t Sandbox::ReturnErrno(const struct arch_seccomp_data&, void *aux) { - // TrapFnc functions report error by following the native kernel convention - // of returning an exit code in the range of -1..-4096. They do not try to - // set errno themselves. The glibc wrapper that triggered the SIGSYS will - // ultimately do so for us. - int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA; - return -err; + ErrorCode err = ErrorCode(fnc, aux, id); + return errMap_[err.err()] = err; } intptr_t Sandbox::bpfFailure(const struct arch_seccomp_data&, void *aux) { @@ -876,10 +646,10 @@ ErrorCode Sandbox::Kill(const char *msg) { Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN; int Sandbox::proc_fd_ = -1; Sandbox::Evaluators Sandbox::evaluators_; +Sandbox::ErrMap Sandbox::errMap_; Sandbox::Traps *Sandbox::traps_ = NULL; Sandbox::TrapIds Sandbox::trapIds_; ErrorCode *Sandbox::trapArray_ = NULL; size_t Sandbox::trapArraySize_ = 0; - bool Sandbox::has_unsafe_traps_ = false; } // namespace diff --git a/sandbox/linux/seccomp-bpf/sandbox_bpf.h b/sandbox/linux/seccomp-bpf/sandbox_bpf.h index 5497963..16ab1d3 100644 --- a/sandbox/linux/seccomp-bpf/sandbox_bpf.h +++ b/sandbox/linux/seccomp-bpf/sandbox_bpf.h @@ -207,11 +207,6 @@ class Sandbox { // Please note that TrapFnc is executed from signal context and must be // async-signal safe: // http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html - // Also note that it follows the calling convention of native system calls. - // In other words, it reports an error by returning an exit code in the - // range -1..-4096. It should not set errno when reporting errors; on the - // other hand, accidentally modifying errno is harmless and the changes will - // be undone afterwards. typedef intptr_t (*TrapFnc)(const struct arch_seccomp_data& args, void *aux); enum Operation { @@ -276,25 +271,6 @@ class Sandbox { // handler. static ErrorCode Trap(ErrorCode::TrapFnc fnc, const void *aux); - // Calls a user-space trap handler and disables all sandboxing for system - // calls made from this trap handler. - // NOTE: This feature, by definition, disables all security features of - // the sandbox. It should never be used in production, but it can be - // very useful to diagnose code that is incompatible with the sandbox. - // If even a single system call returns "UnsafeTrap", the security of - // entire sandbox should be considered compromised. - static ErrorCode UnsafeTrap(ErrorCode::TrapFnc fnc, const void *aux); - - // From within an UnsafeTrap() it is often useful to be able to execute - // the system call that triggered the trap. The ForwardSyscall() method - // makes this easy. It is more efficient than calling glibc's syscall() - // function, as it avoid the extra round-trip to the signal handler. And - // it automatically does the correct thing to report kernel-style error - // conditions, rather than setting errno. See the comments for TrapFnc for - // details. In other words, the return value from ForwardSyscall() is - // directly suitable as a return value for a trap handler. - static intptr_t ForwardSyscall(const struct arch_seccomp_data& args); - // Kill the program and print an error message. static ErrorCode Kill(const char *msg); @@ -313,29 +289,18 @@ class Sandbox { typedef std::vector<struct sock_filter> Program; struct Range { - Range(uint32_t f, uint32_t t, const ErrorCode& e) - : from(f), - to(t), - err(e) { + Range(uint32_t f, uint32_t t, const ErrorCode& e) : + from(f), + to(t), + err(e) { } uint32_t from, to; ErrorCode err; }; - struct TrapKey { - TrapKey(TrapFnc f, const void *a, bool s) - : fnc(f), - aux(a), - safe(s) { - } - TrapFnc fnc; - const void *aux; - bool safe; - bool operator<(const TrapKey&) const; - }; typedef std::vector<Range> Ranges; typedef std::map<uint32_t, ErrorCode> ErrMap; typedef std::vector<ErrorCode> Traps; - typedef std::map<TrapKey, uint16_t> TrapIds; + typedef std::map<std::pair<TrapFnc, const void *>, int> TrapIds; // Get a file descriptor pointing to "/proc", if currently available. static int proc_fd() { return proc_fd_; } @@ -355,47 +320,23 @@ class Sandbox { static bool disableFilesystem(); static void policySanityChecks(EvaluateSyscall syscallEvaluator, void *aux); - - // Function that can be passed as a callback function to CodeGen::Traverse(). - // Checks whether the "insn" returns an UnsafeTrap() ErrorCode. If so, it - // sets the "bool" variable pointed to by "aux". - static void CheckForUnsafeErrorCodes(Instruction *insn, void *aux); - - // Function that can be passed as a callback function to CodeGen::Traverse(). - // Checks whether the "insn" returns an errno value from a BPF filter. If so, - // it rewrites the instruction to instead call a Trap() handler that does - // the same thing. "aux" is ignored. - static void RedirectToUserspace(Instruction *insn, void *aux); - - // Stackable wrapper around an Evaluators handler. Changes ErrorCodes - // returned by a system call evaluator to match the changes made by - // RedirectToUserspace(). "aux" should be pointer to wrapped system call - // evaluator. - static ErrorCode RedirectToUserspaceEvalWrapper(int sysnum, void *aux); - static void installFilter(bool quiet); static void findRanges(Ranges *ranges); static Instruction *assembleJumpTable(CodeGen *gen, Ranges::const_iterator start, Ranges::const_iterator stop); static void sigSys(int nr, siginfo_t *info, void *void_context); - static ErrorCode MakeTrap(ErrorCode::TrapFnc fn, const void *aux, bool safe); - - // A Trap() handler that returns an "errno" value. The value is encoded - // in the "aux" parameter. - static intptr_t ReturnErrno(const struct arch_seccomp_data&, void *aux); - static intptr_t bpfFailure(const struct arch_seccomp_data& data, void *aux); static int getTrapId(TrapFnc fnc, const void *aux); static SandboxStatus status_; static int proc_fd_; static Evaluators evaluators_; + static ErrMap errMap_; static Traps *traps_; static TrapIds trapIds_; static ErrorCode *trapArray_; static size_t trapArraySize_; - static bool has_unsafe_traps_; DISALLOW_IMPLICIT_CONSTRUCTORS(Sandbox); }; diff --git a/sandbox/linux/seccomp-bpf/sandbox_bpf_unittest.cc b/sandbox/linux/seccomp-bpf/sandbox_bpf_unittest.cc index 3d1d8b6..8ea23d9 100644 --- a/sandbox/linux/seccomp-bpf/sandbox_bpf_unittest.cc +++ b/sandbox/linux/seccomp-bpf/sandbox_bpf_unittest.cc @@ -5,7 +5,6 @@ #include <ostream> #include "sandbox/linux/seccomp-bpf/bpf_tests.h" -#include "sandbox/linux/seccomp-bpf/syscall.h" #include "sandbox/linux/seccomp-bpf/verifier.h" #include "testing/gtest/include/gtest/gtest.h" @@ -265,162 +264,4 @@ BPF_TEST(SandboxBpf, ArmPrivatePolicy, ArmPrivatePolicy) { } #endif // defined(__arm__) -intptr_t CountSyscalls(const struct arch_seccomp_data& args, void *aux) { - // Count all invocations of our callback function. - ++*reinterpret_cast<int *>(aux); - - // Verify that within the callback function all filtering is temporarily - // disabled. - BPF_ASSERT(syscall(__NR_getpid) > 1); - - // Verify that we can now call the underlying system call without causing - // infinite recursion. - return Sandbox::ForwardSyscall(args); -} - -ErrorCode GreyListedPolicy(int sysno, void *aux) { - // The use of UnsafeTrap() causes us to print a warning message. This is - // generally desirable, but it results in the unittest failing, as it doesn't - // expect any messages on "stderr". So, temporarily disable messages. The - // BPF_TEST() is guaranteed to turn messages back on, after the policy - // function has completed. - Die::SuppressInfoMessages(true); - - // Some system calls must always be allowed, if our policy wants to make - // use of UnsafeTrap() - if (sysno == __NR_rt_sigprocmask || - sysno == __NR_rt_sigreturn -#if defined(__NR_sigprocmask) - || sysno == __NR_sigprocmask -#endif -#if defined(__NR_sigreturn) - || sysno == __NR_sigreturn -#endif - ) { - return ErrorCode(ErrorCode::ERR_ALLOWED); - } else if (sysno == __NR_getpid) { - // Disallow getpid() - return ErrorCode(EPERM); - } else if (Sandbox::isValidSyscallNumber(sysno)) { - // Allow (and count) all other system calls. - return Sandbox::UnsafeTrap(CountSyscalls, aux); - } else { - return ErrorCode(ENOSYS); - } -} - -BPF_TEST(SandboxBpf, GreyListedPolicy, - GreyListedPolicy, int /* BPF_AUX */) { - BPF_ASSERT(syscall(__NR_getpid) == -1); - BPF_ASSERT(errno == EPERM); - BPF_ASSERT(BPF_AUX == 0); - BPF_ASSERT(syscall(__NR_geteuid) == syscall(__NR_getuid)); - BPF_ASSERT(BPF_AUX == 2); -} - -intptr_t AllowRedirectedSyscall(const struct arch_seccomp_data& args, void *) { - return Sandbox::ForwardSyscall(args); -} - -ErrorCode RedirectAllSyscallsPolicy(int sysno, void *aux) { - Die::SuppressInfoMessages(true); - - // Some system calls must always be allowed, if our policy wants to make - // use of UnsafeTrap() - if (sysno == __NR_rt_sigprocmask || - sysno == __NR_rt_sigreturn -#if defined(__NR_sigprocmask) - || sysno == __NR_sigprocmask -#endif -#if defined(__NR_sigreturn) - || sysno == __NR_sigreturn -#endif - ) { - return ErrorCode(ErrorCode::ERR_ALLOWED); - } else if (Sandbox::isValidSyscallNumber(sysno)) { - return Sandbox::UnsafeTrap(AllowRedirectedSyscall, aux); - } else { - return ErrorCode(ENOSYS); - } -} - -int bus_handler_fd_ = -1; - -void SigBusHandler(int, siginfo_t *info, void *void_context) { - BPF_ASSERT(write(bus_handler_fd_, "\x55", 1) == 1); -} - -BPF_TEST(SandboxBpf, SigBus, RedirectAllSyscallsPolicy) { - // We use the SIGBUS bit in the signal mask as a thread-local boolean - // value in the implementation of UnsafeTrap(). This is obviously a bit - // of a hack that could conceivably interfere with code that uses SIGBUS - // in more traditional ways. This test verifies that basic functionality - // of SIGBUS is not impacted, but it is certainly possibly to construe - // more complex uses of signals where our use of the SIGBUS mask is not - // 100% transparent. This is expected behavior. - int fds[2]; - BPF_ASSERT(pipe(fds) == 0); - bus_handler_fd_ = fds[1]; - struct sigaction sa = { }; - sa.sa_sigaction = SigBusHandler; - sa.sa_flags = SA_SIGINFO; - BPF_ASSERT(sigaction(SIGBUS, &sa, NULL) == 0); - raise(SIGBUS); - char c = '\000'; - BPF_ASSERT(read(fds[0], &c, 1) == 1); - BPF_ASSERT(close(fds[0]) == 0); - BPF_ASSERT(close(fds[1]) == 0); - BPF_ASSERT(c == 0x55); -} - -BPF_TEST(SandboxBpf, SigMask, RedirectAllSyscallsPolicy) { - // Signal masks are potentially tricky to handle. For instance, if we - // ever tried to update them from inside a Trap() or UnsafeTrap() handler, - // the call to sigreturn() at the end of the signal handler would undo - // all of our efforts. So, it makes sense to test that sigprocmask() - // works, even if we have a policy in place that makes use of UnsafeTrap(). - // In practice, this works because we force sigprocmask() to be handled - // entirely in the kernel. - sigset_t mask0, mask1, mask2; - - // Call sigprocmask() to verify that SIGUSR1 wasn't blocked, if we didn't - // change the mask (it shouldn't have been, as it isn't blocked by default - // in POSIX). - sigemptyset(&mask0); - BPF_ASSERT(!sigprocmask(SIG_BLOCK, &mask0, &mask1)); - BPF_ASSERT(!sigismember(&mask1, SIGUSR1)); - - // Try again, and this time we verify that we can block it. This - // requires a second call to sigprocmask(). - sigaddset(&mask0, SIGUSR1); - BPF_ASSERT(!sigprocmask(SIG_BLOCK, &mask0, NULL)); - BPF_ASSERT(!sigprocmask(SIG_BLOCK, NULL, &mask2)); - BPF_ASSERT( sigismember(&mask2, SIGUSR1)); -} - -BPF_TEST(SandboxBpf, UnsafeTrapWithErrno, RedirectAllSyscallsPolicy) { - // An UnsafeTrap() (or for that matter, a Trap()) has to report error - // conditions by returning an exit code in the range -1..-4096. This - // should happen automatically if using ForwardSyscall(). If the TrapFnc() - // uses some other method to make system calls, then it is responsible - // for computing the correct return code. - // This test verifies that ForwardSyscall() does the correct thing. - - // The glibc system wrapper will ultimately set errno for us. So, from normal - // userspace, all of this should be completely transparent. - errno = 0; - BPF_ASSERT(close(-1) == -1); - BPF_ASSERT(errno == EBADF); - - // Explicitly avoid the glibc wrapper. This is not normally the way anybody - // would make system calls, but it allows us to verify that we don't - // accidentally mess with errno, when we shouldn't. - errno = 0; - struct arch_seccomp_data args = { 0 }; - args.nr = __NR_close; - args.args[0] = -1; - BPF_ASSERT(Sandbox::ForwardSyscall(args) == -EBADF); - BPF_ASSERT(errno == 0); -} - } // namespace diff --git a/sandbox/linux/seccomp-bpf/syscall.cc b/sandbox/linux/seccomp-bpf/syscall.cc deleted file mode 100644 index 619a983..0000000 --- a/sandbox/linux/seccomp-bpf/syscall.cc +++ /dev/null @@ -1,282 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include <asm/unistd.h> -#include <errno.h> -#include <stdarg.h> - -#include "sandbox/linux/seccomp-bpf/syscall.h" - - -namespace playground2 { - - asm( // We need to be able to tell the kernel exactly where we made a - // system call. The C++ compiler likes to sometimes clone or - // inline code, which would inadvertently end up duplicating - // the entry point. - // "gcc" can suppress code duplication with suitable function - // attributes, but "clang" doesn't have this ability. - // The "clang" developer mailing list suggested that the correct - // and portable solution is a file-scope assembly block. - // N.B. We do mark our code as a proper function so that backtraces - // work correctly. But we make absolutely no attempt to use the - // ABI's calling conventions for passing arguments. We will only - // ever be called from assembly code and thus can pick more - // suitable calling conventions. -#if defined(__i386__) - ".text\n" - ".align 16, 0x90\n" - ".type SyscallAsm, @function\n" - "SyscallAsm:.cfi_startproc\n" - // Check if "%eax" is negative. If so, do not attempt to make a - // system call. Instead, compute the return address that is visible - // to the kernel after we execute "int $0x80". This address can be - // used as a marker that BPF code inspects. - "test %eax, %eax\n" - "jge 1f\n" - // Always, make sure that our code is position-independent, or - // address space randomization might not work on i386. This means, - // we can't use "lea", but instead have to rely on "call/pop". - "call 0f; .cfi_adjust_cfa_offset 4\n" - "0:pop %eax; .cfi_adjust_cfa_offset -4\n" - "addl $2f-0b, %eax\n" - "ret\n" - // Save register that we don't want to clobber. On i386, we need to - // save relatively aggressively, as there are a couple or registers - // that are used internally (e.g. %ebx for position-independent - // code, and %ebp for the frame pointer), and as we need to keep at - // least a few registers available for the register allocator. - "1:push %esi; .cfi_adjust_cfa_offset 4\n" - "push %edi; .cfi_adjust_cfa_offset 4\n" - "push %ebx; .cfi_adjust_cfa_offset 4\n" - "push %ebp; .cfi_adjust_cfa_offset 4\n" - // Copy entries from the array holding the arguments into the - // correct CPU registers. - "movl 0(%edi), %ebx\n" - "movl 4(%edi), %ecx\n" - "movl 8(%edi), %edx\n" - "movl 12(%edi), %esi\n" - "movl 20(%edi), %ebp\n" - "movl 16(%edi), %edi\n" - // Enter the kernel. - "int $0x80\n" - // This is our "magic" return address that the BPF filter sees. - "2:" - // Restore any clobbered registers that we didn't declare to the - // compiler. - "pop %ebp; .cfi_adjust_cfa_offset -4\n" - "pop %ebx; .cfi_adjust_cfa_offset -4\n" - "pop %edi; .cfi_adjust_cfa_offset -4\n" - "pop %esi; .cfi_adjust_cfa_offset -4\n" - "ret\n" - ".cfi_endproc\n" - "9:.size SyscallAsm, 9b-SyscallAsm\n" -#elif defined(__x86_64__) - ".text\n" - ".align 16, 0x90\n" - ".type SyscallAsm, @function\n" - "SyscallAsm:.cfi_startproc\n" - // Check if "%rax" is negative. If so, do not attempt to make a - // system call. Instead, compute the return address that is visible - // to the kernel after we execute "syscall". This address can be - // used as a marker that BPF code inspects. - "test %rax, %rax\n" - "jge 1f\n" - // Always make sure that our code is position-independent, or the - // linker will throw a hissy fit on x86-64. - "call 0f; .cfi_adjust_cfa_offset 8\n" - "0:pop %rax; .cfi_adjust_cfa_offset -8\n" - "addq $2f-0b, %rax\n" - "ret\n" - // We declared all clobbered registers to the compiler. On x86-64, - // there really isn't much of a problem with register pressure. So, - // we can go ahead and directly copy the entries from the arguments - // array into the appropriate CPU registers. - "1:movq 0(%r12), %rdi\n" - "movq 8(%r12), %rsi\n" - "movq 16(%r12), %rdx\n" - "movq 24(%r12), %r10\n" - "movq 32(%r12), %r8\n" - "movq 40(%r12), %r9\n" - // Enter the kernel. - "syscall\n" - // This is our "magic" return address that the BPF filter sees. - "2:ret\n" - ".cfi_endproc\n" - "9:.size SyscallAsm, 9b-SyscallAsm\n" -#elif defined(__arm__) - // Throughout this file, we use the same mode (ARM vs. thumb) - // that the C++ compiler uses. This means, when transfering control - // from C++ to assembly code, we do not need to switch modes (e.g. - // by using the "bx" instruction). It also means that our assembly - // code should not be invoked directly from code that lives in - // other compilation units, as we don't bother implementing thumb - // interworking. That's OK, as we don't make any of the assembly - // symbols public. They are all local to this file. - ".text\n" - ".align 2\n" - ".type SyscallAsm, %function\n" -#if defined(__thumb__) - ".thumb_func\n" -#else - ".arm\n" -#endif - "SyscallAsm:.fnstart\n" - "@ args = 0, pretend = 0, frame = 8\n" - "@ frame_needed = 1, uses_anonymous_args = 0\n" -#if defined(__thumb__) - ".cfi_startproc\n" - "push {r7, lr}\n" - ".cfi_offset 14, -4\n" - ".cfi_offset 7, -8\n" - "mov r7, sp\n" - ".cfi_def_cfa_register 7\n" - ".cfi_def_cfa_offset 8\n" -#else - "stmfd sp!, {fp, lr}\n" - "add fp, sp, #4\n" -#endif - // Check if "r0" is negative. If so, do not attempt to make a - // system call. Instead, compute the return address that is visible - // to the kernel after we execute "swi 0". This address can be - // used as a marker that BPF code inspects. - "cmp r0, #0\n" - "bge 1f\n" - "ldr r0, =2f\n" - "b 2f\n" - // We declared (almost) all clobbered registers to the compiler. On - // ARM there is no particular register pressure. So, we can go - // ahead and directly copy the entries from the arguments array - // into the appropriate CPU registers. - "1:ldr r5, [r6, #20]\n" - "ldr r4, [r6, #16]\n" - "ldr r3, [r6, #12]\n" - "ldr r2, [r6, #8]\n" - "ldr r1, [r6, #4]\n" - "mov r7, r0\n" - "ldr r0, [r6, #0]\n" - // Enter the kernel - "swi 0\n" - // Restore the frame pointer. Also restore the program counter from - // the link register; this makes us return to the caller. -#if defined(__thumb__) - "2:pop {r7, pc}\n" - ".cfi_endproc\n" -#else - "2:ldmfd sp!, {fp, pc}\n" -#endif - ".fnend\n" - "9:.size SyscallAsm, 9b-SyscallAsm\n" -#endif - ); // asm - -intptr_t SandboxSyscall(int nr, ...) { - // It is most convenient for the caller to pass a variadic list of arguments. - // But this is difficult to handle in assembly code without making - // assumptions about internal implementation details of "va_list". So, we - // first use C code to copy all the arguments into an array, where they are - // easily accessible to asm(). - // This is preferable over copying them into individual variables, which - // can result in too much register pressure. - void *args[6]; - va_list ap; - - // System calls take a system call number (typically passed in %eax or - // %rax) and up to six arguments (passed in general-purpose CPU registers). - // - // On 32bit systems, all variadic arguments are passed on the stack as 32bit - // quantities. We can use an arbitrary 32bit type to retrieve them with - // va_arg() and then forward them to the kernel in the appropriate CPU - // register. We do not need to know whether this is an integer or a pointer - // value. - // - // On 64bit systems, variadic arguments can be either 32bit or 64bit wide, - // which would seem to make it more important that we pass the correct type - // to va_arg(). And we really can't know what this type is unless we have a - // table with function signatures for all system calls. - // - // Fortunately, on x86-64 this is less critical. The first six function - // arguments will be passed in CPU registers, no matter whether they were - // named or variadic. This only leaves us with a single argument (if present) - // that could be passed on the stack. And since x86-64 is little endian, - // it will have the correct value both for 32bit and 64bit quantities. - // - // N.B. Because of how the x86-64 ABI works, it is possible that 32bit - // quantities will have undefined garbage bits in the upper 32 bits of a - // 64bit register. This is relatively unlikely for the first five system - // call arguments, as the processor does automatic sign extensions and zero - // filling so frequently, there rarely is garbage in CPU registers. But it - // is quite likely for the last argument, which is passed on the stack. - // That's generally OK, because the kernel has the correct function - // signatures and knows to only inspect the LSB of a 32bit value. - // But callers must be careful in cases, where the compiler cannot tell - // the difference (e.g. when passing NULL to any system call, it must - // always be cast to a pointer type). - // The glibc implementation of syscall() has the exact same issues. - // In the unlikely event that this ever becomes a problem, we could add - // code that handles six-argument system calls specially. The number of - // system calls that take six arguments and expect a 32bit value in the - // sixth argument is very limited. - va_start(ap, nr); - args[0] = va_arg(ap, void *); - args[1] = va_arg(ap, void *); - args[2] = va_arg(ap, void *); - args[3] = va_arg(ap, void *); - args[4] = va_arg(ap, void *); - args[5] = va_arg(ap, void *); - va_end(ap); - - // Invoke our file-scope assembly code. The constraints have been picked - // carefully to match what the rest of the assembly code expects in input, - // output, and clobbered registers. -#if defined(__i386__) - intptr_t ret = nr; - asm volatile( - "call SyscallAsm\n" - // N.B. These are not the calling conventions normally used by the ABI. - : "=a"(ret) - : "0"(ret), "D"(args) - : "esp", "memory", "ecx", "edx"); -#elif defined(__x86_64__) - intptr_t ret = nr; - { - register void **data __asm__("r12") = args; - asm volatile( - "call SyscallAsm\n" - // N.B. These are not the calling conventions normally used by the ABI. - : "=a"(ret) - : "0"(ret), "r"(data) - : "rsp", "memory", - "rcx", "rdi", "rsi", "rdx", "r8", "r9", "r10", "r11"); - } -#elif defined(__arm__) - intptr_t ret; - { - register intptr_t inout __asm__("r0") = nr; - register void **data __asm__("r6") = args; - asm volatile( - "bl SyscallAsm\n" - // N.B. These are not the calling conventions normally used by the ABI. - : "=r"(inout) - : "0"(inout), "r"(data) - : "lr", "memory", "r1", "r2", "r3", "r4", "r5" -#if !defined(__arm__) - // In thumb mode, we cannot use "r7" as a general purpose register, as - // it is our frame pointer. We have to manually manage and preserve it. - // In ARM mode, we have a dedicated frame pointer register and "r7" is - // thus available as a general purpose register. We don't preserve it, - // but instead mark it as clobbered. - , "r7" -#endif - ); - ret = inout; - } -#else - errno = ENOSYS; - intptr_t ret = -1; -#endif - return ret; -} - -} // namespace diff --git a/sandbox/linux/seccomp-bpf/syscall.h b/sandbox/linux/seccomp-bpf/syscall.h deleted file mode 100644 index 932e398..0000000 --- a/sandbox/linux/seccomp-bpf/syscall.h +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef SANDBOX_LINUX_SECCOMP_BPF_SYSCALL_H__ -#define SANDBOX_LINUX_SECCOMP_BPF_SYSCALL_H__ - -#include <signal.h> -#include <stdint.h> - -namespace playground2 { - -// We have to make sure that we have a single "magic" return address for -// our system calls, which we can check from within a BPF filter. This -// works by writing a little bit of asm() code that a) enters the kernel, and -// that also b) can be invoked in a way that computes this return address. -// Passing "nr" as "-1" computes the "magic" return address. Passing any -// other value invokes the appropriate system call. -intptr_t SandboxSyscall(int nr, ...); - -} // namespace - -#endif // SANDBOX_LINUX_SECCOMP_BPF_SYSCALL_H__ diff --git a/sandbox/linux/seccomp-bpf/syscall_unittest.cc b/sandbox/linux/seccomp-bpf/syscall_unittest.cc deleted file mode 100644 index 374a0fb..0000000 --- a/sandbox/linux/seccomp-bpf/syscall_unittest.cc +++ /dev/null @@ -1,113 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include <asm/unistd.h> -#include <fcntl.h> -#include <sys/mman.h> -#include <unistd.h> - -#include "base/posix/eintr_wrapper.h" -#include "sandbox/linux/seccomp-bpf/syscall.h" -#include "sandbox/linux/tests/unit_tests.h" -#include "testing/gtest/include/gtest/gtest.h" - -using namespace playground2; - -namespace { - -// Different platforms use different symbols for the six-argument version -// of the mmap() system call. Test for the correct symbol at compile time. -#ifdef __NR_mmap2 -const int kMMapNr = __NR_mmap2; -#else -const int kMMapNr = __NR_mmap; -#endif - -TEST(Syscall, WellKnownEntryPoint) { - // Test that SandboxSyscall(-1) is handled specially. Don't do this on ARM, - // where syscall(-1) crashes with SIGILL. Not running the test is fine, as we - // are still testing ARM code in the next set of tests. -#if !defined(__arm__) - EXPECT_NE(SandboxSyscall(-1), syscall(-1)); -#endif - - // If possible, test that SandboxSyscall(-1) returns the address right after - // a kernel entry point. -#if defined(__i386__) - EXPECT_EQ(0x80CDu, ((uint16_t *)SandboxSyscall(-1))[-1]); // INT 0x80 -#elif defined(__x86_64__) - EXPECT_EQ(0x050Fu, ((uint16_t *)SandboxSyscall(-1))[-1]); // SYSCALL -#elif defined(__arm__) -#if defined(__thumb__) - EXPECT_EQ(0xDF00u, ((uint16_t *)SandboxSyscall(-1))[-1]); // SWI 0 -#else - EXPECT_EQ(0xEF000000u, ((uint32_t *)SandboxSyscall(-1))[-1]); // SVC 0 -#endif -#else - #warning Incomplete test case; need port for target platform -#endif -} - -TEST(Syscall, TrivialSyscallNoArgs) { - // Test that we can do basic system calls - EXPECT_EQ(SandboxSyscall(__NR_getpid), syscall(__NR_getpid)); -} - -TEST(Syscall, ComplexSyscallSixArgs) { - int fd; - ASSERT_LE(0, fd = SandboxSyscall(__NR_open, "/dev/null", O_RDWR, 0L)); - - // Use mmap() to allocate some read-only memory - char *addr0; - ASSERT_NE((char *)NULL, - addr0 = reinterpret_cast<char *>( - SandboxSyscall(kMMapNr, (void *)NULL, 4096, PROT_READ, - MAP_PRIVATE|MAP_ANONYMOUS, fd, 0L))); - - // Try to replace the existing mapping with a read-write mapping - char *addr1; - ASSERT_EQ(addr0, - addr1 = reinterpret_cast<char *>( - SandboxSyscall(kMMapNr, addr0, 4096L, PROT_READ|PROT_WRITE, - MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, - fd, 0L))); - ++*addr1; // This should not seg fault - - // Clean up - EXPECT_EQ(0, SandboxSyscall(__NR_munmap, addr1, 4096L)); - EXPECT_EQ(0, HANDLE_EINTR(SandboxSyscall(__NR_close, fd))); - - // Check that the offset argument (i.e. the sixth argument) is processed - // correctly. - ASSERT_GE(fd = SandboxSyscall(__NR_open, "/proc/self/exe", O_RDONLY, 0L), 0); - char *addr2, *addr3; - ASSERT_NE((char *)NULL, - addr2 = reinterpret_cast<char *>( - SandboxSyscall(kMMapNr, (void *)NULL, 8192L, PROT_READ, - MAP_PRIVATE, fd, 0L))); - ASSERT_NE((char *)NULL, - addr3 = reinterpret_cast<char *>( - SandboxSyscall(kMMapNr, (void *)NULL, 4096L, PROT_READ, - MAP_PRIVATE, fd, -#if defined(__NR_mmap2) - 1L -#else - 4096L -#endif - ))); - EXPECT_EQ(0, memcmp(addr2 + 4096, addr3, 4096)); - - // Just to be absolutely on the safe side, also verify that the file - // contents matches what we are getting from a read() operation. - char buf[8192]; - EXPECT_EQ(8192, SandboxSyscall(__NR_read, fd, buf, 8192L)); - EXPECT_EQ(0, memcmp(addr2, buf, 8192)); - - // Clean up - EXPECT_EQ(0, SandboxSyscall(__NR_munmap, addr2, 8192L)); - EXPECT_EQ(0, SandboxSyscall(__NR_munmap, addr3, 4096L)); - EXPECT_EQ(0, HANDLE_EINTR(SandboxSyscall(__NR_close, fd))); -} - -} // namespace |