summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- sandbox/linux/sandbox_linux.gypi 3
-rw-r--r-- sandbox/linux/seccomp-bpf/Makefile 2
-rw-r--r-- sandbox/linux/seccomp-bpf/codegen.cc 36
-rw-r--r-- sandbox/linux/seccomp-bpf/codegen.h 9
-rw-r--r-- sandbox/linux/seccomp-bpf/die.cc 22
-rw-r--r-- sandbox/linux/seccomp-bpf/die.h 12
-rw-r--r-- sandbox/linux/seccomp-bpf/errorcode.cc 4
-rw-r--r-- sandbox/linux/seccomp-bpf/errorcode.h 3
-rw-r--r-- sandbox/linux/seccomp-bpf/sandbox_bpf.cc 336
-rw-r--r-- sandbox/linux/seccomp-bpf/sandbox_bpf.h 71
-rw-r--r-- sandbox/linux/seccomp-bpf/sandbox_bpf_unittest.cc 159
-rw-r--r-- sandbox/linux/seccomp-bpf/syscall.cc 282
-rw-r--r-- sandbox/linux/seccomp-bpf/syscall.h 23
-rw-r--r-- sandbox/linux/seccomp-bpf/syscall_unittest.cc 113
14 files changed, 67 insertions, 1008 deletions
diff --git a/sandbox/linux/sandbox_linux.gypi b/sandbox/linux/sandbox_linux.gypi
index 535fb89..c02cd31 100644
--- a/sandbox/linux/sandbox_linux.gypi
+++ b/sandbox/linux/sandbox_linux.gypi
@@ -58,7 +58,6 @@
'seccomp-bpf/errorcode_unittest.cc',
'seccomp-bpf/sandbox_bpf_unittest.cc',
'seccomp-bpf/syscall_iterator_unittest.cc',
- 'seccomp-bpf/syscall_unittest.cc',
],
}],
],
@@ -78,8 +77,6 @@
'seccomp-bpf/instruction.h',
'seccomp-bpf/sandbox_bpf.cc',
'seccomp-bpf/sandbox_bpf.h',
- 'seccomp-bpf/syscall.cc',
- 'seccomp-bpf/syscall.h',
'seccomp-bpf/syscall_iterator.cc',
'seccomp-bpf/syscall_iterator.h',
'seccomp-bpf/verifier.cc',
diff --git a/sandbox/linux/seccomp-bpf/Makefile b/sandbox/linux/seccomp-bpf/Makefile
index 6d644b8..a697198 100644
--- a/sandbox/linux/seccomp-bpf/Makefile
+++ b/sandbox/linux/seccomp-bpf/Makefile
@@ -2,7 +2,7 @@ DEF_CFLAGS = -g -O3 -Wall -Werror -Wextra -Wno-missing-field-initializers -fPIC
DEF_CPPFLAGS = -D_GNU_SOURCE -DSECCOMP_BPF_STANDALONE -DSECCOMP_BPF_VALGRIND_HACKS -include valgrind/valgrind.h -iquote ../../..
DEF_LDFLAGS = -g -lpthread
DEPFLAGS = -MMD -MF .$@.d
-MODS := demo sandbox_bpf basicblock codegen die errorcode syscall syscall_iterator util verifier
+MODS := demo sandbox_bpf die codegen errorcode syscall_iterator util verifier
OBJS64 := $(shell echo ${MODS} | xargs -n 1 | sed -e 's/$$/.o64/')
OBJS32 := $(shell echo ${MODS} | xargs -n 1 | sed -e 's/$$/.o32/')
ALL_OBJS = $(OBJS32) $(OBJS64)
diff --git a/sandbox/linux/seccomp-bpf/codegen.cc b/sandbox/linux/seccomp-bpf/codegen.cc
index 649793c..8b36315 100644
--- a/sandbox/linux/seccomp-bpf/codegen.cc
+++ b/sandbox/linux/seccomp-bpf/codegen.cc
@@ -5,31 +5,6 @@
#include "sandbox/linux/seccomp-bpf/codegen.h"
-namespace {
-
-// Helper function for Traverse().
-void TraverseRecursively(std::set<playground2::Instruction *> *visited,
- playground2::Instruction *instruction) {
- if (visited->find(instruction) == visited->end()) {
- visited->insert(instruction);
- switch (BPF_CLASS(instruction->code)) {
- case BPF_JMP:
- if (BPF_OP(instruction->code) != BPF_JA) {
- TraverseRecursively(visited, instruction->jf_ptr);
- }
- TraverseRecursively(visited, instruction->jt_ptr);
- break;
- case BPF_RET:
- break;
- default:
- TraverseRecursively(visited, instruction->next);
- break;
- }
- }
-}
-
-} // namespace
-
namespace playground2 {
CodeGen::CodeGen()
@@ -170,17 +145,6 @@ void CodeGen::JoinInstructions(Instruction *head, Instruction *tail) {
return;
}
-void CodeGen::Traverse(Instruction *instruction,
- void (*fnc)(Instruction *, void *), void *aux) {
- std::set<Instruction *> visited;
- TraverseRecursively(&visited, instruction);
- for (std::set<Instruction *>::const_iterator iter = visited.begin();
- iter != visited.end();
- ++iter) {
- fnc(*iter, aux);
- }
-}
-
void CodeGen::FindBranchTargets(const Instruction& instructions,
BranchTargets *branch_targets) {
// Follow all possible paths through the "instructions" graph and compute
diff --git a/sandbox/linux/seccomp-bpf/codegen.h b/sandbox/linux/seccomp-bpf/codegen.h
index 88521c2..b7d1d39 100644
--- a/sandbox/linux/seccomp-bpf/codegen.h
+++ b/sandbox/linux/seccomp-bpf/codegen.h
@@ -77,15 +77,6 @@ class CodeGen {
// or if a (conditional) jump still has an unsatisfied target.
void JoinInstructions(Instruction *head, Instruction *tail);
- // Traverse the graph of instructions and visit each instruction once.
- // Traversal order is implementation-defined. It is acceptable to make
- // changes to the graph from within the callback function. These changes
- // do not affect traversal.
- // The "fnc" function gets called with both the instruction and the opaque
- // "aux" pointer.
- void Traverse(Instruction *, void (*fnc)(Instruction *, void *aux),
- void *aux);
-
// Compiles the graph of instructions into a BPF program that can be passed
// to the kernel. Please note that this function modifies the graph in place
// and must therefore only be called once per graph.
diff --git a/sandbox/linux/seccomp-bpf/die.cc b/sandbox/linux/seccomp-bpf/die.cc
index 92ffa2a..b141424 100644
--- a/sandbox/linux/seccomp-bpf/die.cc
+++ b/sandbox/linux/seccomp-bpf/die.cc
@@ -5,7 +5,6 @@
#include <string>
#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
-#include "sandbox/linux/seccomp-bpf/syscall.h"
namespace playground2 {
@@ -16,7 +15,7 @@ void Die::ExitGroup() {
// Especially, since we are dealing with system call filters. Continuing
// execution would be very bad in most cases where ExitGroup() gets called.
// So, we'll try a few other strategies too.
- SandboxSyscall(__NR_exit_group, 1);
+ syscall(__NR_exit_group, 1);
// We have no idea what our run-time environment looks like. So, signal
// handlers might or might not do the right thing. Try to reset settings
@@ -24,7 +23,7 @@ void Die::ExitGroup() {
// succeeded in doing so. Nonetheless, triggering a fatal signal could help
// us terminate.
signal(SIGSEGV, SIG_DFL);
- SandboxSyscall(__NR_prctl, PR_SET_DUMPABLE, (void *)0, (void *)0, (void *)0);
+ syscall(__NR_prctl, PR_SET_DUMPABLE, (void *)0, (void *)0, (void *)0);
if (*(volatile char *)0) { }
// If there is no way for us to ask for the program to exit, the next
@@ -33,7 +32,7 @@ void Die::ExitGroup() {
// We in fact retry the system call inside of our loop so that it will
// stand out when somebody tries to diagnose the problem by using "strace".
for (;;) {
- SandboxSyscall(__NR_exit_group, 1);
+ syscall(__NR_exit_group, 1);
}
}
@@ -50,16 +49,6 @@ void Die::SandboxDie(const char *msg, const char *file, int line) {
ExitGroup();
}
-void Die::SandboxInfo(const char *msg, const char *file, int line) {
- if (!suppress_info_) {
- #if defined(SECCOMP_BPF_STANDALONE)
- Die::LogToStderr(msg, file, line);
- #else
- logging::LogMessage(file, line, logging::LOG_INFO).stream() << msg;
- #endif
- }
-}
-
void Die::LogToStderr(const char *msg, const char *file, int line) {
if (msg) {
char buf[40];
@@ -68,11 +57,10 @@ void Die::LogToStderr(const char *msg, const char *file, int line) {
// No need to loop. Short write()s are unlikely and if they happen we
// probably prefer them over a loop that blocks.
- if (HANDLE_EINTR(SandboxSyscall(__NR_write, 2, s.c_str(), s.length()))) { }
+ if (HANDLE_EINTR(write(2, s.c_str(), s.length()))) { }
}
}
-bool Die::simple_exit_ = false;
-bool Die::suppress_info_ = false;
+bool Die::simple_exit_ = false;
} // namespace
diff --git a/sandbox/linux/seccomp-bpf/die.h b/sandbox/linux/seccomp-bpf/die.h
index c0ad8fd..608afde 100644
--- a/sandbox/linux/seccomp-bpf/die.h
+++ b/sandbox/linux/seccomp-bpf/die.h
@@ -13,9 +13,6 @@ class Die {
// exits with a fatal error.
#define SANDBOX_DIE(m) Die::SandboxDie(m, __FILE__, __LINE__)
- // Adds an informational message to the log file or stderr as appropriate.
- #define SANDBOX_INFO(m) Die::SandboxInfo(m, __FILE__, __LINE__)
-
// Terminate the program, even if the current sandbox policy prevents some
// of the more commonly used functions used for exiting.
// Most users would want to call SANDBOX_DIE() instead, as it logs extra
@@ -28,10 +25,6 @@ class Die {
static void SandboxDie(const char *msg, const char *file, int line)
__attribute__((noreturn));
- // This method gets called by SANDBOX_INFO(). There is normally no reason
- // to call it directly unless you are defining your own logging macro.
- static void SandboxInfo(const char *msg, const char *file, int line);
-
// Writes a message to stderr. Used as a fall-back choice, if we don't have
// any other way to report an error.
static void LogToStderr(const char *msg, const char *file, int line);
@@ -43,13 +36,8 @@ class Die {
// unit tests or in the supportsSeccompSandbox() method).
static void EnableSimpleExit() { simple_exit_ = true; }
- // Sometimes we need to disable all informational messages (e.g. from within
- // unittests).
- static void SuppressInfoMessages(bool flag) { suppress_info_ = flag; }
-
private:
static bool simple_exit_;
- static bool suppress_info_;
DISALLOW_IMPLICIT_CONSTRUCTORS(Die);
};
diff --git a/sandbox/linux/seccomp-bpf/errorcode.cc b/sandbox/linux/seccomp-bpf/errorcode.cc
index 4d21b792..cc79cb6 100644
--- a/sandbox/linux/seccomp-bpf/errorcode.cc
+++ b/sandbox/linux/seccomp-bpf/errorcode.cc
@@ -22,12 +22,10 @@ ErrorCode::ErrorCode(int err) {
}
}
-ErrorCode::ErrorCode(ErrorCode::TrapFnc fnc, const void *aux, bool safe,
- uint16_t id)
+ErrorCode::ErrorCode(ErrorCode::TrapFnc fnc, const void *aux, uint16_t id)
: error_type_(ET_TRAP),
fnc_(fnc),
aux_(const_cast<void *>(aux)),
- safe_(safe),
err_(SECCOMP_RET_TRAP + id) {
}
diff --git a/sandbox/linux/seccomp-bpf/errorcode.h b/sandbox/linux/seccomp-bpf/errorcode.h
index d2661db..2b941ee 100644
--- a/sandbox/linux/seccomp-bpf/errorcode.h
+++ b/sandbox/linux/seccomp-bpf/errorcode.h
@@ -94,7 +94,7 @@ class ErrorCode {
// If we are wrapping a callback, we must assign a unique id. This id is
// how the kernel tells us which one of our different SECCOMP_RET_TRAP
// cases has been triggered.
- ErrorCode(TrapFnc fnc, const void *aux, bool safe, uint16_t id);
+ ErrorCode(TrapFnc fnc, const void *aux, uint16_t id);
// Some system calls require inspection of arguments. This constructor
// allows us to specify additional constraints.
@@ -108,7 +108,6 @@ class ErrorCode {
struct {
TrapFnc fnc_; // Callback function and arg, if trap was
void *aux_; // triggered by the kernel's BPF filter.
- bool safe_; // Keep sandbox active while calling fnc_()
};
// Fields needed when inspecting additional arguments.
diff --git a/sandbox/linux/seccomp-bpf/sandbox_bpf.cc b/sandbox/linux/seccomp-bpf/sandbox_bpf.cc
index 60ebd50..eb03995 100644
--- a/sandbox/linux/seccomp-bpf/sandbox_bpf.cc
+++ b/sandbox/linux/seccomp-bpf/sandbox_bpf.cc
@@ -2,27 +2,8 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include <endian.h>
-#if __BYTE_ORDER == __BIG_ENDIAN
-// The BPF "struct seccomp_data" layout has to deal with storing 64bit
-// values that need to be inspected by a virtual machine that only ever
-// operates on 32bit values. The kernel developers decided how values
-// should be split into two 32bit words to achieve this goal. But at this
-// time, there is no existing BPF implementation in the kernel that uses
-// 64bit big endian values. So, all we have to go by is the consensus
-// from a discussion on LKLM. Actual implementations, if and when they
-// happen, might very well differ.
-// If this code is ever going to be used with such a kernel, you should
-// disable the "#error" and carefully test the code (e.g. run the unit
-// tests). If things don't work, search for all occurrences of __BYTE_ORDER
-// and verify that the proposed implementation agrees with what the kernel
-// actually does.
-#error Big endian operation is untested and expected to be broken
-#endif
-
#include "sandbox/linux/seccomp-bpf/codegen.h"
#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
-#include "sandbox/linux/seccomp-bpf/syscall.h"
#include "sandbox/linux/seccomp-bpf/syscall_iterator.h"
#include "sandbox/linux/seccomp-bpf/verifier.h"
@@ -37,30 +18,6 @@ void WriteFailedStderrSetupMessage(int out_fd) {
}
}
-// We need to tell whether we are performing a "normal" callback, or
-// whether we were called recursively from within a UnsafeTrap() callback.
-// This is a little tricky to do, because we need to somehow get access to
-// per-thread data from within a signal context. Normal TLS storage is not
-// safely accessible at this time. We could roll our own, but that involves
-// a lot of complexity. Instead, we co-opt one bit in the signal mask.
-// If BUS is blocked, we assume that we have been called recursively.
-// There is a possibility for collision with other code that needs to do
-// this, but in practice the risks are low.
-// If SIGBUS turns out to be a problem, we could instead co-opt one of the
-// realtime signals. There are plenty of them. Unfortunately, there is no
-// way to mark a signal as allocated. So, the potential for collision is
-// possibly even worse.
-bool GetIsInSigHandler(const ucontext_t *ctx) {
- return sigismember(&ctx->uc_sigmask, SIGBUS);
-}
-
-void SetIsInSigHandler() {
- sigset_t mask;
- sigemptyset(&mask);
- sigaddset(&mask, SIGBUS);
- sigprocmask(SIG_BLOCK, &mask, NULL);
-}
-
} // namespace
// The kernel gives us a sandbox, we turn it into a playground :-)
@@ -362,48 +319,6 @@ void Sandbox::policySanityChecks(EvaluateSyscall syscallEvaluator,
return;
}
-void Sandbox::CheckForUnsafeErrorCodes(Instruction *insn, void *aux) {
- if (BPF_CLASS(insn->code) == BPF_RET &&
- insn->k > SECCOMP_RET_TRAP &&
- insn->k - SECCOMP_RET_TRAP <= trapArraySize_) {
- const ErrorCode& err = trapArray_[insn->k - SECCOMP_RET_TRAP - 1];
- if (!err.safe_) {
- bool *is_unsafe = static_cast<bool *>(aux);
- *is_unsafe = true;
- }
- }
-}
-
-void Sandbox::RedirectToUserspace(Instruction *insn, void *aux) {
- // When inside an UnsafeTrap() callback, we want to allow all system calls.
- // This means, we must conditionally disable the sandbox -- and that's not
- // something that kernel-side BPF filters can do, as they cannot inspect
- // any state other than the syscall arguments.
- // But if we redirect all error handlers to user-space, then we can easily
- // make this decision.
- // The performance penalty for this extra round-trip to user-space is not
- // actually that bad, as we only ever pay it for denied system calls; and a
- // typical program has very few of these.
- if (BPF_CLASS(insn->code) == BPF_RET &&
- (insn->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
- insn->k = Trap(ReturnErrno,
- reinterpret_cast<void *>(insn->k & SECCOMP_RET_DATA)).err();
- }
-}
-
-ErrorCode Sandbox::RedirectToUserspaceEvalWrapper(int sysnum, void *aux) {
- // We need to replicate the behavior of RedirectToUserspace(), so that our
- // Verifier can still work correctly.
- Evaluators *evaluators = reinterpret_cast<Evaluators *>(aux);
- const std::pair<EvaluateSyscall, void *>& evaluator = *evaluators->begin();
- ErrorCode err = evaluator.first(sysnum, evaluator.second);
- if ((err.err() & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
- return Trap(ReturnErrno,
- reinterpret_cast<void *>(err.err() & SECCOMP_RET_DATA));
- }
- return err;
-}
-
void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator, void *aux) {
if (status_ == STATUS_ENABLED) {
SANDBOX_DIE("Cannot change policy after sandbox has started");
@@ -422,8 +337,8 @@ void Sandbox::installFilter(bool quiet) {
// Set new SIGSYS handler
struct sigaction sa;
memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = sigSys;
- sa.sa_flags = SA_SIGINFO | SA_NODEFER;
+ sa.sa_sigaction = &sigSys;
+ sa.sa_flags = SA_SIGINFO;
if (sigaction(SIGSYS, &sa, NULL) < 0) {
goto filter_failed;
}
@@ -454,13 +369,33 @@ void Sandbox::installFilter(bool quiet) {
Instruction *head =
gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
offsetof(struct arch_seccomp_data, arch),
- tail =
gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH,
- NULL,
+ tail =
+ // Grab the system call number, so that we can implement jump tables.
+ gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
+ offsetof(struct arch_seccomp_data, nr)),
gen->MakeInstruction(BPF_RET+BPF_K,
Kill(
"Invalid audit architecture in BPF filter").err_)));
+ // On Intel architectures, verify that system call numbers are in the
+ // expected number range. The older i386 and x86-64 APIs clear bit 30
+ // on all system calls. The newer x32 API always sets bit 30.
+#if defined(__i386__) || defined(__x86_64__)
+ Instruction *invalidX32 =
+ gen->MakeInstruction(BPF_RET+BPF_K,
+ Kill("Illegal mixing of system call ABIs").err_);
+ Instruction *checkX32 =
+#if defined(__x86_64__) && defined(__ILP32__)
+ gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, invalidX32);
+#else
+ gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, invalidX32, 0);
+#endif
+ gen->JoinInstructions(tail, checkX32);
+ tail = checkX32;
+#endif
+
+
{
// Evaluate all possible system calls and group their ErrorCodes into
// ranges of identical codes.
@@ -471,109 +406,6 @@ void Sandbox::installFilter(bool quiet) {
Instruction *jumptable =
assembleJumpTable(gen, ranges.begin(), ranges.end());
- // If there is at least one UnsafeTrap() in our program, the entire sandbox
- // is unsafe. We need to modify the program so that all non-
- // SECCOMP_RET_ALLOW ErrorCodes are handled in user-space. This will then
- // allow us to temporarily disable sandboxing rules inside of callbacks to
- // UnsafeTrap().
- has_unsafe_traps_ = false;
- gen->Traverse(jumptable, CheckForUnsafeErrorCodes, &has_unsafe_traps_);
-
- // Grab the system call number, so that we can implement jump tables.
- Instruction *load_nr =
- gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
- offsetof(struct arch_seccomp_data, nr));
-
- // If our BPF program has unsafe jumps, enable support for them. This
- // test happens very early in the BPF filter program. Even before we
- // consider looking at system call numbers.
- // As support for unsafe jumps essentially defeats all the security
- // measures that the sandbox provides, we print a big warning message --
- // and of course, we make sure to only ever enable this feature if it
- // is actually requested by the sandbox policy.
- if (has_unsafe_traps_) {
- if (SandboxSyscall(-1) == -1 && errno == ENOSYS) {
- SANDBOX_DIE("Support for UnsafeTrap() has not yet been ported to this "
- "architecture");
- }
-
- EvaluateSyscall evaluateSyscall = evaluators_.begin()->first;
- void *aux = evaluators_.begin()->second;
- if (!evaluateSyscall(__NR_rt_sigprocmask, aux).
- Equals(ErrorCode(ErrorCode::ERR_ALLOWED)) ||
- !evaluateSyscall(__NR_rt_sigreturn, aux).
- Equals(ErrorCode(ErrorCode::ERR_ALLOWED))
-#if defined(__NR_sigprocmask)
- || !evaluateSyscall(__NR_sigprocmask, aux).
- Equals(ErrorCode(ErrorCode::ERR_ALLOWED))
-#endif
-#if defined(__NR_sigreturn)
- || !evaluateSyscall(__NR_sigreturn, aux).
- Equals(ErrorCode(ErrorCode::ERR_ALLOWED))
-#endif
- ) {
- SANDBOX_DIE("Invalid seccomp policy; if using UnsafeTrap(), you must "
- "unconditionally allow sigreturn() and sigprocmask()");
- }
-
- SANDBOX_INFO("WARNING! Disabling sandbox for debugging purposes");
- gen->Traverse(jumptable, RedirectToUserspace, NULL);
-
- // Allow system calls, if they originate from our magic return address
- // (which we can query by calling SandboxSyscall(-1)).
- uintptr_t syscall_entry_point =
- static_cast<uintptr_t>(SandboxSyscall(-1));
- uint32_t low = static_cast<uint32_t>(syscall_entry_point);
-#if __SIZEOF_POINTER__ > 4
- uint32_t hi = static_cast<uint32_t>(syscall_entry_point >> 32);
-#endif
-
- // BPF cannot do native 64bit comparisons. On 64bit architectures, we
- // have to compare both 32bit halfs of the instruction pointer. If they
- // match what we expect, we return ERR_ALLOWED. If either or both don't
- // match, we continue evalutating the rest of the sandbox policy.
- Instruction *escape_hatch =
- gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
- offsetof(struct arch_seccomp_data,
- instruction_pointer) +
- (__SIZEOF_POINTER__ > 4 &&
- __BYTE_ORDER == __BIG_ENDIAN ? 4 : 0),
- gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, low,
-#if __SIZEOF_POINTER__ > 4
- gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
- offsetof(struct arch_seccomp_data,
- instruction_pointer) +
- (__BYTE_ORDER == __BIG_ENDIAN ? 0 : 4),
- gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, hi,
-#endif
- gen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(ErrorCode::ERR_ALLOWED)),
-#if __SIZEOF_POINTER__ > 4
- load_nr)),
-#endif
- load_nr));
- gen->JoinInstructions(tail, escape_hatch);
- } else {
- gen->JoinInstructions(tail, load_nr);
- }
- tail = load_nr;
-
- // On Intel architectures, verify that system call numbers are in the
- // expected number range. The older i386 and x86-64 APIs clear bit 30
- // on all system calls. The newer x32 API always sets bit 30.
-#if defined(__i386__) || defined(__x86_64__)
- Instruction *invalidX32 =
- gen->MakeInstruction(BPF_RET+BPF_K,
- Kill("Illegal mixing of system call ABIs").err_);
- Instruction *checkX32 =
-#if defined(__x86_64__) && defined(__ILP32__)
- gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, invalidX32);
-#else
- gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, invalidX32, 0);
-#endif
- gen->JoinInstructions(tail, checkX32);
- tail = checkX32;
-#endif
-
// Append jump table to our pre-amble
gen->JoinInstructions(tail, jumptable);
}
@@ -587,22 +419,9 @@ void Sandbox::installFilter(bool quiet) {
// correctly. Otherwise, there is an internal error in our BPF compiler.
// There is really nothing the caller can do until the bug is fixed.
#ifndef NDEBUG
- {
- // If we previously rewrote the BPF program so that it calls user-space
- // whenever we return an "errno" value from the filter, then we have to
- // wrap our system call evaluator to perform the same operation. Otherwise,
- // the verifier would also report a mismatch in return codes.
- Evaluators redirected_evaluators;
- redirected_evaluators.push_back(
- std::make_pair(RedirectToUserspaceEvalWrapper, &evaluators_));
-
- const char *err = NULL;
- if (!Verifier::VerifyBPF(
- *program,
- has_unsafe_traps_ ? redirected_evaluators : evaluators_,
- &err)) {
- SANDBOX_DIE(err);
- }
+ const char *err = NULL;
+ if (!Verifier::VerifyBPF(*program, evaluators_, &err)) {
+ SANDBOX_DIE(err);
}
#endif
@@ -625,6 +444,7 @@ void Sandbox::installFilter(bool quiet) {
// Release memory that is no longer needed
evaluators_.clear();
+ errMap_.clear();
#if defined(SECCOMP_BPF_VALGRIND_HACKS)
// Valgrind is really not happy about our sandbox. Disable it when running
@@ -741,43 +561,27 @@ void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) {
goto sigsys_err;
}
- intptr_t rc;
- if (has_unsafe_traps_ && GetIsInSigHandler(ctx)) {
- errno = old_errno;
- if (sigsys.nr == __NR_clone) {
- SANDBOX_DIE("Cannot call clone() from an UnsafeTrap() handler");
- }
- rc = SandboxSyscall(sigsys.nr,
- SECCOMP_PARM1(ctx), SECCOMP_PARM2(ctx),
- SECCOMP_PARM3(ctx), SECCOMP_PARM4(ctx),
- SECCOMP_PARM5(ctx), SECCOMP_PARM6(ctx));
- } else {
- const ErrorCode& err = trapArray_[info->si_errno - 1];
- if (!err.safe_) {
- SetIsInSigHandler();
+ // Copy the seccomp-specific data into a arch_seccomp_data structure. This
+ // is what we are showing to TrapFnc callbacks that the system call evaluator
+ // registered with the sandbox.
+ struct arch_seccomp_data data = {
+ sigsys.nr,
+ SECCOMP_ARCH,
+ reinterpret_cast<uint64_t>(sigsys.ip),
+ {
+ static_cast<uint64_t>(SECCOMP_PARM1(ctx)),
+ static_cast<uint64_t>(SECCOMP_PARM2(ctx)),
+ static_cast<uint64_t>(SECCOMP_PARM3(ctx)),
+ static_cast<uint64_t>(SECCOMP_PARM4(ctx)),
+ static_cast<uint64_t>(SECCOMP_PARM5(ctx)),
+ static_cast<uint64_t>(SECCOMP_PARM6(ctx))
}
+ };
- // Copy the seccomp-specific data into a arch_seccomp_data structure. This
- // is what we are showing to TrapFnc callbacks that the system call
- // evaluator registered with the sandbox.
- struct arch_seccomp_data data = {
- sigsys.nr,
- SECCOMP_ARCH,
- reinterpret_cast<uint64_t>(sigsys.ip),
- {
- static_cast<uint64_t>(SECCOMP_PARM1(ctx)),
- static_cast<uint64_t>(SECCOMP_PARM2(ctx)),
- static_cast<uint64_t>(SECCOMP_PARM3(ctx)),
- static_cast<uint64_t>(SECCOMP_PARM4(ctx)),
- static_cast<uint64_t>(SECCOMP_PARM5(ctx)),
- static_cast<uint64_t>(SECCOMP_PARM6(ctx))
- }
- };
-
- // Now call the TrapFnc callback associated with this particular instance
- // of SECCOMP_RET_TRAP.
- rc = err.fnc_(data, err.aux_);
- }
+ // Now call the TrapFnc callback associated with this particular instance
+ // of SECCOMP_RET_TRAP.
+ const ErrorCode& err = trapArray_[info->si_errno - 1];
+ intptr_t rc = err.fnc_(data, err.aux_);
// Update the CPU register that stores the return code of the system call
// that we just handled, and restore "errno" to the value that it had
@@ -788,21 +592,10 @@ void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) {
return;
}
-bool Sandbox::TrapKey::operator<(const Sandbox::TrapKey& o) const {
- if (fnc != o.fnc) {
- return fnc < o.fnc;
- } else if (aux != o.aux) {
- return aux < o.aux;
- } else {
- return safe < o.safe;
- }
-}
-
-ErrorCode Sandbox::MakeTrap(ErrorCode::TrapFnc fnc, const void *aux,
- bool safe) {
+ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) {
// Each unique pair of TrapFnc and auxiliary data make up a distinct instance
// of a SECCOMP_RET_TRAP.
- TrapKey key(fnc, aux, safe);
+ std::pair<ErrorCode::TrapFnc, const void *> key(fnc, aux);
TrapIds::const_iterator iter = trapIds_.find(key);
uint16_t id;
if (iter != trapIds_.end()) {
@@ -825,7 +618,7 @@ ErrorCode Sandbox::MakeTrap(ErrorCode::TrapFnc fnc, const void *aux,
}
id = traps_->size() + 1;
- traps_->push_back(ErrorCode(fnc, aux, safe, id));
+ traps_->push_back(ErrorCode(fnc, aux, id));
trapIds_[key] = id;
// We want to access the traps_ vector from our signal handler. But
@@ -836,33 +629,10 @@ ErrorCode Sandbox::MakeTrap(ErrorCode::TrapFnc fnc, const void *aux,
// signal handler, where we can safely do so.
trapArray_ = &(*traps_)[0];
trapArraySize_ = id;
- return traps_->back();
}
- return ErrorCode(fnc, aux, safe, id);
-}
-
-ErrorCode Sandbox::Trap(ErrorCode::TrapFnc fnc, const void *aux) {
- return MakeTrap(fnc, aux, true /* Safe Trap */);
-}
-
-ErrorCode Sandbox::UnsafeTrap(ErrorCode::TrapFnc fnc, const void *aux) {
- return MakeTrap(fnc, aux, false /* Unsafe Trap */);
-}
-
-intptr_t Sandbox::ForwardSyscall(const struct arch_seccomp_data& args) {
- return SandboxSyscall(args.nr,
- args.args[0], args.args[1], args.args[2],
- args.args[3], args.args[4], args.args[5]);
-}
-
-intptr_t Sandbox::ReturnErrno(const struct arch_seccomp_data&, void *aux) {
- // TrapFnc functions report error by following the native kernel convention
- // of returning an exit code in the range of -1..-4096. They do not try to
- // set errno themselves. The glibc wrapper that triggered the SIGSYS will
- // ultimately do so for us.
- int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA;
- return -err;
+ ErrorCode err = ErrorCode(fnc, aux, id);
+ return errMap_[err.err()] = err;
}
intptr_t Sandbox::bpfFailure(const struct arch_seccomp_data&, void *aux) {
@@ -876,10 +646,10 @@ ErrorCode Sandbox::Kill(const char *msg) {
Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;
int Sandbox::proc_fd_ = -1;
Sandbox::Evaluators Sandbox::evaluators_;
+Sandbox::ErrMap Sandbox::errMap_;
Sandbox::Traps *Sandbox::traps_ = NULL;
Sandbox::TrapIds Sandbox::trapIds_;
ErrorCode *Sandbox::trapArray_ = NULL;
size_t Sandbox::trapArraySize_ = 0;
- bool Sandbox::has_unsafe_traps_ = false;
} // namespace
diff --git a/sandbox/linux/seccomp-bpf/sandbox_bpf.h b/sandbox/linux/seccomp-bpf/sandbox_bpf.h
index 5497963..16ab1d3 100644
--- a/sandbox/linux/seccomp-bpf/sandbox_bpf.h
+++ b/sandbox/linux/seccomp-bpf/sandbox_bpf.h
@@ -207,11 +207,6 @@ class Sandbox {
// Please note that TrapFnc is executed from signal context and must be
// async-signal safe:
// http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html
- // Also note that it follows the calling convention of native system calls.
- // In other words, it reports an error by returning an exit code in the
- // range -1..-4096. It should not set errno when reporting errors; on the
- // other hand, accidentally modifying errno is harmless and the changes will
- // be undone afterwards.
typedef intptr_t (*TrapFnc)(const struct arch_seccomp_data& args, void *aux);
enum Operation {
@@ -276,25 +271,6 @@ class Sandbox {
// handler.
static ErrorCode Trap(ErrorCode::TrapFnc fnc, const void *aux);
- // Calls a user-space trap handler and disables all sandboxing for system
- // calls made from this trap handler.
- // NOTE: This feature, by definition, disables all security features of
- // the sandbox. It should never be used in production, but it can be
- // very useful to diagnose code that is incompatible with the sandbox.
- // If even a single system call returns "UnsafeTrap", the security of
- // entire sandbox should be considered compromised.
- static ErrorCode UnsafeTrap(ErrorCode::TrapFnc fnc, const void *aux);
-
- // From within an UnsafeTrap() it is often useful to be able to execute
- // the system call that triggered the trap. The ForwardSyscall() method
- // makes this easy. It is more efficient than calling glibc's syscall()
- // function, as it avoid the extra round-trip to the signal handler. And
- // it automatically does the correct thing to report kernel-style error
- // conditions, rather than setting errno. See the comments for TrapFnc for
- // details. In other words, the return value from ForwardSyscall() is
- // directly suitable as a return value for a trap handler.
- static intptr_t ForwardSyscall(const struct arch_seccomp_data& args);
-
// Kill the program and print an error message.
static ErrorCode Kill(const char *msg);
@@ -313,29 +289,18 @@ class Sandbox {
typedef std::vector<struct sock_filter> Program;
struct Range {
- Range(uint32_t f, uint32_t t, const ErrorCode& e)
- : from(f),
- to(t),
- err(e) {
+ Range(uint32_t f, uint32_t t, const ErrorCode& e) :
+ from(f),
+ to(t),
+ err(e) {
}
uint32_t from, to;
ErrorCode err;
};
- struct TrapKey {
- TrapKey(TrapFnc f, const void *a, bool s)
- : fnc(f),
- aux(a),
- safe(s) {
- }
- TrapFnc fnc;
- const void *aux;
- bool safe;
- bool operator<(const TrapKey&) const;
- };
typedef std::vector<Range> Ranges;
typedef std::map<uint32_t, ErrorCode> ErrMap;
typedef std::vector<ErrorCode> Traps;
- typedef std::map<TrapKey, uint16_t> TrapIds;
+ typedef std::map<std::pair<TrapFnc, const void *>, int> TrapIds;
// Get a file descriptor pointing to "/proc", if currently available.
static int proc_fd() { return proc_fd_; }
@@ -355,47 +320,23 @@ class Sandbox {
static bool disableFilesystem();
static void policySanityChecks(EvaluateSyscall syscallEvaluator,
void *aux);
-
- // Function that can be passed as a callback function to CodeGen::Traverse().
- // Checks whether the "insn" returns an UnsafeTrap() ErrorCode. If so, it
- // sets the "bool" variable pointed to by "aux".
- static void CheckForUnsafeErrorCodes(Instruction *insn, void *aux);
-
- // Function that can be passed as a callback function to CodeGen::Traverse().
- // Checks whether the "insn" returns an errno value from a BPF filter. If so,
- // it rewrites the instruction to instead call a Trap() handler that does
- // the same thing. "aux" is ignored.
- static void RedirectToUserspace(Instruction *insn, void *aux);
-
- // Stackable wrapper around an Evaluators handler. Changes ErrorCodes
- // returned by a system call evaluator to match the changes made by
- // RedirectToUserspace(). "aux" should be pointer to wrapped system call
- // evaluator.
- static ErrorCode RedirectToUserspaceEvalWrapper(int sysnum, void *aux);
-
static void installFilter(bool quiet);
static void findRanges(Ranges *ranges);
static Instruction *assembleJumpTable(CodeGen *gen,
Ranges::const_iterator start,
Ranges::const_iterator stop);
static void sigSys(int nr, siginfo_t *info, void *void_context);
- static ErrorCode MakeTrap(ErrorCode::TrapFnc fn, const void *aux, bool safe);
-
- // A Trap() handler that returns an "errno" value. The value is encoded
- // in the "aux" parameter.
- static intptr_t ReturnErrno(const struct arch_seccomp_data&, void *aux);
-
static intptr_t bpfFailure(const struct arch_seccomp_data& data, void *aux);
static int getTrapId(TrapFnc fnc, const void *aux);
static SandboxStatus status_;
static int proc_fd_;
static Evaluators evaluators_;
+ static ErrMap errMap_;
static Traps *traps_;
static TrapIds trapIds_;
static ErrorCode *trapArray_;
static size_t trapArraySize_;
- static bool has_unsafe_traps_;
DISALLOW_IMPLICIT_CONSTRUCTORS(Sandbox);
};
diff --git a/sandbox/linux/seccomp-bpf/sandbox_bpf_unittest.cc b/sandbox/linux/seccomp-bpf/sandbox_bpf_unittest.cc
index 3d1d8b6..8ea23d9 100644
--- a/sandbox/linux/seccomp-bpf/sandbox_bpf_unittest.cc
+++ b/sandbox/linux/seccomp-bpf/sandbox_bpf_unittest.cc
@@ -5,7 +5,6 @@
#include <ostream>
#include "sandbox/linux/seccomp-bpf/bpf_tests.h"
-#include "sandbox/linux/seccomp-bpf/syscall.h"
#include "sandbox/linux/seccomp-bpf/verifier.h"
#include "testing/gtest/include/gtest/gtest.h"
@@ -265,162 +264,4 @@ BPF_TEST(SandboxBpf, ArmPrivatePolicy, ArmPrivatePolicy) {
}
#endif // defined(__arm__)
-intptr_t CountSyscalls(const struct arch_seccomp_data& args, void *aux) {
- // Count all invocations of our callback function.
- ++*reinterpret_cast<int *>(aux);
-
- // Verify that within the callback function all filtering is temporarily
- // disabled.
- BPF_ASSERT(syscall(__NR_getpid) > 1);
-
- // Verify that we can now call the underlying system call without causing
- // infinite recursion.
- return Sandbox::ForwardSyscall(args);
-}
-
-ErrorCode GreyListedPolicy(int sysno, void *aux) {
- // The use of UnsafeTrap() causes us to print a warning message. This is
- // generally desirable, but it results in the unittest failing, as it doesn't
- // expect any messages on "stderr". So, temporarily disable messages. The
- // BPF_TEST() is guaranteed to turn messages back on, after the policy
- // function has completed.
- Die::SuppressInfoMessages(true);
-
- // Some system calls must always be allowed, if our policy wants to make
- // use of UnsafeTrap()
- if (sysno == __NR_rt_sigprocmask ||
- sysno == __NR_rt_sigreturn
-#if defined(__NR_sigprocmask)
- || sysno == __NR_sigprocmask
-#endif
-#if defined(__NR_sigreturn)
- || sysno == __NR_sigreturn
-#endif
- ) {
- return ErrorCode(ErrorCode::ERR_ALLOWED);
- } else if (sysno == __NR_getpid) {
- // Disallow getpid()
- return ErrorCode(EPERM);
- } else if (Sandbox::isValidSyscallNumber(sysno)) {
- // Allow (and count) all other system calls.
- return Sandbox::UnsafeTrap(CountSyscalls, aux);
- } else {
- return ErrorCode(ENOSYS);
- }
-}
-
-BPF_TEST(SandboxBpf, GreyListedPolicy,
- GreyListedPolicy, int /* BPF_AUX */) {
- BPF_ASSERT(syscall(__NR_getpid) == -1);
- BPF_ASSERT(errno == EPERM);
- BPF_ASSERT(BPF_AUX == 0);
- BPF_ASSERT(syscall(__NR_geteuid) == syscall(__NR_getuid));
- BPF_ASSERT(BPF_AUX == 2);
-}
-
-intptr_t AllowRedirectedSyscall(const struct arch_seccomp_data& args, void *) {
- return Sandbox::ForwardSyscall(args);
-}
-
-ErrorCode RedirectAllSyscallsPolicy(int sysno, void *aux) {
- Die::SuppressInfoMessages(true);
-
- // Some system calls must always be allowed, if our policy wants to make
- // use of UnsafeTrap()
- if (sysno == __NR_rt_sigprocmask ||
- sysno == __NR_rt_sigreturn
-#if defined(__NR_sigprocmask)
- || sysno == __NR_sigprocmask
-#endif
-#if defined(__NR_sigreturn)
- || sysno == __NR_sigreturn
-#endif
- ) {
- return ErrorCode(ErrorCode::ERR_ALLOWED);
- } else if (Sandbox::isValidSyscallNumber(sysno)) {
- return Sandbox::UnsafeTrap(AllowRedirectedSyscall, aux);
- } else {
- return ErrorCode(ENOSYS);
- }
-}
-
-int bus_handler_fd_ = -1;
-
-void SigBusHandler(int, siginfo_t *info, void *void_context) {
- BPF_ASSERT(write(bus_handler_fd_, "\x55", 1) == 1);
-}
-
-BPF_TEST(SandboxBpf, SigBus, RedirectAllSyscallsPolicy) {
- // We use the SIGBUS bit in the signal mask as a thread-local boolean
- // value in the implementation of UnsafeTrap(). This is obviously a bit
- // of a hack that could conceivably interfere with code that uses SIGBUS
- // in more traditional ways. This test verifies that basic functionality
- // of SIGBUS is not impacted, but it is certainly possible to construe
- // more complex uses of signals where our use of the SIGBUS mask is not
- // 100% transparent. This is expected behavior.
- int fds[2];
- BPF_ASSERT(pipe(fds) == 0);
- bus_handler_fd_ = fds[1];
- struct sigaction sa = { };
- sa.sa_sigaction = SigBusHandler;
- sa.sa_flags = SA_SIGINFO;
- BPF_ASSERT(sigaction(SIGBUS, &sa, NULL) == 0);
- raise(SIGBUS);
- char c = '\000';
- BPF_ASSERT(read(fds[0], &c, 1) == 1);
- BPF_ASSERT(close(fds[0]) == 0);
- BPF_ASSERT(close(fds[1]) == 0);
- BPF_ASSERT(c == 0x55);
-}
-
-BPF_TEST(SandboxBpf, SigMask, RedirectAllSyscallsPolicy) {
- // Signal masks are potentially tricky to handle. For instance, if we
- // ever tried to update them from inside a Trap() or UnsafeTrap() handler,
- // the call to sigreturn() at the end of the signal handler would undo
- // all of our efforts. So, it makes sense to test that sigprocmask()
- // works, even if we have a policy in place that makes use of UnsafeTrap().
- // In practice, this works because we force sigprocmask() to be handled
- // entirely in the kernel.
- sigset_t mask0, mask1, mask2;
-
- // Call sigprocmask() to verify that SIGUSR1 wasn't blocked, if we didn't
- // change the mask (it shouldn't have been, as it isn't blocked by default
- // in POSIX).
- sigemptyset(&mask0);
- BPF_ASSERT(!sigprocmask(SIG_BLOCK, &mask0, &mask1));
- BPF_ASSERT(!sigismember(&mask1, SIGUSR1));
-
- // Try again, and this time we verify that we can block it. This
- // requires a second call to sigprocmask().
- sigaddset(&mask0, SIGUSR1);
- BPF_ASSERT(!sigprocmask(SIG_BLOCK, &mask0, NULL));
- BPF_ASSERT(!sigprocmask(SIG_BLOCK, NULL, &mask2));
- BPF_ASSERT( sigismember(&mask2, SIGUSR1));
-}
-
-BPF_TEST(SandboxBpf, UnsafeTrapWithErrno, RedirectAllSyscallsPolicy) {
- // An UnsafeTrap() (or for that matter, a Trap()) has to report error
- // conditions by returning an exit code in the range -1..-4096. This
- // should happen automatically if using ForwardSyscall(). If the TrapFnc()
- // uses some other method to make system calls, then it is responsible
- // for computing the correct return code.
- // This test verifies that ForwardSyscall() does the correct thing.
-
- // The glibc system wrapper will ultimately set errno for us. So, from normal
- // userspace, all of this should be completely transparent.
- errno = 0;
- BPF_ASSERT(close(-1) == -1);
- BPF_ASSERT(errno == EBADF);
-
- // Explicitly avoid the glibc wrapper. This is not normally the way anybody
- // would make system calls, but it allows us to verify that we don't
- // accidentally mess with errno, when we shouldn't.
- errno = 0;
- struct arch_seccomp_data args = { 0 };
- args.nr = __NR_close;
- args.args[0] = -1;
- BPF_ASSERT(Sandbox::ForwardSyscall(args) == -EBADF);
- BPF_ASSERT(errno == 0);
-}
-
} // namespace
diff --git a/sandbox/linux/seccomp-bpf/syscall.cc b/sandbox/linux/seccomp-bpf/syscall.cc
deleted file mode 100644
index 619a983..0000000
--- a/sandbox/linux/seccomp-bpf/syscall.cc
+++ /dev/null
@@ -1,282 +0,0 @@
-// Copyright (c) 2012 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include <asm/unistd.h>
-#include <errno.h>
-#include <stdarg.h>
-
-#include "sandbox/linux/seccomp-bpf/syscall.h"
-
-
-namespace playground2 {
-
- asm( // We need to be able to tell the kernel exactly where we made a
- // system call. The C++ compiler likes to sometimes clone or
- // inline code, which would inadvertently end up duplicating
- // the entry point.
- // "gcc" can suppress code duplication with suitable function
- // attributes, but "clang" doesn't have this ability.
- // The "clang" developer mailing list suggested that the correct
- // and portable solution is a file-scope assembly block.
- // N.B. We do mark our code as a proper function so that backtraces
- // work correctly. But we make absolutely no attempt to use the
- // ABI's calling conventions for passing arguments. We will only
- // ever be called from assembly code and thus can pick more
- // suitable calling conventions.
-#if defined(__i386__)
- ".text\n"
- ".align 16, 0x90\n"
- ".type SyscallAsm, @function\n"
- "SyscallAsm:.cfi_startproc\n"
- // Check if "%eax" is negative. If so, do not attempt to make a
- // system call. Instead, compute the return address that is visible
- // to the kernel after we execute "int $0x80". This address can be
- // used as a marker that BPF code inspects.
- "test %eax, %eax\n"
- "jge 1f\n"
- // Always make sure that our code is position-independent, or
- // address space randomization might not work on i386. This means,
- // we can't use "lea", but instead have to rely on "call/pop".
- "call 0f; .cfi_adjust_cfa_offset 4\n"
- "0:pop %eax; .cfi_adjust_cfa_offset -4\n"
- "addl $2f-0b, %eax\n"
- "ret\n"
- // Save register that we don't want to clobber. On i386, we need to
- // save relatively aggressively, as there are a couple of registers
- // that are used internally (e.g. %ebx for position-independent
- // code, and %ebp for the frame pointer), and as we need to keep at
- // least a few registers available for the register allocator.
- "1:push %esi; .cfi_adjust_cfa_offset 4\n"
- "push %edi; .cfi_adjust_cfa_offset 4\n"
- "push %ebx; .cfi_adjust_cfa_offset 4\n"
- "push %ebp; .cfi_adjust_cfa_offset 4\n"
- // Copy entries from the array holding the arguments into the
- // correct CPU registers.
- "movl 0(%edi), %ebx\n"
- "movl 4(%edi), %ecx\n"
- "movl 8(%edi), %edx\n"
- "movl 12(%edi), %esi\n"
- "movl 20(%edi), %ebp\n"
- "movl 16(%edi), %edi\n"
- // Enter the kernel.
- "int $0x80\n"
- // This is our "magic" return address that the BPF filter sees.
- "2:"
- // Restore any clobbered registers that we didn't declare to the
- // compiler.
- "pop %ebp; .cfi_adjust_cfa_offset -4\n"
- "pop %ebx; .cfi_adjust_cfa_offset -4\n"
- "pop %edi; .cfi_adjust_cfa_offset -4\n"
- "pop %esi; .cfi_adjust_cfa_offset -4\n"
- "ret\n"
- ".cfi_endproc\n"
- "9:.size SyscallAsm, 9b-SyscallAsm\n"
-#elif defined(__x86_64__)
- ".text\n"
- ".align 16, 0x90\n"
- ".type SyscallAsm, @function\n"
- "SyscallAsm:.cfi_startproc\n"
- // Check if "%rax" is negative. If so, do not attempt to make a
- // system call. Instead, compute the return address that is visible
- // to the kernel after we execute "syscall". This address can be
- // used as a marker that BPF code inspects.
- "test %rax, %rax\n"
- "jge 1f\n"
- // Always make sure that our code is position-independent, or the
- // linker will throw a hissy fit on x86-64.
- "call 0f; .cfi_adjust_cfa_offset 8\n"
- "0:pop %rax; .cfi_adjust_cfa_offset -8\n"
- "addq $2f-0b, %rax\n"
- "ret\n"
- // We declared all clobbered registers to the compiler. On x86-64,
- // there really isn't much of a problem with register pressure. So,
- // we can go ahead and directly copy the entries from the arguments
- // array into the appropriate CPU registers.
- "1:movq 0(%r12), %rdi\n"
- "movq 8(%r12), %rsi\n"
- "movq 16(%r12), %rdx\n"
- "movq 24(%r12), %r10\n"
- "movq 32(%r12), %r8\n"
- "movq 40(%r12), %r9\n"
- // Enter the kernel.
- "syscall\n"
- // This is our "magic" return address that the BPF filter sees.
- "2:ret\n"
- ".cfi_endproc\n"
- "9:.size SyscallAsm, 9b-SyscallAsm\n"
-#elif defined(__arm__)
- // Throughout this file, we use the same mode (ARM vs. thumb)
- // that the C++ compiler uses. This means, when transferring control
- // from C++ to assembly code, we do not need to switch modes (e.g.
- // by using the "bx" instruction). It also means that our assembly
- // code should not be invoked directly from code that lives in
- // other compilation units, as we don't bother implementing thumb
- // interworking. That's OK, as we don't make any of the assembly
- // symbols public. They are all local to this file.
- ".text\n"
- ".align 2\n"
- ".type SyscallAsm, %function\n"
-#if defined(__thumb__)
- ".thumb_func\n"
-#else
- ".arm\n"
-#endif
- "SyscallAsm:.fnstart\n"
- "@ args = 0, pretend = 0, frame = 8\n"
- "@ frame_needed = 1, uses_anonymous_args = 0\n"
-#if defined(__thumb__)
- ".cfi_startproc\n"
- "push {r7, lr}\n"
- ".cfi_offset 14, -4\n"
- ".cfi_offset 7, -8\n"
- "mov r7, sp\n"
- ".cfi_def_cfa_register 7\n"
- ".cfi_def_cfa_offset 8\n"
-#else
- "stmfd sp!, {fp, lr}\n"
- "add fp, sp, #4\n"
-#endif
- // Check if "r0" is negative. If so, do not attempt to make a
- // system call. Instead, compute the return address that is visible
- // to the kernel after we execute "swi 0". This address can be
- // used as a marker that BPF code inspects.
- "cmp r0, #0\n"
- "bge 1f\n"
- "ldr r0, =2f\n"
- "b 2f\n"
- // We declared (almost) all clobbered registers to the compiler. On
- // ARM there is no particular register pressure. So, we can go
- // ahead and directly copy the entries from the arguments array
- // into the appropriate CPU registers.
- "1:ldr r5, [r6, #20]\n"
- "ldr r4, [r6, #16]\n"
- "ldr r3, [r6, #12]\n"
- "ldr r2, [r6, #8]\n"
- "ldr r1, [r6, #4]\n"
- "mov r7, r0\n"
- "ldr r0, [r6, #0]\n"
- // Enter the kernel
- "swi 0\n"
- // Restore the frame pointer. Also restore the program counter from
- // the link register; this makes us return to the caller.
-#if defined(__thumb__)
- "2:pop {r7, pc}\n"
- ".cfi_endproc\n"
-#else
- "2:ldmfd sp!, {fp, pc}\n"
-#endif
- ".fnend\n"
- "9:.size SyscallAsm, 9b-SyscallAsm\n"
-#endif
- ); // asm
-
-intptr_t SandboxSyscall(int nr, ...) {
- // It is most convenient for the caller to pass a variadic list of arguments.
- // But this is difficult to handle in assembly code without making
- // assumptions about internal implementation details of "va_list". So, we
- // first use C code to copy all the arguments into an array, where they are
- // easily accessible to asm().
- // This is preferable over copying them into individual variables, which
- // can result in too much register pressure.
- void *args[6];
- va_list ap;
-
- // System calls take a system call number (typically passed in %eax or
- // %rax) and up to six arguments (passed in general-purpose CPU registers).
- //
- // On 32bit systems, all variadic arguments are passed on the stack as 32bit
- // quantities. We can use an arbitrary 32bit type to retrieve them with
- // va_arg() and then forward them to the kernel in the appropriate CPU
- // register. We do not need to know whether this is an integer or a pointer
- // value.
- //
- // On 64bit systems, variadic arguments can be either 32bit or 64bit wide,
- // which would seem to make it more important that we pass the correct type
- // to va_arg(). And we really can't know what this type is unless we have a
- // table with function signatures for all system calls.
- //
- // Fortunately, on x86-64 this is less critical. The first six function
- // arguments will be passed in CPU registers, no matter whether they were
- // named or variadic. This only leaves us with a single argument (if present)
- // that could be passed on the stack. And since x86-64 is little endian,
- // it will have the correct value both for 32bit and 64bit quantities.
- //
- // N.B. Because of how the x86-64 ABI works, it is possible that 32bit
- // quantities will have undefined garbage bits in the upper 32 bits of a
- // 64bit register. This is relatively unlikely for the first five system
- // call arguments, as the processor does automatic sign extensions and zero
- // filling so frequently, there rarely is garbage in CPU registers. But it
- // is quite likely for the last argument, which is passed on the stack.
- // That's generally OK, because the kernel has the correct function
- // signatures and knows to only inspect the LSB of a 32bit value.
- // But callers must be careful in cases, where the compiler cannot tell
- // the difference (e.g. when passing NULL to any system call, it must
- // always be cast to a pointer type).
- // The glibc implementation of syscall() has the exact same issues.
- // In the unlikely event that this ever becomes a problem, we could add
- // code that handles six-argument system calls specially. The number of
- // system calls that take six arguments and expect a 32bit value in the
- // sixth argument is very limited.
- va_start(ap, nr);
- args[0] = va_arg(ap, void *);
- args[1] = va_arg(ap, void *);
- args[2] = va_arg(ap, void *);
- args[3] = va_arg(ap, void *);
- args[4] = va_arg(ap, void *);
- args[5] = va_arg(ap, void *);
- va_end(ap);
-
- // Invoke our file-scope assembly code. The constraints have been picked
- // carefully to match what the rest of the assembly code expects in input,
- // output, and clobbered registers.
-#if defined(__i386__)
- intptr_t ret = nr;
- asm volatile(
- "call SyscallAsm\n"
- // N.B. These are not the calling conventions normally used by the ABI.
- : "=a"(ret)
- : "0"(ret), "D"(args)
- : "esp", "memory", "ecx", "edx");
-#elif defined(__x86_64__)
- intptr_t ret = nr;
- {
- register void **data __asm__("r12") = args;
- asm volatile(
- "call SyscallAsm\n"
- // N.B. These are not the calling conventions normally used by the ABI.
- : "=a"(ret)
- : "0"(ret), "r"(data)
- : "rsp", "memory",
- "rcx", "rdi", "rsi", "rdx", "r8", "r9", "r10", "r11");
- }
-#elif defined(__arm__)
- intptr_t ret;
- {
- register intptr_t inout __asm__("r0") = nr;
- register void **data __asm__("r6") = args;
- asm volatile(
- "bl SyscallAsm\n"
- // N.B. These are not the calling conventions normally used by the ABI.
- : "=r"(inout)
- : "0"(inout), "r"(data)
- : "lr", "memory", "r1", "r2", "r3", "r4", "r5"
-#if !defined(__arm__)
- // In thumb mode, we cannot use "r7" as a general purpose register, as
- // it is our frame pointer. We have to manually manage and preserve it.
- // In ARM mode, we have a dedicated frame pointer register and "r7" is
- // thus available as a general purpose register. We don't preserve it,
- // but instead mark it as clobbered.
- , "r7"
-#endif
- );
- ret = inout;
- }
-#else
- errno = ENOSYS;
- intptr_t ret = -1;
-#endif
- return ret;
-}
-
-} // namespace
diff --git a/sandbox/linux/seccomp-bpf/syscall.h b/sandbox/linux/seccomp-bpf/syscall.h
deleted file mode 100644
index 932e398..0000000
--- a/sandbox/linux/seccomp-bpf/syscall.h
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright (c) 2012 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef SANDBOX_LINUX_SECCOMP_BPF_SYSCALL_H__
-#define SANDBOX_LINUX_SECCOMP_BPF_SYSCALL_H__
-
-#include <signal.h>
-#include <stdint.h>
-
-namespace playground2 {
-
-// We have to make sure that we have a single "magic" return address for
-// our system calls, which we can check from within a BPF filter. This
-// works by writing a little bit of asm() code that a) enters the kernel, and
-// that also b) can be invoked in a way that computes this return address.
-// Passing "nr" as "-1" computes the "magic" return address. Passing any
-// other value invokes the appropriate system call.
-intptr_t SandboxSyscall(int nr, ...);
-
-} // namespace
-
-#endif // SANDBOX_LINUX_SECCOMP_BPF_SYSCALL_H__
diff --git a/sandbox/linux/seccomp-bpf/syscall_unittest.cc b/sandbox/linux/seccomp-bpf/syscall_unittest.cc
deleted file mode 100644
index 374a0fb..0000000
--- a/sandbox/linux/seccomp-bpf/syscall_unittest.cc
+++ /dev/null
@@ -1,113 +0,0 @@
-// Copyright (c) 2012 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include <asm/unistd.h>
-#include <fcntl.h>
-#include <sys/mman.h>
-#include <unistd.h>
-
-#include "base/posix/eintr_wrapper.h"
-#include "sandbox/linux/seccomp-bpf/syscall.h"
-#include "sandbox/linux/tests/unit_tests.h"
-#include "testing/gtest/include/gtest/gtest.h"
-
-using namespace playground2;
-
-namespace {
-
-// Different platforms use different symbols for the six-argument version
-// of the mmap() system call. Test for the correct symbol at compile time.
-#ifdef __NR_mmap2
-const int kMMapNr = __NR_mmap2;
-#else
-const int kMMapNr = __NR_mmap;
-#endif
-
-TEST(Syscall, WellKnownEntryPoint) {
- // Test that SandboxSyscall(-1) is handled specially. Don't do this on ARM,
- // where syscall(-1) crashes with SIGILL. Not running the test is fine, as we
- // are still testing ARM code in the next set of tests.
-#if !defined(__arm__)
- EXPECT_NE(SandboxSyscall(-1), syscall(-1));
-#endif
-
- // If possible, test that SandboxSyscall(-1) returns the address right after
- // a kernel entry point.
-#if defined(__i386__)
- EXPECT_EQ(0x80CDu, ((uint16_t *)SandboxSyscall(-1))[-1]); // INT 0x80
-#elif defined(__x86_64__)
- EXPECT_EQ(0x050Fu, ((uint16_t *)SandboxSyscall(-1))[-1]); // SYSCALL
-#elif defined(__arm__)
-#if defined(__thumb__)
- EXPECT_EQ(0xDF00u, ((uint16_t *)SandboxSyscall(-1))[-1]); // SWI 0
-#else
- EXPECT_EQ(0xEF000000u, ((uint32_t *)SandboxSyscall(-1))[-1]); // SVC 0
-#endif
-#else
- #warning Incomplete test case; need port for target platform
-#endif
-}
-
-TEST(Syscall, TrivialSyscallNoArgs) {
- // Test that we can do basic system calls
- EXPECT_EQ(SandboxSyscall(__NR_getpid), syscall(__NR_getpid));
-}
-
-TEST(Syscall, ComplexSyscallSixArgs) {
- int fd;
- ASSERT_LE(0, fd = SandboxSyscall(__NR_open, "/dev/null", O_RDWR, 0L));
-
- // Use mmap() to allocate some read-only memory
- char *addr0;
- ASSERT_NE((char *)NULL,
- addr0 = reinterpret_cast<char *>(
- SandboxSyscall(kMMapNr, (void *)NULL, 4096, PROT_READ,
- MAP_PRIVATE|MAP_ANONYMOUS, fd, 0L)));
-
- // Try to replace the existing mapping with a read-write mapping
- char *addr1;
- ASSERT_EQ(addr0,
- addr1 = reinterpret_cast<char *>(
- SandboxSyscall(kMMapNr, addr0, 4096L, PROT_READ|PROT_WRITE,
- MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED,
- fd, 0L)));
- ++*addr1; // This should not seg fault
-
- // Clean up
- EXPECT_EQ(0, SandboxSyscall(__NR_munmap, addr1, 4096L));
- EXPECT_EQ(0, HANDLE_EINTR(SandboxSyscall(__NR_close, fd)));
-
- // Check that the offset argument (i.e. the sixth argument) is processed
- // correctly.
- ASSERT_GE(fd = SandboxSyscall(__NR_open, "/proc/self/exe", O_RDONLY, 0L), 0);
- char *addr2, *addr3;
- ASSERT_NE((char *)NULL,
- addr2 = reinterpret_cast<char *>(
- SandboxSyscall(kMMapNr, (void *)NULL, 8192L, PROT_READ,
- MAP_PRIVATE, fd, 0L)));
- ASSERT_NE((char *)NULL,
- addr3 = reinterpret_cast<char *>(
- SandboxSyscall(kMMapNr, (void *)NULL, 4096L, PROT_READ,
- MAP_PRIVATE, fd,
-#if defined(__NR_mmap2)
- 1L
-#else
- 4096L
-#endif
- )));
- EXPECT_EQ(0, memcmp(addr2 + 4096, addr3, 4096));
-
- // Just to be absolutely on the safe side, also verify that the file
- // contents matches what we are getting from a read() operation.
- char buf[8192];
- EXPECT_EQ(8192, SandboxSyscall(__NR_read, fd, buf, 8192L));
- EXPECT_EQ(0, memcmp(addr2, buf, 8192));
-
- // Clean up
- EXPECT_EQ(0, SandboxSyscall(__NR_munmap, addr2, 8192L));
- EXPECT_EQ(0, SandboxSyscall(__NR_munmap, addr3, 4096L));
- EXPECT_EQ(0, HANDLE_EINTR(SandboxSyscall(__NR_close, fd)));
-}
-
-} // namespace