summaryrefslogtreecommitdiffstats
path: root/sandbox/linux
diff options
context:
space:
mode:
author: markus@chromium.org <markus@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-06-15 07:42:24 +0000
committer: markus@chromium.org <markus@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-06-15 07:42:24 +0000
commit: c4fc5f3b0f783b7662bfe8c128e3cde7c6352fda (patch)
tree: f58979add793d46f6aaf3e72709d1d0c2019f944 /sandbox/linux
parent: 400d3153e3d7a5d7ba120863de0ce3662a886962 (diff)
download: chromium_src-c4fc5f3b0f783b7662bfe8c128e3cde7c6352fda.zip
download: chromium_src-c4fc5f3b0f783b7662bfe8c128e3cde7c6352fda.tar.gz
download: chromium_src-c4fc5f3b0f783b7662bfe8c128e3cde7c6352fda.tar.bz2
Added support for SECCOMP_RET_TRAP handlers.
When setting a sandbox policy, the user has to write a system call evaluator function. This function is passed a system call number and returns a suitable ErrorCode (e.g. an "errno" value). This change list extends ErrorCode so that, in addition to static "errno" values, the system call evaluator can also request that a callback gets called. This allows the sandbox to handle system calls in user space.

BUG=130662
TEST=make && ./demo32 && ./demo64
Review URL: https://chromiumcodereview.appspot.com/10533076

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@142353 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'sandbox/linux')
-rw-r--r-- sandbox/linux/seccomp-bpf/demo.cc 127
-rw-r--r-- sandbox/linux/seccomp-bpf/sandbox_bpf.cc 185
-rw-r--r-- sandbox/linux/seccomp-bpf/sandbox_bpf.h 93
-rw-r--r-- sandbox/linux/seccomp-bpf/verifier.cc 22
4 files changed, 330 insertions, 97 deletions
diff --git a/sandbox/linux/seccomp-bpf/demo.cc b/sandbox/linux/seccomp-bpf/demo.cc
index 1286a01..ebedcbe 100644
--- a/sandbox/linux/seccomp-bpf/demo.cc
+++ b/sandbox/linux/seccomp-bpf/demo.cc
@@ -28,6 +28,9 @@
#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
#include "sandbox/linux/seccomp-bpf/util.h"
+using playground2::Sandbox;
+using playground2::Util;
+
#define ERR EPERM
// We don't expect our sandbox to do anything useful yet. So, we will fail
@@ -36,7 +39,103 @@
// actually enforce restrictions in a meaningful way:
#define _exit(x) do { } while (0)
-static playground2::Sandbox::ErrorCode evaluator(int sysno) {
+
+// POSIX doesn't define any async-signal safe function for converting
+// an integer to ASCII. We'll have to define our own version.
+// itoa_r() converts a (signed) integer to ASCII. It returns "buf", if the
+// conversion was successful or NULL otherwise. It never writes more than "sz"
+// bytes. Output will be truncated as needed, and a NUL character is always
+// appended.
+static char *itoa_r(int i, char *buf, size_t sz) {
+ // Make sure we can write at least one NUL byte.
+ size_t n = 1;
+ if (n > sz) {
+ return NULL;
+ }
+
+ // Handle negative numbers.
+ char *start = buf;
+ int minint = 0;
+ if (i < 0) {
+ // Make sure we can write the '-' character.
+ if (++n > sz) {
+ *start = '\000';
+ return NULL;
+ }
+ *start++ = '-';
+
+ // Turn our number positive.
+ if (i == -i) {
+ // The most negative integer needs special treatment.
+ minint = 1;
+ i = -(i + 1);
+ } else {
+ // "Normal" negative numbers are easy.
+ i = -i;
+ }
+ }
+
+ // Loop until we have converted the entire number. Output at least one
+ // character (i.e. '0').
+ char *ptr = start;
+ do {
+ // Make sure there is still enough space left in our output buffer.
+ if (++n > sz) {
+ buf = NULL;
+ goto truncate;
+ }
+
+ // Output the next digit and (if necessary) compensate for the most
+ // negative integer needing special treatment. This works because, no
+ // matter the bit width of the integer, the most negative integer always
+ // ends in 2, 4, 6, or 8, so adding one to the last digit never carries.
+ *ptr++ = i%10 + '0' + minint;
+ minint = 0;
+ i /= 10;
+ } while (i);
+ truncate: // Terminate the output with a NUL character.
+ *ptr = '\000';
+
+ // Conversion to ASCII actually resulted in the digits being in reverse
+ // order. We can't easily generate them in forward order, as we can't tell
+ // the number of characters needed until we are done converting.
+ // So, now, we reverse the string (except for the possible "-" sign).
+ while (--ptr > start) {
+ char ch = *ptr;
+ *ptr = *start;
+ *start++ = ch;
+ }
+ return buf;
+}
+
+// This handler gets called, whenever we encounter a system call that we
+// don't recognize explicitly. For the purposes of this program, we just
+// log the system call and then deny it. More elaborate sandbox policies
+// might try to evaluate the system call in user-space, instead.
+// The only notable complication is that this function must be async-signal
+// safe. This restricts the library functions that we can call.
+static intptr_t defaultHandler(const struct arch_seccomp_data& data,
+ void *) {
+ static const char msg0[] = "Disallowed system call #";
+ static const char msg1[] = "\n";
+ char buf[sizeof(msg0) - 1 + 25 + sizeof(msg1)];
+
+ *buf = '\000';
+ strncat(buf, msg0, sizeof(buf));
+
+ char *ptr = strrchr(buf, '\000');
+ itoa_r(data.nr, ptr, sizeof(buf) - (ptr - buf));
+
+ ptr = strrchr(ptr, '\000');
+ strncat(ptr, msg1, sizeof(buf) - (ptr - buf));
+
+ ptr = strrchr(ptr, '\000');
+ if (HANDLE_EINTR(write(2, buf, ptr - buf))) { }
+
+ return -ERR;
+}
+
+static Sandbox::ErrorCode evaluator(int sysno) {
switch (sysno) {
#if defined(__NR_accept)
case __NR_accept: case __NR_accept4:
@@ -121,7 +220,7 @@ static playground2::Sandbox::ErrorCode evaluator(int sysno) {
case __NR_time:
case __NR_uname:
case __NR_write: case __NR_writev:
- return playground2::Sandbox::SB_ALLOWED;
+ return Sandbox::SB_ALLOWED;
// The following system calls are temporarily permitted. This must be
// tightened later. But we currently don't implement enough of the sandboxing
@@ -153,11 +252,11 @@ static playground2::Sandbox::ErrorCode evaluator(int sysno) {
case __NR_clone:
case __NR_munmap: case __NR_mprotect: case __NR_madvise:
case __NR_remap_file_pages:
- return playground2::Sandbox::SB_ALLOWED;
+ return Sandbox::SB_ALLOWED;
// Everything that isn't explicitly allowed is denied.
default:
- return (playground2::Sandbox::ErrorCode)ERR;
+ return Sandbox::ErrorCode(defaultHandler, NULL);
}
}
@@ -177,10 +276,8 @@ static void *sendmsgStressThreadFnc(void *arg) {
}
size_t len = 4;
char buf[4];
- if (!playground2::Util::sendFds(fds[0], "test", 4,
- fds[1], fds[1], fds[1], -1) ||
- !playground2::Util::getFds(fds[1], buf, &len,
- fds+2, fds+3, fds+4, NULL) ||
+ if (!Util::sendFds(fds[0], "test", 4, fds[1], fds[1], fds[1], -1) ||
+ !Util::getFds(fds[1], buf, &len, fds+2, fds+3, fds+4, NULL) ||
len != 4 ||
memcmp(buf, "test", len) ||
write(fds[2], "demo", 4) != 4 ||
@@ -203,14 +300,14 @@ int main(int argc, char *argv[]) {
if (argc) { }
if (argv) { }
int proc_fd = open("/proc", O_RDONLY|O_DIRECTORY);
- if (playground2::Sandbox::supportsSeccompSandbox(proc_fd) !=
- playground2::Sandbox::STATUS_AVAILABLE) {
+ if (Sandbox::supportsSeccompSandbox(proc_fd) !=
+ Sandbox::STATUS_AVAILABLE) {
perror("sandbox");
_exit(1);
}
- playground2::Sandbox::setProcFd(proc_fd);
- playground2::Sandbox::setSandboxPolicy(evaluator, NULL);
- playground2::Sandbox::startSandbox();
+ Sandbox::setProcFd(proc_fd);
+ Sandbox::setSandboxPolicy(evaluator, NULL);
+ Sandbox::startSandbox();
// Check that we can create threads
pthread_t thr;
@@ -268,8 +365,8 @@ int main(int argc, char *argv[]) {
}
size_t len = 4;
char buf[4];
- if (!playground2::Util::sendFds(fds[0], "test", 4, fds[1], -1) ||
- !playground2::Util::getFds(fds[1], buf, &len, fds+2, NULL) ||
+ if (!Util::sendFds(fds[0], "test", 4, fds[1], -1) ||
+ !Util::getFds(fds[1], buf, &len, fds+2, NULL) ||
len != 4 ||
memcmp(buf, "test", len) ||
write(fds[2], "demo", 4) != 4 ||
diff --git a/sandbox/linux/seccomp-bpf/sandbox_bpf.cc b/sandbox/linux/seccomp-bpf/sandbox_bpf.cc
index e37772d..60a400d 100644
--- a/sandbox/linux/seccomp-bpf/sandbox_bpf.cc
+++ b/sandbox/linux/seccomp-bpf/sandbox_bpf.cc
@@ -10,17 +10,19 @@
// pre-BPF seccomp mode.
namespace playground2 {
+// We define a really simple sandbox policy. It is just good enough for us
+// to tell that the sandbox has actually been activated.
Sandbox::ErrorCode Sandbox::probeEvaluator(int signo) {
switch (signo) {
case __NR_getpid:
// Return EPERM so that we can check that the filter actually ran.
- return (ErrorCode)EPERM;
+ return EPERM;
case __NR_exit_group:
// Allow exit() with a non-default return code.
return SB_ALLOWED;
default:
// Make everything else fail in an easily recognizable way.
- return (ErrorCode)EINVAL;
+ return EINVAL;
}
}
@@ -214,9 +216,9 @@ bool Sandbox::isSingleThreaded(int proc_fd) {
}
static bool isDenied(Sandbox::ErrorCode code) {
- return code == Sandbox::SB_TRAP ||
- (code >= (Sandbox::ErrorCode)1 &&
- code <= (Sandbox::ErrorCode)4095); // errno value
+ return (code & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP ||
+ (code >= (SECCOMP_RET_ERRNO + 1) &&
+ code <= (SECCOMP_RET_ERRNO + 4095));
}
void Sandbox::policySanityChecks(EvaluateSyscall syscallEvaluator,
@@ -265,6 +267,9 @@ void Sandbox::policySanityChecks(EvaluateSyscall syscallEvaluator,
void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator,
EvaluateArguments argumentEvaluator) {
+ if (status_ == STATUS_ENABLED) {
+ die("Cannot change policy after sandbox has started");
+ }
policySanityChecks(syscallEvaluator, argumentEvaluator);
evaluators_.push_back(std::make_pair(syscallEvaluator, argumentEvaluator));
}
@@ -312,12 +317,10 @@ void Sandbox::installFilter() {
program->push_back((struct sock_filter)
BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 1, 0));
- // TODO: Instead of killing outright, we should raise a SIGSYS and
- // report a useful error message. SIGKILL cannot be trapped by the
- // debugger and essentially makes the program fail in a way that is
- // almost impossible to debug.
program->push_back((struct sock_filter)
- BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL));
+ BPF_STMT(BPF_RET+BPF_K,
+ ErrorCode(bpfFailure,
+ "Invalid audit architecture in BPF filter")));
// Grab the system call number, so that we can implement jump tables.
program->push_back((struct sock_filter)
@@ -334,9 +337,10 @@ void Sandbox::installFilter() {
program->push_back((struct sock_filter)
BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, 1));
#endif
- // TODO: raise a suitable SIGSYS signal
program->push_back((struct sock_filter)
- BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL));
+ BPF_STMT(BPF_RET+BPF_K,
+ ErrorCode(bpfFailure,
+ "Illegal mixing of system call ABIs")));
#endif
// Evaluate all possible system calls and group their ErrorCodes into
@@ -347,10 +351,11 @@ void Sandbox::installFilter() {
// Compile the system call ranges to an optimized BPF program.
rangesToBPF(program, ranges);
- // Everything that isn't allowed is forbidden. Eventually, we would
- // like to have a way to log forbidden calls, when in debug mode.
+ // Unless there is a bug in the compiler, there is no execution path through
+ // the BPF program that falls through to the end.
program->push_back((struct sock_filter)
- BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO + SECCOMP_DENY_ERRNO));
+ BPF_STMT(BPF_RET+BPF_K,
+ ErrorCode(bpfFailure, "Detected unfiltered system call")));
// Make sure compilation resulted in BPF program that executes
// correctly. Otherwise, there is an internal error in our BPF compiler.
@@ -378,6 +383,9 @@ void Sandbox::installFilter() {
memcpy(bpf, &(*program)[0], sizeof(bpf));
delete program;
+ // Release memory that is no longer needed
+ evaluators_.clear();
+
// Install BPF filter program
if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
die(dryRun_ ? NULL : "Kernel refuses to enable no-new-privs");
@@ -449,75 +457,138 @@ void Sandbox::rangesToBPF(Program *program, const Ranges& ranges) {
}
from = iter->to + 1;
- // Convert ErrorCodes to return values that are acceptable for
- // BPF filters.
- int ret;
- switch (iter->err) {
- case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6:
- die("Not implemented");
- case SB_TRAP:
- ret = SECCOMP_RET_TRAP;
- break;
- case SB_ALLOWED:
- ret = SECCOMP_RET_ALLOW;
- break;
- default:
- if (iter->err >= static_cast<ErrorCode>(1) &&
- iter->err <= static_cast<ErrorCode>(4096)) {
- // We limit errno values to a reasonable range. In fact, the Linux ABI
- // doesn't support errno values outside of this range.
- ret = SECCOMP_RET_ERRNO + iter->err;
- } else {
- die("Invalid ErrorCode reported by sandbox system call evaluator");
- }
- break;
- }
-
// Emit BPF instructions matching this range.
if (iter->to != std::numeric_limits<unsigned>::max()) {
program->push_back((struct sock_filter)
BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, iter->to, 1, 0));
}
program->push_back((struct sock_filter)
- BPF_STMT(BPF_RET+BPF_K, ret));
+ BPF_STMT(BPF_RET+BPF_K, iter->err));
}
return;
}
void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) {
- if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context) {
+ // Various sanity checks to make sure we actually received a signal
+ // triggered by a BPF filter. If something else triggered SIGSYS
+ // (e.g. kill()), there is really nothing we can do with this signal.
+ if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context ||
+ info->si_errno <= 0 ||
+ static_cast<size_t>(info->si_errno) > trapArraySize_) {
// die() can call LOG(FATAL). This is not normally async-signal safe
// and can lead to bugs. We should eventually implement a different
// logging and reporting mechanism that is safe to be called from
// the sigSys() handler.
+ // TODO: If we feel confident that our code otherwise works correctly, we
+ // could actually make an argument that spurious SIGSYS should
+ // just get silently ignored. TBD
+ sigsys_err:
die("Unexpected SIGSYS received");
}
- ucontext_t *ctx = reinterpret_cast<ucontext_t *>(void_context);
+
+ // Signal handlers should always preserve "errno". Otherwise, we could
+ // trigger really subtle bugs.
int old_errno = errno;
- // In case of error, set the REG_RESULT CPU register to the default
- // errno value (i.e. EPERM).
- // We need to be very careful when doing this, as some of our target
- // platforms have pointer types and CPU registers that are wider than
- // ints. Furthermore, the kernel ABI requires us to return a negative
- // value, but errno values are usually positive. And in fact, it would
- // be perfectly reasonable for somebody to have defined them as unsigned
- // properties. This makes the correct incantation of type casts rather
- // subtle. Sometimes, C++ is just too smart for its own good.
- void *rc = (void *)(intptr_t)-(int)SECCOMP_DENY_ERRNO;
-
- // This is where we can add extra code to handle complex system calls.
- // ...
-
- ctx->uc_mcontext.gregs[REG_RESULT] = reinterpret_cast<greg_t>(rc);
+ // Obtain the signal context. This, most notably, gives us access to
+ // all CPU registers at the time of the signal.
+ ucontext_t *ctx = reinterpret_cast<ucontext_t *>(void_context);
+
+ // Obtain the siginfo information that is specific to SIGSYS. Unfortunately,
+ // most versions of glibc don't include this information in siginfo_t. So,
+ // we need to explicitly copy it into an arch_sigsys structure.
+ struct arch_sigsys sigsys;
+ memcpy(&sigsys, &info->_sifields, sizeof(sigsys));
+
+ // Some more sanity checks.
+ if (sigsys.ip != reinterpret_cast<void *>(ctx->uc_mcontext.gregs[REG_IP]) ||
+ sigsys.nr != static_cast<int>(ctx->uc_mcontext.gregs[REG_SYSCALL]) ||
+ sigsys.arch != SECCOMP_ARCH) {
+ goto sigsys_err;
+ }
+
+ // Copy the seccomp-specific data into an arch_seccomp_data structure. This
+ // is what we are showing to TrapFnc callbacks that the system call evaluator
+ // registered with the sandbox.
+ struct arch_seccomp_data data = {
+ sigsys.nr,
+ SECCOMP_ARCH,
+ reinterpret_cast<uint64_t>(sigsys.ip),
+ {
+ static_cast<uint64_t>(ctx->uc_mcontext.gregs[REG_PARM1]),
+ static_cast<uint64_t>(ctx->uc_mcontext.gregs[REG_PARM2]),
+ static_cast<uint64_t>(ctx->uc_mcontext.gregs[REG_PARM3]),
+ static_cast<uint64_t>(ctx->uc_mcontext.gregs[REG_PARM4]),
+ static_cast<uint64_t>(ctx->uc_mcontext.gregs[REG_PARM5]),
+ static_cast<uint64_t>(ctx->uc_mcontext.gregs[REG_PARM6])
+ }
+ };
+
+ // Now call the TrapFnc callback associated with this particular instance
+ // of SECCOMP_RET_TRAP.
+ const ErrorCode& err = trapArray_[info->si_errno - 1];
+ intptr_t rc = err.fnc_(data, err.aux_);
+
+ // Update the CPU register that stores the return code of the system call
+ // that we just handled, and restore "errno" to the value that it had
+ // before entering the signal handler.
+ ctx->uc_mcontext.gregs[REG_RESULT] = static_cast<greg_t>(rc);
errno = old_errno;
+
return;
}
+intptr_t Sandbox::bpfFailure(const struct arch_seccomp_data&, void *aux) {
+ die(static_cast<char *>(aux));
+}
+
+int Sandbox::getTrapId(Sandbox::TrapFnc fnc, const void *aux) {
+ // Each unique pair of TrapFnc and auxiliary data make up a distinct instance
+ // of a SECCOMP_RET_TRAP.
+ std::pair<TrapFnc, const void *> key(fnc, aux);
+ TrapIds::const_iterator iter = trapIds_.find(key);
+ if (iter != trapIds_.end()) {
+ // We have seen this pair before. Return the same id that we assigned
+ // earlier.
+ return iter->second;
+ } else {
+ // This is a new pair. Remember it and assign a new id.
+ // Please note that we have to store traps in memory that doesn't get
+ // deallocated when the program is shutting down. A memory leak is
+ // intentional, because we might otherwise not be able to execute
+ // system calls part way through the program shutting down.
+ if (!traps_) {
+ traps_ = new Traps();
+ }
+ int id = traps_->size() + 1;
+ if (id > static_cast<int>(SECCOMP_RET_DATA)) {
+ // In practice, this is pretty much impossible to trigger, as there
+ // are other kernel limitations that restrict overall BPF program sizes.
+ die("Too many SECCOMP_RET_TRAP callback instances");
+ }
+
+ traps_->push_back(ErrorCode(fnc, aux, id));
+ trapIds_[key] = id;
+
+ // We want to access the traps_ vector from our signal handler. But
+ // we are not assured that doing so is async-signal safe. On the other
+ // hand, C++ guarantees that the contents of a vector are stored in a
+ // contiguous C-style array.
+ // So, we look up the address and size of this array outside of the
+ // signal handler, where we can safely do so.
+ trapArray_ = &(*traps_)[0];
+ trapArraySize_ = id;
+ return id;
+ }
+}
bool Sandbox::dryRun_ = false;
Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;
int Sandbox::proc_fd_ = -1;
Sandbox::Evaluators Sandbox::evaluators_;
+Sandbox::Traps *Sandbox::traps_ = NULL;
+Sandbox::TrapIds Sandbox::trapIds_;
+Sandbox::ErrorCode *Sandbox::trapArray_ = NULL;
+size_t Sandbox::trapArraySize_ = 0;
} // namespace
diff --git a/sandbox/linux/seccomp-bpf/sandbox_bpf.h b/sandbox/linux/seccomp-bpf/sandbox_bpf.h
index 25d623e..3a297e9 100644
--- a/sandbox/linux/seccomp-bpf/sandbox_bpf.h
+++ b/sandbox/linux/seccomp-bpf/sandbox_bpf.h
@@ -16,6 +16,7 @@
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <sched.h>
+#include <signal.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
@@ -34,6 +35,7 @@
#include <algorithm>
#include <limits>
+#include <map>
#include <utility>
#include <vector>
@@ -63,6 +65,7 @@
#define SECCOMP_RET_ERRNO 0x00050000U // Returns an errno
#define SECCOMP_RET_TRACE 0x7ff00000U // Pass to a tracer or disallow
#define SECCOMP_RET_ALLOW 0x7fff0000U // Allow
+#define SECCOMP_RET_INVALID 0x8f8f8f8fU // Illegal return value
#define SECCOMP_RET_ACTION 0xffff0000U // Masks for the return value
#define SECCOMP_RET_DATA 0x0000ffffU // sections
#endif
@@ -77,6 +80,7 @@
#define SECCOMP_ARCH AUDIT_ARCH_I386
#define REG_RESULT REG_EAX
#define REG_SYSCALL REG_EAX
+#define REG_IP REG_EIP
#define REG_PARM1 REG_EBX
#define REG_PARM2 REG_ECX
#define REG_PARM3 REG_EDX
@@ -89,6 +93,7 @@
#define SECCOMP_ARCH AUDIT_ARCH_X86_64
#define REG_RESULT REG_RAX
#define REG_SYSCALL REG_RAX
+#define REG_IP REG_RIP
#define REG_PARM1 REG_RDI
#define REG_PARM2 REG_RSI
#define REG_PARM3 REG_RDX
@@ -100,12 +105,18 @@
#endif
struct arch_seccomp_data {
- int nr;
+ int nr;
uint32_t arch;
uint64_t instruction_pointer;
uint64_t args[6];
};
+struct arch_sigsys {
+ void *ip;
+ int nr;
+ unsigned int arch;
+};
+
#ifdef SECCOMP_BPF_STANDALONE
#define arraysize(x) sizeof(x)/sizeof(*(x)))
#define HANDLE_EINTR TEMP_FAILURE_RETRY
@@ -131,8 +142,8 @@ class Sandbox {
STATUS_ENABLED // The sandbox is now active
};
- enum ErrorCode {
- SB_TRAP = -1,
+ enum {
+ SB_INVALID = -1,
SB_ALLOWED = 0x0000,
SB_INSPECT_ARG_1 = 0x8001,
SB_INSPECT_ARG_2 = 0x8002,
@@ -140,8 +151,74 @@ class Sandbox {
SB_INSPECT_ARG_4 = 0x8008,
SB_INSPECT_ARG_5 = 0x8010,
SB_INSPECT_ARG_6 = 0x8020
+ };
+
+ // TrapFnc is a pointer to a function that handles Seccomp traps in
+ // user-space. The seccomp policy can request that a trap handler gets
+ // installed; it does so by returning a suitable ErrorCode() from the
+ // syscallEvaluator. See the ErrorCode() constructor for how to pass in
+ // the function pointer.
+ // Please note that TrapFnc is executed from signal context and must be
+ // async-signal safe:
+ // http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html
+ typedef intptr_t (*TrapFnc)(const struct arch_seccomp_data& args, void *aux);
+
+ class ErrorCode {
+ friend class Sandbox;
+ public:
+ // We can either wrap a symbolic ErrorCode (i.e. enum values), an errno
+ // value (in the range 1..4095), or a pointer to a TrapFnc callback
+ // handling a SECCOMP_RET_TRAP trap.
+ // All of these different values are stored in the "err_" field. So, code
+ // that is using the ErrorCode class typically operates on a single 32bit
+ // field.
+ // This is not only quite efficient, it also makes the API really easy to
+ // use.
+ ErrorCode(int err = SB_INVALID) {
+ switch (err) {
+ case SB_INVALID:
+ err_ = SECCOMP_RET_INVALID;
+ break;
+ case SB_ALLOWED:
+ err_ = SECCOMP_RET_ALLOW;
+ break;
+ case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6:
+ die("Not implemented");
+ break;
+ case 1 ... 4095:
+ err_ = SECCOMP_RET_ERRNO + err;
+ break;
+ default:
+ die("Invalid use of ErrorCode object");
+ }
+ }
+
+ // If we are wrapping a callback, we must assign a unique id. This id is
+ // how the kernel tells us which one of our different SECCOMP_RET_TRAP
+ // cases has been triggered.
+ // The getTrapId() function assigns one unique id (starting at 1) for
+ // each distinct pair of TrapFnc and auxiliary data.
+ ErrorCode(TrapFnc fnc, const void *aux, int id = 0) :
+ id_(id ? id : getTrapId(fnc, aux)),
+ fnc_(fnc),
+ aux_(const_cast<void *>(aux)),
+ err_(SECCOMP_RET_TRAP + id_) {
+ }
+
+ // Destructor doesn't need to do anything.
+ ~ErrorCode() { }
+
+ // Always return the value that goes into the BPF filter program.
+ operator uint32_t() const { return err_; }
+
+ protected:
+ // Fields needed for SECCOMP_RET_TRAP callbacks
+ int id_;
+ TrapFnc fnc_;
+ void *aux_;
- // Also, any errno value is valid when cast to ErrorCode.
+ // 32bit field used for all possible types of ErrorCode values
+ uint32_t err_;
};
enum Operation {
@@ -251,6 +328,8 @@ class Sandbox {
};
typedef std::vector<Range> Ranges;
typedef std::vector<struct sock_filter> Program;
+ typedef std::vector<ErrorCode> Traps;
+ typedef std::map<std::pair<TrapFnc, const void *>, int> TrapIds;
static ErrorCode probeEvaluator(int signo) __attribute__((const));
static bool kernelSupportSeccompBPF(int proc_fd);
@@ -262,11 +341,17 @@ class Sandbox {
static void findRanges(Ranges *ranges);
static void rangesToBPF(Program *program, const Ranges& ranges);
static void sigSys(int nr, siginfo_t *info, void *void_context);
+ static intptr_t bpfFailure(const struct arch_seccomp_data& data, void *aux);
+ static int getTrapId(TrapFnc fnc, const void *aux);
static bool dryRun_;
static SandboxStatus status_;
static int proc_fd_;
static Evaluators evaluators_;
+ static Traps *traps_;
+ static TrapIds trapIds_;
+ static ErrorCode *trapArray_;
+ static size_t trapArraySize_;
};
} // namespace
diff --git a/sandbox/linux/seccomp-bpf/verifier.cc b/sandbox/linux/seccomp-bpf/verifier.cc
index 352b36f..71b743c 100644
--- a/sandbox/linux/seccomp-bpf/verifier.cc
+++ b/sandbox/linux/seccomp-bpf/verifier.cc
@@ -36,31 +36,11 @@ bool Verifier::verifyBPF(const std::vector<struct sock_filter>& program,
#endif
struct arch_seccomp_data data = { sysnum, SECCOMP_ARCH };
- uint32_t expectedRet;
Sandbox::ErrorCode code = evaluateSyscall(sysnum);
- switch (code) {
- case Sandbox::SB_TRAP:
- expectedRet = SECCOMP_RET_TRAP;
- break;
- case Sandbox::SB_ALLOWED:
- expectedRet = SECCOMP_RET_ALLOW;
- break;
- case Sandbox::SB_INSPECT_ARG_1...Sandbox::SB_INSPECT_ARG_6:
- *err = "Not implemented";
- return false;
- default:
- if (code >= 1 && code < 4096) {
- expectedRet = SECCOMP_RET_ERRNO + static_cast<int>(code);
- } else {
- *err = "Invalid errno value";
- return false;
- }
- break;
- }
uint32_t computedRet = evaluateBPF(program, data, err);
if (*err) {
return false;
- } else if (computedRet != expectedRet) {
+ } else if (computedRet != code) {
*err = "Exit code from BPF program doesn't match";
return false;
}