Added support for SECCOMP_RET_TRAP handlers.

When setting a sandbox policy, the user has to write a system call evaluator function. This function is passed a system call number and returns a suitable ErrorCode (e.g. an "errno" value). This change list extends ErrorCode, so that in addition to static "errno" values, the system call evaluator can also request that a callback gets called. This allows the sandbox to handle system calls in user space. BUG=130662 TEST=make && ./demo32 && ./demo64 Review URL: https://chromiumcodereview.appspot.com/10533076 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@142353 0039d316-1c4b-4281-b951-d872f2087c98
author: markus@chromium.org <markus@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-06-15 07:42:24 +0000
committer: markus@chromium.org <markus@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-06-15 07:42:24 +0000
commit: c4fc5f3b0f783b7662bfe8c128e3cde7c6352fda (patch)
tree: f58979add793d46f6aaf3e72709d1d0c2019f944 /sandbox/linux
parent: 400d3153e3d7a5d7ba120863de0ce3662a886962 (diff)
download: chromium_src-c4fc5f3b0f783b7662bfe8c128e3cde7c6352fda.zip
chromium_src-c4fc5f3b0f783b7662bfe8c128e3cde7c6352fda.tar.gz
chromium_src-c4fc5f3b0f783b7662bfe8c128e3cde7c6352fda.tar.bz2
4 files changed, 330 insertions, 97 deletions
diff --git a/sandbox/linux/seccomp-bpf/demo.cc b/sandbox/linux/seccomp-bpf/demo.cc
index 1286a01..ebedcbe 100644
--- a/sandbox/linux/seccomp-bpf/demo.cc
+++ b/sandbox/linux/seccomp-bpf/demo.cc
@@ -28,6 +28,9 @@
 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
 #include "sandbox/linux/seccomp-bpf/util.h"
 
+using playground2::Sandbox;
+using playground2::Util;
+
 #define ERR EPERM
 
 // We don't expect our sandbox to do anything useful yet. So, we will fail
@@ -36,7 +39,103 @@
 // actually enforce restrictions in a meaningful way:
 #define _exit(x) do { } while (0)
 
-static playground2::Sandbox::ErrorCode evaluator(int sysno) {
+
+// POSIX doesn't define any async-signal safe function for converting
+// an integer to ASCII. We'll have to define our own version.
+// itoa_r() converts a (signed) integer to ASCII. It returns "buf" if the
+// conversion was successful, or NULL otherwise. It never writes more than "sz"
+// bytes. Output will be truncated as needed, and a NUL character is always
+// appended.
+static char *itoa_r(int i, char *buf, size_t sz) {
+  // Make sure we can write at least one NUL byte.
+  size_t n = 1;
+  if (n > sz) {
+    return NULL;
+  }
+
+  // Handle negative numbers.
+  char *start = buf;
+  int minint = 0;
+  if (i < 0) {
+    // Make sure we can write the '-' character.
+    if (++n > sz) {
+      *start = '\000';
+      return NULL;
+    }
+    *start++ = '-';
+
+    // Turn our number positive.
+    if (i == -i) {
+      // The most negative integer needs special treatment.
+      minint = 1;
+      i = -(i + 1);
+    } else {
+      // "Normal" negative numbers are easy.
+      i = -i;
+    }
+  }
+
+  // Loop until we have converted the entire number. Output at least one
+  // character (i.e. '0').
+  char *ptr = start;
+  do {
+    // Make sure there is still enough space left in our output buffer.
+    if (++n > sz) {
+      buf = NULL;
+      goto truncate;
+    }
+
+    // Output the next digit and (if necessary) compensate for the most
+    // negative integer needing special treatment. This works because, no
+    // matter the bit width of the integer, the most negative integer always
+    // ends in 2, 4, 6, or 8.
+    *ptr++ = i%10 + '0' + minint;
+    minint = 0;
+    i /= 10;
+  } while (i);
+ truncate:  // Terminate the output with a NUL character.
+  *ptr = '\000';
+
+  // Conversion to ASCII actually resulted in the digits being in reverse
+  // order. We can't easily generate them in forward order, as we can't tell
+  // the number of characters needed until we are done converting.
+  // So, now, we reverse the string (except for the possible "-" sign).
+  while (--ptr > start) {
+    char ch = *ptr;
+    *ptr = *start;
+    *start++ = ch;
+  }
+  return buf;
+}
+
+// This handler gets called whenever we encounter a system call that we
+// don't recognize explicitly. For the purposes of this program, we just
+// log the system call and then deny it. More elaborate sandbox policies
+// might try to evaluate the system call in user-space, instead.
+// The only notable complication is that this function must be async-signal
+// safe. This restricts the library functions that we can call.
+static intptr_t defaultHandler(const struct arch_seccomp_data& data,
+                               void *) {
+  static const char msg0[] = "Disallowed system call #";
+  static const char msg1[] = "\n";
+  char buf[sizeof(msg0) - 1 + 25 + sizeof(msg1)];
+
+  *buf = '\000';
+  strncat(buf, msg0, sizeof(buf));
+
+  char *ptr = strrchr(buf, '\000');
+  itoa_r(data.nr, ptr, sizeof(buf) - (ptr - buf));
+
+  ptr = strrchr(ptr, '\000');
+  strncat(ptr, msg1, sizeof(buf) - (ptr - buf));
+
+  ptr = strrchr(ptr, '\000');
+  if (HANDLE_EINTR(write(2, buf, ptr - buf))) { }
+
+  return -ERR;
+}
+
+static Sandbox::ErrorCode evaluator(int sysno) {
   switch (sysno) {
   #if defined(__NR_accept)
     case __NR_accept: case __NR_accept4:
@@ -121,7 +220,7 @@ static playground2::Sandbox::ErrorCode evaluator(int sysno) {
     case __NR_time:
     case __NR_uname:
     case __NR_write: case __NR_writev:
-      return playground2::Sandbox::SB_ALLOWED;
+      return Sandbox::SB_ALLOWED;
 
   // The following system calls are temporarily permitted. This must be
   // tightened later. But we currently don't implement enough of the sandboxing
@@ -153,11 +252,11 @@ static playground2::Sandbox::ErrorCode evaluator(int sysno) {
   case __NR_clone:
   case __NR_munmap: case __NR_mprotect: case __NR_madvise:
   case __NR_remap_file_pages:
-      return playground2::Sandbox::SB_ALLOWED;
+      return Sandbox::SB_ALLOWED;
 
   // Everything that isn't explicitly allowed is denied.
   default:
-    return (playground2::Sandbox::ErrorCode)ERR;
+    return Sandbox::ErrorCode(defaultHandler, NULL);
   }
 }
 
@@ -177,10 +276,8 @@ static void *sendmsgStressThreadFnc(void *arg) {
     }
     size_t len = 4;
     char buf[4];
-    if (!playground2::Util::sendFds(fds[0], "test", 4,
-                                    fds[1], fds[1], fds[1], -1) ||
-        !playground2::Util::getFds(fds[1], buf, &len,
-                                   fds+2, fds+3, fds+4, NULL) ||
+    if (!Util::sendFds(fds[0], "test", 4, fds[1], fds[1], fds[1], -1) ||
+        !Util::getFds(fds[1], buf, &len, fds+2, fds+3, fds+4, NULL) ||
         len != 4 ||
         memcmp(buf, "test", len) ||
         write(fds[2], "demo", 4) != 4 ||
@@ -203,14 +300,14 @@ int main(int argc, char *argv[]) {
   if (argc) { }
   if (argv) { }
   int proc_fd = open("/proc", O_RDONLY|O_DIRECTORY);
-  if (playground2::Sandbox::supportsSeccompSandbox(proc_fd) !=
-      playground2::Sandbox::STATUS_AVAILABLE) {
+  if (Sandbox::supportsSeccompSandbox(proc_fd) !=
+      Sandbox::STATUS_AVAILABLE) {
     perror("sandbox");
     _exit(1);
   }
-  playground2::Sandbox::setProcFd(proc_fd);
-  playground2::Sandbox::setSandboxPolicy(evaluator, NULL);
-  playground2::Sandbox::startSandbox();
+  Sandbox::setProcFd(proc_fd);
+  Sandbox::setSandboxPolicy(evaluator, NULL);
+  Sandbox::startSandbox();
 
   // Check that we can create threads
   pthread_t thr;
@@ -268,8 +365,8 @@ int main(int argc, char *argv[]) {
   }
   size_t len = 4;
   char buf[4];
-  if (!playground2::Util::sendFds(fds[0], "test", 4, fds[1], -1) ||
-      !playground2::Util::getFds(fds[1], buf, &len, fds+2, NULL) ||
+  if (!Util::sendFds(fds[0], "test", 4, fds[1], -1) ||
+      !Util::getFds(fds[1], buf, &len, fds+2, NULL) ||
       len != 4 ||
       memcmp(buf, "test", len) ||
       write(fds[2], "demo", 4) != 4 ||
diff --git a/sandbox/linux/seccomp-bpf/sandbox_bpf.cc b/sandbox/linux/seccomp-bpf/sandbox_bpf.cc
index e37772d..60a400d 100644
--- a/sandbox/linux/seccomp-bpf/sandbox_bpf.cc
+++ b/sandbox/linux/seccomp-bpf/sandbox_bpf.cc
@@ -10,17 +10,19 @@
 // pre-BPF seccomp mode.
 namespace playground2 {
 
+// We define a really simple sandbox policy. It is just good enough for us
+// to tell that the sandbox has actually been activated.
 Sandbox::ErrorCode Sandbox::probeEvaluator(int signo) {
   switch (signo) {
   case __NR_getpid:
     // Return EPERM so that we can check that the filter actually ran.
-    return (ErrorCode)EPERM;
+    return EPERM;
   case __NR_exit_group:
     // Allow exit() with a non-default return code.
     return SB_ALLOWED;
   default:
     // Make everything else fail in an easily recognizable way.
-    return (ErrorCode)EINVAL;
+    return EINVAL;
   }
 }
 
@@ -214,9 +216,9 @@ bool Sandbox::isSingleThreaded(int proc_fd) {
 }
 
 static bool isDenied(Sandbox::ErrorCode code) {
-  return code == Sandbox::SB_TRAP ||
-        (code >= (Sandbox::ErrorCode)1 &&
-         code <= (Sandbox::ErrorCode)4095);  // errno value
+  return (code & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP ||
+         (code >= (SECCOMP_RET_ERRNO + 1) &&
+          code <= (SECCOMP_RET_ERRNO + 4095));
 }
 
 void Sandbox::policySanityChecks(EvaluateSyscall syscallEvaluator,
@@ -265,6 +267,9 @@ void Sandbox::policySanityChecks(EvaluateSyscall syscallEvaluator,
 
 void Sandbox::setSandboxPolicy(EvaluateSyscall syscallEvaluator,
                                EvaluateArguments argumentEvaluator) {
+  if (status_ == STATUS_ENABLED) {
+    die("Cannot change policy after sandbox has started");
+  }
   policySanityChecks(syscallEvaluator, argumentEvaluator);
   evaluators_.push_back(std::make_pair(syscallEvaluator, argumentEvaluator));
 }
@@ -312,12 +317,10 @@ void Sandbox::installFilter() {
   program->push_back((struct sock_filter)
     BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 1, 0));
 
-  // TODO: Instead of killing outright, we should raise a SIGSYS and
-  //       report a useful error message. SIGKILL cannot be trapped by the
-  //       debugger and essentially makes the program fail in a way that is
-  //       almost impossible to debug.
   program->push_back((struct sock_filter)
-    BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL));
+    BPF_STMT(BPF_RET+BPF_K,
+             ErrorCode(bpfFailure,
+                       "Invalid audit architecture in BPF filter")));
 
   // Grab the system call number, so that we can implement jump tables.
   program->push_back((struct sock_filter)
@@ -334,9 +337,10 @@ void Sandbox::installFilter() {
   program->push_back((struct sock_filter)
     BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, 1));
 #endif
-  // TODO: raise a suitable SIGSYS signal
   program->push_back((struct sock_filter)
-    BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL));
+    BPF_STMT(BPF_RET+BPF_K,
+             ErrorCode(bpfFailure,
+                       "Illegal mixing of system call ABIs")));
 #endif
 
   // Evaluate all possible system calls and group their ErrorCodes into
@@ -347,10 +351,11 @@ void Sandbox::installFilter() {
   // Compile the system call ranges to an optimized BPF program.
   rangesToBPF(program, ranges);
 
-  // Everything that isn't allowed is forbidden. Eventually, we would
-  // like to have a way to log forbidden calls, when in debug mode.
+  // Unless there is a bug in the compiler, there is no execution path through
+  // the BPF program that falls through to the end.
   program->push_back((struct sock_filter)
-    BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO + SECCOMP_DENY_ERRNO));
+    BPF_STMT(BPF_RET+BPF_K,
+             ErrorCode(bpfFailure, "Detected unfiltered system call")));
 
   // Make sure compilation resulted in BPF program that executes
   // correctly. Otherwise, there is an internal error in our BPF compiler.
@@ -378,6 +383,9 @@ void Sandbox::installFilter() {
   memcpy(bpf, &(*program)[0], sizeof(bpf));
   delete program;
 
+  // Release memory that is no longer needed
+  evaluators_.clear();
+
   // Install BPF filter program
   if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
     die(dryRun_ ? NULL : "Kernel refuses to enable no-new-privs");
@@ -449,75 +457,138 @@ void Sandbox::rangesToBPF(Program *program, const Ranges& ranges) {
     }
     from = iter->to + 1;
 
-    // Convert ErrorCodes to return values that are acceptable for
-    // BPF filters.
-    int ret;
-    switch (iter->err) {
-    case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6:
-      die("Not implemented");
-    case SB_TRAP:
-      ret = SECCOMP_RET_TRAP;
-      break;
-    case SB_ALLOWED:
-      ret = SECCOMP_RET_ALLOW;
-      break;
-    default:
-      if (iter->err >= static_cast<ErrorCode>(1) &&
-          iter->err <= static_cast<ErrorCode>(4096)) {
-        // We limit errno values to a reasonable range. In fact, the Linux ABI
-        // doesn't support errno values outside of this range.
-        ret = SECCOMP_RET_ERRNO + iter->err;
-      } else {
-        die("Invalid ErrorCode reported by sandbox system call evaluator");
-      }
-      break;
-    }
-
     // Emit BPF instructions matching this range.
     if (iter->to != std::numeric_limits<unsigned>::max()) {
       program->push_back((struct sock_filter)
         BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, iter->to, 1, 0));
     }
     program->push_back((struct sock_filter)
-      BPF_STMT(BPF_RET+BPF_K, ret));
+      BPF_STMT(BPF_RET+BPF_K, iter->err));
   }
   return;
 }
 
 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) {
-  if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context) {
+  // Various sanity checks to make sure we actually received a signal
+  // triggered by a BPF filter. If something else triggered SIGSYS
+  // (e.g. kill()), there is really nothing we can do with this signal.
+  if (nr != SIGSYS || info->si_code != SYS_SECCOMP || !void_context ||
+      info->si_errno <= 0 ||
+      static_cast<size_t>(info->si_errno) > trapArraySize_) {
     // die() can call LOG(FATAL). This is not normally async-signal safe
     // and can lead to bugs. We should eventually implement a different
     // logging and reporting mechanism that is safe to be called from
     // the sigSys() handler.
+    // TODO: If we feel confident that our code otherwise works correctly, we
+    //       could actually make an argument that spurious SIGSYS should
+    //       just get silently ignored. TBD
+  sigsys_err:
     die("Unexpected SIGSYS received");
   }
-  ucontext_t *ctx = reinterpret_cast<ucontext_t *>(void_context);
+
+  // Signal handlers should always preserve "errno". Otherwise, we could
+  // trigger really subtle bugs.
   int old_errno   = errno;
 
-  // In case of error, set the REG_RESULT CPU register to the default
-  // errno value (i.e. EPERM).
-  // We need to be very careful when doing this, as some of our target
-  // platforms have pointer types and CPU registers that are wider than
-  // ints. Furthermore, the kernel ABI requires us to return a negative
-  // value, but errno values are usually positive. And in fact, it would
-  // be perfectly reasonable for somebody to have defined them as unsigned
-  // properties. This makes the correct incantation of type casts rather
-  // subtle. Sometimes, C++ is just too smart for its own good.
-  void *rc        = (void *)(intptr_t)-(int)SECCOMP_DENY_ERRNO;
-
-  // This is where we can add extra code to handle complex system calls.
-  // ...
-
-  ctx->uc_mcontext.gregs[REG_RESULT] = reinterpret_cast<greg_t>(rc);
+  // Obtain the signal context. This, most notably, gives us access to
+  // all CPU registers at the time of the signal.
+  ucontext_t *ctx = reinterpret_cast<ucontext_t *>(void_context);
+
+  // Obtain the siginfo information that is specific to SIGSYS. Unfortunately,
+  // most versions of glibc don't include this information in siginfo_t. So,
+  // we need to explicitly copy it into an arch_sigsys structure.
+  struct arch_sigsys sigsys;
+  memcpy(&sigsys, &info->_sifields, sizeof(sigsys));
+
+  // Some more sanity checks.
+  if (sigsys.ip != reinterpret_cast<void *>(ctx->uc_mcontext.gregs[REG_IP]) ||
+      sigsys.nr != static_cast<int>(ctx->uc_mcontext.gregs[REG_SYSCALL]) ||
+      sigsys.arch != SECCOMP_ARCH) {
+    goto sigsys_err;
+  }
+
+  // Copy the seccomp-specific data into an arch_seccomp_data structure. This
+  // is what we are showing to TrapFnc callbacks that the system call evaluator
+  // registered with the sandbox.
+  struct arch_seccomp_data data = {
+    sigsys.nr,
+    SECCOMP_ARCH,
+    reinterpret_cast<uint64_t>(sigsys.ip),
+    {
+      static_cast<uint64_t>(ctx->uc_mcontext.gregs[REG_PARM1]),
+      static_cast<uint64_t>(ctx->uc_mcontext.gregs[REG_PARM2]),
+      static_cast<uint64_t>(ctx->uc_mcontext.gregs[REG_PARM3]),
+      static_cast<uint64_t>(ctx->uc_mcontext.gregs[REG_PARM4]),
+      static_cast<uint64_t>(ctx->uc_mcontext.gregs[REG_PARM5]),
+      static_cast<uint64_t>(ctx->uc_mcontext.gregs[REG_PARM6])
+    }
+  };
+
+  // Now call the TrapFnc callback associated with this particular instance
+  // of SECCOMP_RET_TRAP.
+  const ErrorCode& err = trapArray_[info->si_errno - 1];
+  intptr_t rc          = err.fnc_(data, err.aux_);
+
+  // Update the CPU register that stores the return code of the system call
+  // that we just handled, and restore "errno" to the value that it had
+  // before entering the signal handler.
+  ctx->uc_mcontext.gregs[REG_RESULT] = static_cast<greg_t>(rc);
   errno                              = old_errno;
+
   return;
 }
 
+intptr_t Sandbox::bpfFailure(const struct arch_seccomp_data&, void *aux) {
+  die(static_cast<char *>(aux));
+}
+
+int Sandbox::getTrapId(Sandbox::TrapFnc fnc, const void *aux) {
+  // Each unique pair of TrapFnc and auxiliary data makes up a distinct instance
+  // of a SECCOMP_RET_TRAP.
+  std::pair<TrapFnc, const void *> key(fnc, aux);
+  TrapIds::const_iterator iter = trapIds_.find(key);
+  if (iter != trapIds_.end()) {
+    // We have seen this pair before. Return the same id that we assigned
+    // earlier.
+    return iter->second;
+  } else {
+    // This is a new pair. Remember it and assign a new id.
+    // Please note that we have to store traps in memory that doesn't get
+    // deallocated when the program is shutting down. A memory leak is
+    // intentional, because we might otherwise not be able to execute
+    // system calls part way through the program shutting down
+    if (!traps_) {
+      traps_ = new Traps();
+    }
+    int id   = traps_->size() + 1;
+    if (id > static_cast<int>(SECCOMP_RET_DATA)) {
+      // In practice, this is pretty much impossible to trigger, as there
+      // are other kernel limitations that restrict overall BPF program sizes.
+      die("Too many SECCOMP_RET_TRAP callback instances");
+    }
+
+    traps_->push_back(ErrorCode(fnc, aux, id));
+    trapIds_[key] = id;
+
+    // We want to access the traps_ vector from our signal handler. But
+    // we are not assured that doing so is async-signal safe. On the other
+    // hand, C++ guarantees that the contents of a vector are stored in a
+    // contiguous C-style array.
+    // So, we look up the address and size of this array outside of the
+    // signal handler, where we can safely do so.
+    trapArray_     = &(*traps_)[0];
+    trapArraySize_ = id;
+    return id;
+  }
+}
 
 bool Sandbox::dryRun_                   = false;
 Sandbox::SandboxStatus Sandbox::status_ = STATUS_UNKNOWN;
 int    Sandbox::proc_fd_                = -1;
 Sandbox::Evaluators Sandbox::evaluators_;
+Sandbox::Traps *Sandbox::traps_         = NULL;
+Sandbox::TrapIds Sandbox::trapIds_;
+Sandbox::ErrorCode *Sandbox::trapArray_ = NULL;
+size_t Sandbox::trapArraySize_          = 0;
 
 }  // namespace
diff --git a/sandbox/linux/seccomp-bpf/sandbox_bpf.h b/sandbox/linux/seccomp-bpf/sandbox_bpf.h
index 25d623e..3a297e9 100644
--- a/sandbox/linux/seccomp-bpf/sandbox_bpf.h
+++ b/sandbox/linux/seccomp-bpf/sandbox_bpf.h
@@ -16,6 +16,7 @@
 #include <netinet/tcp.h>
 #include <netinet/udp.h>
 #include <sched.h>
+#include <signal.h>
 #include <stddef.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -34,6 +35,7 @@
 
 #include <algorithm>
 #include <limits>
+#include <map>
 #include <utility>
 #include <vector>
 
@@ -63,6 +65,7 @@
 #define SECCOMP_RET_ERRNO   0x00050000U  // Returns an errno
 #define SECCOMP_RET_TRACE   0x7ff00000U  // Pass to a tracer or disallow
 #define SECCOMP_RET_ALLOW   0x7fff0000U  // Allow
+#define SECCOMP_RET_INVALID 0x8f8f8f8fU  // Illegal return value
 #define SECCOMP_RET_ACTION  0xffff0000U  // Masks for the return value
 #define SECCOMP_RET_DATA    0x0000ffffU  //   sections
 #endif
@@ -77,6 +80,7 @@
 #define SECCOMP_ARCH AUDIT_ARCH_I386
 #define REG_RESULT   REG_EAX
 #define REG_SYSCALL  REG_EAX
+#define REG_IP       REG_EIP
 #define REG_PARM1    REG_EBX
 #define REG_PARM2    REG_ECX
 #define REG_PARM3    REG_EDX
@@ -89,6 +93,7 @@
 #define SECCOMP_ARCH AUDIT_ARCH_X86_64
 #define REG_RESULT   REG_RAX
 #define REG_SYSCALL  REG_RAX
+#define REG_IP       REG_RIP
 #define REG_PARM1    REG_RDI
 #define REG_PARM2    REG_RSI
 #define REG_PARM3    REG_RDX
@@ -100,12 +105,18 @@
 #endif
 
 struct arch_seccomp_data {
-  int nr;
+  int      nr;
   uint32_t arch;
   uint64_t instruction_pointer;
   uint64_t args[6];
 };
 
+struct arch_sigsys {
+  void         *ip;
+  int          nr;
+  unsigned int arch;
+};
+
 #ifdef SECCOMP_BPF_STANDALONE
 #define arraysize(x) sizeof(x)/sizeof(*(x)))
 #define HANDLE_EINTR TEMP_FAILURE_RETRY
@@ -131,8 +142,8 @@ class Sandbox {
     STATUS_ENABLED       // The sandbox is now active
   };
 
-  enum ErrorCode {
-    SB_TRAP          = -1,
+  enum {
+    SB_INVALID       = -1,
     SB_ALLOWED       = 0x0000,
     SB_INSPECT_ARG_1 = 0x8001,
     SB_INSPECT_ARG_2 = 0x8002,
@@ -140,8 +151,74 @@ class Sandbox {
     SB_INSPECT_ARG_4 = 0x8008,
     SB_INSPECT_ARG_5 = 0x8010,
     SB_INSPECT_ARG_6 = 0x8020
+  };
+
+  // TrapFnc is a pointer to a function that handles Seccomp traps in
+  // user-space. The seccomp policy can request that a trap handler gets
+  // installed; it does so by returning a suitable ErrorCode() from the
+  // syscallEvaluator. See the ErrorCode() constructor for how to pass in
+  // the function pointer.
+  // Please note that TrapFnc is executed from signal context and must be
+  // async-signal safe:
+  // http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html
+  typedef intptr_t (*TrapFnc)(const struct arch_seccomp_data& args, void *aux);
+
+  class ErrorCode {
+    friend class Sandbox;
+  public:
+    // We can either wrap a symbolic ErrorCode (i.e. enum values), an errno
+    // value (in the range 1..4095), or a pointer to a TrapFnc callback
+    // handling a SECCOMP_RET_TRAP trap.
+    // All of these different values are stored in the "err_" field. So, code
+    // that is using the ErrorCode class typically operates on a single 32bit
+    // field.
+    // This is not only quite efficient, it also makes the API really easy to
+    // use.
+    ErrorCode(int err = SB_INVALID) {
+      switch (err) {
+      case SB_INVALID:
+        err_ = SECCOMP_RET_INVALID;
+        break;
+      case SB_ALLOWED:
+        err_ = SECCOMP_RET_ALLOW;
+        break;
+      case SB_INSPECT_ARG_1...SB_INSPECT_ARG_6:
+        die("Not implemented");
+        break;
+      case 1 ... 4095:
+        err_ = SECCOMP_RET_ERRNO + err;
+        break;
+      default:
+        die("Invalid use of ErrorCode object");
+      }
+    }
+
+    // If we are wrapping a callback, we must assign a unique id. This id is
+    // how the kernel tells us which one of our different SECCOMP_RET_TRAP
+    // cases has been triggered.
+    // The getTrapId() function assigns one unique id (starting at 1) for
+    // each distinct pair of TrapFnc and auxiliary data.
+    ErrorCode(TrapFnc fnc, const void *aux, int id = 0) :
+      id_(id ? id : getTrapId(fnc, aux)),
+      fnc_(fnc),
+      aux_(const_cast<void *>(aux)),
+      err_(SECCOMP_RET_TRAP + id_) {
+    }
+
+    // Destructor doesn't need to do anything.
+    ~ErrorCode() { }
+
+    // Always return the value that goes into the BPF filter program.
+    operator uint32_t() const { return err_; }
+
+  protected:
+    // Fields needed for SECCOMP_RET_TRAP callbacks
+    int      id_;
+    TrapFnc  fnc_;
+    void     *aux_;
 
-    // Also, any errno value is valid when cast to ErrorCode.
+    // 32bit field used for all possible types of ErrorCode values
+    uint32_t err_;
   };
 
   enum Operation {
@@ -251,6 +328,8 @@ class Sandbox {
   };
   typedef std::vector<Range> Ranges;
   typedef std::vector<struct sock_filter> Program;
+  typedef std::vector<ErrorCode> Traps;
+  typedef std::map<std::pair<TrapFnc, const void *>, int> TrapIds;
 
   static ErrorCode probeEvaluator(int signo) __attribute__((const));
   static bool      kernelSupportSeccompBPF(int proc_fd);
@@ -262,11 +341,17 @@ class Sandbox {
   static void      findRanges(Ranges *ranges);
   static void      rangesToBPF(Program *program, const Ranges& ranges);
   static void      sigSys(int nr, siginfo_t *info, void *void_context);
+  static intptr_t  bpfFailure(const struct arch_seccomp_data& data, void *aux);
+  static int       getTrapId(TrapFnc fnc, const void *aux);
 
   static bool          dryRun_;
   static SandboxStatus status_;
   static int           proc_fd_;
   static Evaluators    evaluators_;
+  static Traps         *traps_;
+  static TrapIds       trapIds_;
+  static ErrorCode     *trapArray_;
+  static size_t        trapArraySize_;
 };
 
 }  // namespace
diff --git a/sandbox/linux/seccomp-bpf/verifier.cc b/sandbox/linux/seccomp-bpf/verifier.cc
index 352b36f..71b743c 100644
--- a/sandbox/linux/seccomp-bpf/verifier.cc
+++ b/sandbox/linux/seccomp-bpf/verifier.cc
@@ -36,31 +36,11 @@ bool Verifier::verifyBPF(const std::vector<struct sock_filter>& program,
 #endif
 
     struct arch_seccomp_data data = { sysnum, SECCOMP_ARCH };
-    uint32_t expectedRet;
     Sandbox::ErrorCode code = evaluateSyscall(sysnum);
-    switch (code) {
-    case Sandbox::SB_TRAP:
-      expectedRet = SECCOMP_RET_TRAP;
-      break;
-    case Sandbox::SB_ALLOWED:
-      expectedRet = SECCOMP_RET_ALLOW;
-      break;
-    case Sandbox::SB_INSPECT_ARG_1...Sandbox::SB_INSPECT_ARG_6:
-      *err = "Not implemented";
-      return false;
-    default:
-      if (code >= 1 && code < 4096) {
-        expectedRet = SECCOMP_RET_ERRNO + static_cast<int>(code);
-      } else {
-        *err = "Invalid errno value";
-        return false;
-      }
-      break;
-    }
     uint32_t computedRet = evaluateBPF(program, data, err);
     if (*err) {
       return false;
-    } else if (computedRet != expectedRet) {
+    } else if (computedRet != code) {
       *err = "Exit code from BPF program doesn't match";
       return false;
     }
author	markus@chromium.org <markus@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2012-06-15 07:42:24 +0000
committer	markus@chromium.org <markus@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2012-06-15 07:42:24 +0000
commit	c4fc5f3b0f783b7662bfe8c128e3cde7c6352fda (patch)
tree	f58979add793d46f6aaf3e72709d1d0c2019f944 /sandbox/linux
parent	400d3153e3d7a5d7ba120863de0ce3662a886962 (diff)
download	chromium_src-c4fc5f3b0f783b7662bfe8c128e3cde7c6352fda.zip chromium_src-c4fc5f3b0f783b7662bfe8c128e3cde7c6352fda.tar.gz chromium_src-c4fc5f3b0f783b7662bfe8c128e3cde7c6352fda.tar.bz2