SANDBOX-BPF: Initial version of the updated code generator.

New code generator that is more generic and can automatically reorder instructions to meet the constraints of BPF programs. Previously, we were very careful to emit instructions in just the right order so that there would only ever be forward jumps. As we add more features to our BPF programs, this code is getting fragile. So, instead, we now use standard compiler techniques; we first build a graph of all the instructions, then we split them into basic blocks, we perform some basic optimizations (at the moment, this is just the merging of common tails of instructions), we sort the basic blocks topologically, and then we reassemble all the blocks into a BPF program. There should be no functional change, but this code is the pre-requisite for upcoming changes. BUG=130662 TEST=sandbox_linux_unittests Review URL: https://chromiumcodereview.appspot.com/10690011 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@162924 0039d316-1c4b-4281-b951-d872f2087c98
author: markus@chromium.org <markus@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-10-19 03:43:55 +0000
committer: markus@chromium.org <markus@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-10-19 03:43:55 +0000
commit: 772c04797682f3936e8323dbd04c311268ee01fe (patch)
tree: dd4e52496a4c2feaaabee5b362fea349a9cc5af7 /sandbox/linux
parent: 02b98be97669cd18d4e60084446aa42973fc12e3 (diff)
download: chromium_src-772c04797682f3936e8323dbd04c311268ee01fe.zip
chromium_src-772c04797682f3936e8323dbd04c311268ee01fe.tar.gz
chromium_src-772c04797682f3936e8323dbd04c311268ee01fe.tar.bz2
14 files changed, 1469 insertions, 153 deletions
diff --git a/sandbox/linux/sandbox_linux.gypi b/sandbox/linux/sandbox_linux.gypi
index 0508f70..1d49b47 100644
--- a/sandbox/linux/sandbox_linux.gypi
+++ b/sandbox/linux/sandbox_linux.gypi
@@ -55,6 +55,7 @@
           'sources': [
             'seccomp-bpf/bpf_tests.cc',
             'seccomp-bpf/bpf_tests.h',
+            'seccomp-bpf/codegen_unittest.cc',
             'seccomp-bpf/errorcode_unittest.cc',
             'seccomp-bpf/sandbox_bpf_unittest.cc',
             'seccomp-bpf/syscall_iterator_unittest.cc',
@@ -66,10 +67,15 @@
       'target_name': 'seccomp_bpf',
       'type': 'static_library',
       'sources': [
+        'seccomp-bpf/basicblock.cc',
+        'seccomp-bpf/basicblock.h',
+        'seccomp-bpf/codegen.cc',
+        'seccomp-bpf/codegen.h',
         'seccomp-bpf/die.cc',
         'seccomp-bpf/die.h',
         'seccomp-bpf/errorcode.cc',
         'seccomp-bpf/errorcode.h',
+        'seccomp-bpf/instruction.h',
         'seccomp-bpf/sandbox_bpf.cc',
         'seccomp-bpf/sandbox_bpf.h',
         'seccomp-bpf/syscall_iterator.cc',
diff --git a/sandbox/linux/seccomp-bpf/Makefile b/sandbox/linux/seccomp-bpf/Makefile
index 920b754..a697198 100644
--- a/sandbox/linux/seccomp-bpf/Makefile
+++ b/sandbox/linux/seccomp-bpf/Makefile
@@ -2,7 +2,7 @@ DEF_CFLAGS = -g -O3 -Wall -Werror -Wextra -Wno-missing-field-initializers -fPIC
 DEF_CPPFLAGS = -D_GNU_SOURCE -DSECCOMP_BPF_STANDALONE -DSECCOMP_BPF_VALGRIND_HACKS -include valgrind/valgrind.h -iquote ../../..
 DEF_LDFLAGS = -g -lpthread
 DEPFLAGS = -MMD -MF .$@.d
-MODS := demo sandbox_bpf die errorcode syscall_iterator util verifier
+MODS := demo sandbox_bpf die codegen errorcode syscall_iterator util verifier
 OBJS64 := $(shell echo ${MODS} | xargs -n 1 | sed -e 's/$$/.o64/')
 OBJS32 := $(shell echo ${MODS} | xargs -n 1 | sed -e 's/$$/.o32/')
 ALL_OBJS = $(OBJS32) $(OBJS64)
diff --git a/sandbox/linux/seccomp-bpf/basicblock.cc b/sandbox/linux/seccomp-bpf/basicblock.cc
new file mode 100644
index 0000000..bf27c58
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/basicblock.cc
@@ -0,0 +1,16 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/seccomp-bpf/basicblock.h"
+
+
+namespace playground2 {
+
+BasicBlock::BasicBlock() {
+}
+
+BasicBlock::~BasicBlock() {
+}
+
+}  // namespace
diff --git a/sandbox/linux/seccomp-bpf/basicblock.h b/sandbox/linux/seccomp-bpf/basicblock.h
new file mode 100644
index 0000000..1782a80
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/basicblock.h
@@ -0,0 +1,51 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SECCOMP_BPF_BASICBLOCK_H__
+#define SANDBOX_LINUX_SECCOMP_BPF_BASICBLOCK_H__
+
+#include <vector>
+
+#include "sandbox/linux/seccomp-bpf/instruction.h"
+
+
+namespace playground2 {
+
+struct BasicBlock {
+  BasicBlock();
+  ~BasicBlock();
+
+  // Our implementation of the code generator uses a "Less" operator to
+  // identify common sequences of basic blocks. This would normally be
+  // really easy to do, but STL requires us to wrap the comparator into
+  // a class. We begrudgingly add some code here that provides this wrapping.
+  template<class T> class Less {
+   public:
+    Less(const T& data, int (*cmp)(const BasicBlock *, const BasicBlock *,
+                                   const T& data))
+        : data_(data),
+          cmp_(cmp) {
+    }
+
+    bool operator() (const BasicBlock *a, const BasicBlock *b) const {
+      return cmp_(a, b, data_) < 0;
+    }
+
+   private:
+    const T& data_;
+    int (*cmp_)(const BasicBlock *, const BasicBlock *, const T&);
+  };
+
+  // Basic blocks are essentially nothing more than a set of instructions.
+  std::vector<Instruction *> instructions;
+
+  // In order to compute relative branch offsets we need to keep track of
+  // how far our block is away from the very last basic block. The "offset_"
+  // is measured in number of BPF instructions.
+  int offset;
+};
+
+}  // namespace playground2
+
+#endif  // SANDBOX_LINUX_SECCOMP_BPF_BASICBLOCK_H__
diff --git a/sandbox/linux/seccomp-bpf/codegen.cc b/sandbox/linux/seccomp-bpf/codegen.cc
new file mode 100644
index 0000000..8b36315
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/codegen.cc
@@ -0,0 +1,657 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/seccomp-bpf/codegen.h"
+
+
+namespace playground2 {
+
+CodeGen::CodeGen()
+    : compiled_(false) {
+}
+
+CodeGen::~CodeGen() {
+  for (Instructions::iterator iter = instructions_.begin();
+       iter != instructions_.end();
+       ++iter) {
+    delete *iter;
+  }
+  for (BasicBlocks::iterator iter = basic_blocks_.begin();
+       iter != basic_blocks_.end();
+       ++iter) {
+    delete *iter;
+  }
+}
+
+void CodeGen::PrintProgram(const Sandbox::Program& program) {
+  for (Sandbox::Program::const_iterator iter = program.begin();
+       iter != program.end();
+       ++iter) {
+    int ip = (int)(iter - program.begin());
+    fprintf(stderr, "%3d) ", ip);
+    switch (BPF_CLASS(iter->code)) {
+    case BPF_LD:
+      if (iter->code == BPF_LD+BPF_W+BPF_ABS) {
+        fprintf(stderr, "LOAD %d\n", (int)iter->k);
+      } else {
+        fprintf(stderr, "LOAD ???\n");
+      }
+      break;
+    case BPF_JMP:
+      if (BPF_OP(iter->code) == BPF_JA) {
+        fprintf(stderr, "JMP %d\n", ip + iter->k + 1);
+      } else {
+        fprintf(stderr, "if A %s 0x%x; then JMP %d else JMP %d\n",
+                BPF_OP(iter->code) == BPF_JSET ? "&" :
+                BPF_OP(iter->code) == BPF_JEQ ? "==" :
+                BPF_OP(iter->code) == BPF_JGE ? ">=" :
+                BPF_OP(iter->code) == BPF_JGT ? ">"  : "???",
+                (int)iter->k,
+                ip + iter->jt + 1, ip + iter->jf + 1);
+      }
+      break;
+    case BPF_RET:
+      fprintf(stderr, "RET 0x%x\n", iter->k);
+      break;
+    default:
+      fprintf(stderr, "???\n");
+      break;
+    }
+  }
+  return;
+}
+
+Instruction *CodeGen::MakeInstruction(uint16_t code, uint32_t k,
+                                      Instruction *next) {
+  // We can handle non-jumping instructions and "always" jumps. Both of
+  // them are followed by exactly one "next" instruction.
+  // We allow callers to defer specifying "next", but then they must call
+  // "joinInstructions" later.
+  if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) {
+    SANDBOX_DIE("Must provide both \"true\" and \"false\" branch "
+                "for a BPF_JMP");
+  }
+  if (next && BPF_CLASS(code) == BPF_RET) {
+    SANDBOX_DIE("Cannot append instructions after a return statement");
+  }
+  if (BPF_CLASS(code) == BPF_JMP) {
+    // "Always" jumps use the "true" branch target, only.
+    Instruction *insn = new Instruction(code, 0, next, NULL);
+    instructions_.push_back(insn);
+    return insn;
+  } else {
+    // Non-jumping instructions do not use any of the branch targets.
+    Instruction *insn = new Instruction(code, k, next);
+    instructions_.push_back(insn);
+    return insn;
+  }
+}
+
+Instruction *CodeGen::MakeInstruction(uint16_t code, const ErrorCode& err) {
+  if (BPF_CLASS(code) != BPF_RET) {
+    SANDBOX_DIE("ErrorCodes can only be used in return expressions");
+  }
+  if (err.error_type_ != ErrorCode::ET_SIMPLE &&
+      err.error_type_ != ErrorCode::ET_TRAP) {
+    SANDBOX_DIE("ErrorCode is not suitable for returning from a BPF program");
+  }
+  return MakeInstruction(code, err.err_);
+}
+
+Instruction *CodeGen::MakeInstruction(uint16_t code, uint32_t k,
+                                      Instruction *jt, Instruction *jf) {
+  // We can handle all conditional jumps. They are followed by both a
+  // "true" and a "false" branch.
+  if (BPF_CLASS(code) != BPF_JMP || BPF_OP(code) == BPF_JA) {
+    SANDBOX_DIE("Expected a BPF_JMP instruction");
+  }
+  if (!jt && !jf) {
+    // We allow callers to defer specifying exactly one of the branch
+    // targets. It must then be set later by calling "JoinInstructions".
+    SANDBOX_DIE("Branches must jump to a valid instruction");
+  }
+  Instruction *insn = new Instruction(code, k, jt, jf);
+  instructions_.push_back(insn);
+  return insn;
+}
+
+void CodeGen::JoinInstructions(Instruction *head, Instruction *tail) {
+  // Merge two instructions, or set the branch target for an "always" jump.
+  // This function should be called, if the caller didn't initially provide
+  // a value for "next" when creating the instruction.
+  if (BPF_CLASS(head->code) == BPF_JMP) {
+    if (BPF_OP(head->code) == BPF_JA) {
+      if (head->jt_ptr) {
+        SANDBOX_DIE("Cannot append instructions in the middle of a sequence");
+      }
+      head->jt_ptr = tail;
+    } else {
+      if (!head->jt_ptr && head->jf_ptr) {
+        head->jt_ptr = tail;
+      } else if (!head->jf_ptr && head->jt_ptr) {
+        head->jf_ptr = tail;
+      } else {
+        SANDBOX_DIE("Cannot append instructions after a jump");
+      }
+    }
+  } else if (BPF_CLASS(head->code) == BPF_RET) {
+    SANDBOX_DIE("Cannot append instructions after a return statement");
+  } else if (head->next) {
+    SANDBOX_DIE("Cannot append instructions in the middle of a sequence");
+  } else {
+    head->next = tail;
+  }
+  return;
+}
+
+void CodeGen::FindBranchTargets(const Instruction& instructions,
+                                BranchTargets *branch_targets) {
+  // Follow all possible paths through the "instructions" graph and compute
+  // a list of branch targets. This will later be needed to compute the
+  // boundaries of basic blocks.
+  // We maintain a set of all instructions that we have previously seen. This
+  // set ultimately converges on all instructions in the program.
+  std::set<const Instruction *> seen_instructions;
+  Instructions stack;
+  for (const Instruction *insn = &instructions; insn; ) {
+    seen_instructions.insert(insn);
+    if (BPF_CLASS(insn->code) == BPF_JMP) {
+      // Found a jump. Increase count of incoming edges for each of the jump
+      // targets.
+      ++(*branch_targets)[insn->jt_ptr];
+      if (BPF_OP(insn->code) != BPF_JA) {
+        ++(*branch_targets)[insn->jf_ptr];
+        stack.push_back(const_cast<Instruction *>(insn));
+      }
+      // Start a recursive decent for depth-first traversal.
+      if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
+        // We haven't seen the "true" branch yet. Traverse it now. We have
+        // already remembered the "false" branch on the stack and will
+        // traverse it later.
+        insn = insn->jt_ptr;
+        continue;
+      } else {
+        // Now try traversing the "false" branch.
+        insn = NULL;
+      }
+    } else {
+      // This is a non-jump instruction, just continue to the next instruction
+      // (if any). It's OK if "insn" becomes NULL when reaching a return
+      // instruction.
+      if (!insn->next != (BPF_CLASS(insn->code) == BPF_RET)) {
+        SANDBOX_DIE("Internal compiler error; return instruction must be at "
+                    "the end of the BPF program");
+      }
+      if (seen_instructions.find(insn->next) == seen_instructions.end()) {
+        insn = insn->next;
+      } else {
+        // We have seen this instruction before. That could happen if it is
+        // a branch target. No need to continue processing.
+        insn = NULL;
+      }
+    }
+    while (!insn && !stack.empty()) {
+      // We are done processing all the way to a leaf node, backtrack up the
+      // stack to any branches that we haven't processed yet. By definition,
+      // this has to be a "false" branch, as we always process the "true"
+      // branches right away.
+      insn = stack.back();
+      stack.pop_back();
+      if (seen_instructions.find(insn->jf_ptr) == seen_instructions.end()) {
+        // We haven't seen the "false" branch yet. So, that's where we'll
+        // go now.
+        insn = insn->jf_ptr;
+      } else {
+        // We have seen both the "true" and the "false" branch, continue
+        // up the stack.
+        if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
+          SANDBOX_DIE("Internal compiler error; cannot find all "
+                      "branch targets");
+        }
+        insn = NULL;
+      }
+    }
+  }
+  return;
+}
+
+BasicBlock *CodeGen::MakeBasicBlock(Instruction *head,
+                                    Instruction *tail) {
+  // Iterate over all the instructions between "head" and "tail" and
+  // insert them into a new basic block.
+  BasicBlock *bb = new BasicBlock;
+  for (;; head = head->next) {
+    bb->instructions.push_back(head);
+    if (head == tail) {
+      break;
+    }
+    if (BPF_CLASS(head->code) == BPF_JMP) {
+      SANDBOX_DIE("Found a jump inside of a basic block");
+    }
+  }
+  basic_blocks_.push_back(bb);
+  return bb;
+}
+
+void CodeGen::AddBasicBlock(Instruction *head,
+                            Instruction *tail,
+                            const BranchTargets& branch_targets,
+                            TargetsToBlocks *basic_blocks,
+                            BasicBlock **firstBlock) {
+  // Add a new basic block to "basic_blocks". Also set "firstBlock", if it
+  // has not been set before.
+  BranchTargets::const_iterator iter = branch_targets.find(head);
+  if ((iter == branch_targets.end()) != !*firstBlock ||
+      !*firstBlock != basic_blocks->empty()) {
+    SANDBOX_DIE("Only the very first basic block should have no "
+                "incoming jumps");
+  }
+  BasicBlock *bb = MakeBasicBlock(head, tail);
+  if (!*firstBlock) {
+    *firstBlock = bb;
+  }
+  (*basic_blocks)[head] = bb;
+  return;
+}
+
+BasicBlock *CodeGen::CutGraphIntoBasicBlocks(
+    Instruction *instructions, const BranchTargets& branch_targets,
+    TargetsToBlocks *basic_blocks) {
+  // Textbook implementation of a basic block generator. All basic blocks
+  // start with a branch target and end with either a return statement or
+  // a jump (or are followed by an instruction that forms the beginning of a
+  // new block). Both conditional and "always" jumps are supported.
+  BasicBlock *first_block = NULL;
+  std::set<const Instruction *> seen_instructions;
+  Instructions stack;
+  Instruction *tail = NULL;
+  Instruction *head = instructions;
+  for (Instruction *insn = head; insn; ) {
+    if (seen_instructions.find(insn) != seen_instructions.end()) {
+      // We somehow went in a circle. This should never be possible. Not even
+      // cyclic graphs are supposed to confuse us this much.
+      SANDBOX_DIE("Internal compiler error; cannot compute basic blocks");
+    }
+    seen_instructions.insert(insn);
+    if (tail && branch_targets.find(insn) != branch_targets.end()) {
+      // We reached a branch target. Start a new basic block (this means,
+      // flushing the previous basic block first).
+      AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
+      head = insn;
+    }
+    if (BPF_CLASS(insn->code) == BPF_JMP) {
+      // We reached a jump instruction, this completes our current basic
+      // block. Flush it and continue by traversing both the true and the
+      // false branch of the jump. We need to maintain a stack to do so.
+      AddBasicBlock(head, insn, branch_targets, basic_blocks, &first_block);
+      if (BPF_OP(insn->code) != BPF_JA) {
+        stack.push_back(insn->jf_ptr);
+      }
+      insn = insn->jt_ptr;
+
+      // If we are jumping to an instruction that we have previously
+      // processed, we are done with this branch. Continue by backtracking
+      // up the stack.
+      while (seen_instructions.find(insn) != seen_instructions.end()) {
+      backtracking:
+        if (stack.empty()) {
+          // We successfully traversed all reachable instructions.
+          return first_block;
+        } else {
+          // Going up the stack.
+          insn = stack.back();
+          stack.pop_back();
+        }
+      }
+      // Starting a new basic block.
+      tail = NULL;
+      head = insn;
+    } else {
+      // We found a non-jumping instruction, append it to current basic
+      // block.
+      tail = insn;
+      insn = insn->next;
+      if (!insn) {
+        // We reached a return statement, flush the current basic block and
+        // backtrack up the stack.
+        AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
+        goto backtracking;
+      }
+    }
+  }
+  return first_block;
+}
+
+// We define a comparator that inspects the sequence of instructions in our
+// basic block and any blocks referenced by this block. This function can be
+// used in a "less" comparator for the purpose of storing pointers to basic
+// blocks in STL containers; this gives an easy option to use STL to find
+// shared tail  sequences of basic blocks.
+static int PointerCompare(const BasicBlock *block1, const BasicBlock *block2,
+                          const TargetsToBlocks& blocks) {
+  // Return <0, 0, or >0 depending on the ordering of "block1" and "block2".
+  // If we are looking at the exact same block, this is trivial and we don't
+  // need to do a full comparison.
+  if (block1 == block2) {
+    return 0;
+  }
+
+  // We compare the sequence of instructions in both basic blocks.
+  const Instructions& insns1 = block1->instructions;
+  const Instructions& insns2 = block2->instructions;
+  Instructions::const_iterator iter1 = insns1.begin();
+  Instructions::const_iterator iter2 = insns2.begin();
+  for (;; ++iter1, ++iter2) {
+    // If we have reached the end of the sequence of instructions in one or
+    // both basic blocks, we know the relative ordering between the two blocks
+    // and can return.
+    if (iter1 == insns1.end()) {
+      return iter2 == insns2.end() ? 0 : -1;
+    } else if (iter2 == insns2.end()) {
+      return 1;
+    }
+
+    // Compare the individual fields for both instructions.
+    const Instruction& insn1 = **iter1;
+    const Instruction& insn2 = **iter2;
+    if (insn1.code == insn2.code) {
+      if (insn1.k == insn2.k) {
+        // Only conditional jump instructions use the jt_ptr and jf_ptr
+        // fields.
+        if (BPF_CLASS(insn1.code) == BPF_JMP) {
+          if (BPF_OP(insn1.code) != BPF_JA) {
+            // Recursively compare the "true" and "false" branches.
+            // A well-formed BPF program can't have any cycles, so we know
+            // that our recursive algorithm will ultimately terminate.
+            // In the unlikely event that the programmer made a mistake and
+            // went out of the way to give us a cyclic program, we will crash
+            // with a stack overflow. We are OK with that.
+            int c = PointerCompare(blocks.find(insn1.jt_ptr)->second,
+                                   blocks.find(insn2.jt_ptr)->second,
+                                   blocks);
+            if (c == 0) {
+              c = PointerCompare(blocks.find(insn1.jf_ptr)->second,
+                                 blocks.find(insn2.jf_ptr)->second,
+                                 blocks);
+              if (c == 0) {
+                continue;
+              } else {
+                return c;
+              }
+            } else {
+              return c;
+            }
+          } else {
+            int c = PointerCompare(blocks.find(insn1.jt_ptr)->second,
+                                   blocks.find(insn2.jt_ptr)->second,
+                                   blocks);
+            if (c == 0) {
+              continue;
+            } else {
+              return c;
+            }
+          }
+        } else {
+          continue;
+        }
+      } else {
+        return insn1.k - insn2.k;
+      }
+    } else {
+      return insn1.code - insn2.code;
+    }
+  }
+}
+
+void CodeGen::MergeTails(TargetsToBlocks *blocks) {
+  // We enter all of our basic blocks into a set using the BasicBlock::Less()
+  // comparator. This naturally results in blocks with identical tails of
+  // instructions to map to the same entry in the set. Whenever we discover
+  // that a particular chain of instructions is already in the set, we merge
+  // the basic blocks and update the pointer in the "blocks" map.
+  // Returns the number of unique basic blocks.
+  // N.B. We don't merge instructions on a granularity that is finer than
+  //      a basic block. In practice, this is sufficiently rare that we don't
+  //      incur a big cost.
+  //      Similarly, we currently don't merge anything other than tails. In
+  //      the future, we might decide to revisit this decision and attempt to
+  //      merge arbitrary sub-sequences of instructions.
+  BasicBlock::Less<TargetsToBlocks> less(*blocks, PointerCompare);
+  typedef std::set<BasicBlock *, BasicBlock::Less<TargetsToBlocks> > Set;
+  Set seen_basic_blocks(less);
+  for (TargetsToBlocks::iterator iter = blocks->begin();
+       iter != blocks->end();
+       ++iter) {
+    BasicBlock *bb = iter->second;
+    Set::const_iterator entry = seen_basic_blocks.find(bb);
+    if (entry == seen_basic_blocks.end()) {
+      // This is the first time we see this particular sequence of
+      // instructions. Enter the basic block into the set of known
+      // basic blocks.
+      seen_basic_blocks.insert(bb);
+    } else {
+      // We have previously seen another basic block that defines the same
+      // sequence of instructions. Merge the two blocks and update the
+      // pointer in the "blocks" map.
+      iter->second = *entry;
+    }
+  }
+}
+
+void CodeGen::ComputeIncomingBranches(BasicBlock *block,
+                                      const TargetsToBlocks& targets_to_blocks,
+                                      IncomingBranches *incoming_branches) {
+  // We increment the number of incoming branches each time we encounter a
+  // basic block. But we only traverse recursively the very first time we
+  // encounter a new block. This is necessary to make topological sorting
+  // work correctly.
+  if (++(*incoming_branches)[block] == 1) {
+    Instruction *last_insn = block->instructions.back();
+    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
+      ComputeIncomingBranches(
+                             targets_to_blocks.find(last_insn->jt_ptr)->second,
+                             targets_to_blocks, incoming_branches);
+      if (BPF_OP(last_insn->code) != BPF_JA) {
+        ComputeIncomingBranches(
+                             targets_to_blocks.find(last_insn->jf_ptr)->second,
+                             targets_to_blocks, incoming_branches);
+      }
+    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
+      ComputeIncomingBranches(targets_to_blocks.find(last_insn->next)->second,
+                              targets_to_blocks, incoming_branches);
+    }
+  }
+}
+
+void CodeGen::TopoSortBasicBlocks(BasicBlock *first_block,
+                                  const TargetsToBlocks& blocks,
+                                  BasicBlocks *basic_blocks) {
+  // Textbook implementation of a toposort. We keep looking for basic blocks
+  // that don't have any incoming branches (initially, this is just the
+  // "first_block") and add them to the topologically sorted list of
+  // "basic_blocks". As we do so, we remove outgoing branches. This potentially
+  // ends up making our descendants eligible for the sorted list. The
+  // sorting algorithm terminates when there are no more basic blocks that have
+  // no incoming branches. If we didn't move all blocks from the set of
+  // "unordered_blocks" to the sorted list of "basic_blocks", there must have
+  // been a cyclic dependency. This should never happen in a BPF program, as
+  // well-formed BPF programs only ever have forward branches.
+  IncomingBranches unordered_blocks;
+  ComputeIncomingBranches(first_block, blocks, &unordered_blocks);
+
+  std::set<BasicBlock *> heads;
+  for (;;) {
+    // Move block from "unordered_blocks" to "basic_blocks".
+    basic_blocks->push_back(first_block);
+
+    // Inspect last instruction in the basic block. This is typically either a
+    // jump or a return statement. But it could also be a "normal" instruction
+    // that is followed by a jump target.
+    Instruction *last_insn = first_block->instructions.back();
+    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
+      // Remove outgoing branches. This might end up moving our descendants
+      // into set of "head" nodes that no longer have any incoming branches.
+      TargetsToBlocks::const_iterator iter;
+      if (BPF_OP(last_insn->code) != BPF_JA) {
+        iter = blocks.find(last_insn->jf_ptr);
+        if (!--unordered_blocks[iter->second]) {
+          heads.insert(iter->second);
+        }
+      }
+      iter = blocks.find(last_insn->jt_ptr);
+      if (!--unordered_blocks[iter->second]) {
+        first_block = iter->second;
+        continue;
+      }
+    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
+      // We encountered an instruction that doesn't change code flow. Try to
+      // pick the next "first_block" from "last_insn->next", if possible.
+      TargetsToBlocks::const_iterator iter;
+      iter = blocks.find(last_insn->next);
+      if (!--unordered_blocks[iter->second]) {
+        first_block = iter->second;
+        continue;
+      } else {
+        // Our basic block is supposed to be followed by "last_insn->next",
+        // but dependencies prevent this from happening. Insert a BPF_JA
+        // instruction to correct the code flow.
+        Instruction *ja = MakeInstruction(BPF_JMP+BPF_JA, 0, last_insn->next);
+        first_block->instructions.push_back(ja);
+        last_insn->next = ja;
+      }
+    }
+    if (heads.empty()) {
+      if (unordered_blocks.size() != basic_blocks->size()) {
+        SANDBOX_DIE("Internal compiler error; cyclic graph detected");
+      }
+      return;
+    }
+    // Proceed by picking an arbitrary node from the set of basic blocks that
+    // do not have any incoming branches.
+    first_block = *heads.begin();
+    heads.erase(heads.begin());
+  }
+}
+
+void CodeGen::ComputeRelativeJumps(BasicBlocks *basic_blocks,
+                                   const TargetsToBlocks& targets_to_blocks) {
+  // While we previously used pointers in jt_ptr and jf_ptr to link jump
+  // instructions to their targets, we now convert these jumps to relative
+  // jumps that are suitable for loading the BPF program into the kernel.
+  int offset = 0;
+
+  // Since we just completed a toposort, all jump targets are guaranteed to
+  // go forward. This means, iterating over the basic blocks in reverse makes
+  // it trivial to compute the correct offsets.
+  BasicBlock *bb = NULL;
+  BasicBlock *last_bb = NULL;
+  for (BasicBlocks::reverse_iterator iter = basic_blocks->rbegin();
+       iter != basic_blocks->rend();
+       ++iter) {
+    last_bb = bb;
+    bb = *iter;
+    Instruction *insn = bb->instructions.back();
+    if (BPF_CLASS(insn->code) == BPF_JMP) {
+      // Basic block ended in a jump instruction. We can now compute the
+      // appropriate offsets.
+      if (BPF_OP(insn->code) == BPF_JA) {
+        // "Always" jumps use the 32bit "k" field for the offset, instead
+        // of the 8bit "jt" and "jf" fields.
+        int jmp =
+          offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
+        insn->k  = jmp;
+        insn->jt = insn->jf = 0;
+      } else {
+        // The offset computations for conditional jumps are just the same
+        // as for "always" jumps.
+        int jt = offset-targets_to_blocks.find(insn->jt_ptr)->second->offset;
+        int jf = offset-targets_to_blocks.find(insn->jf_ptr)->second->offset;
+
+        // There is an added complication, because conditional relative jumps
+        // can only jump at most 255 instructions forward. If we have to jump
+        // further, insert an extra "always" jump.
+        Instructions::size_type jmp = bb->instructions.size();
+        if (jt > 255 || (jt == 255 && jf > 255)) {
+          Instruction *ja = MakeInstruction(BPF_JMP+BPF_JA, 0, insn->jt_ptr);
+          bb->instructions.push_back(ja);
+          ja->k  = jt;
+          ja->jt = ja->jf = 0;
+
+          // The newly inserted "always" jump, of course, requires us to adjust
+          // the jump targets in the original conditional jump.
+          jt = 0;
+          ++jf;
+        }
+        if (jf > 255) {
+          Instruction *ja = MakeInstruction(BPF_JMP+BPF_JA, 0, insn->jf_ptr);
+          bb->instructions.insert(bb->instructions.begin() + jmp, ja);
+          ja->k  = jf;
+          ja->jt = ja->jf = 0;
+
+          // Again, we have to adjust the jump targets in the original
+          // conditional jump.
+          ++jt;
+          jf = 0;
+        }
+
+        // Now we can finally set the relative jump targets in the conditional
+        // jump instruction. Afterwards, we must no longer access the jt_ptr
+        // and jf_ptr fields.
+        insn->jt = jt;
+        insn->jf = jf;
+      }
+    } else if (BPF_CLASS(insn->code) != BPF_RET &&
+               targets_to_blocks.find(insn->next)->second != last_bb) {
+      SANDBOX_DIE("Internal compiler error; invalid basic block encountered");
+    }
+
+    // Proceed to next basic block.
+    offset += bb->instructions.size();
+    bb->offset = offset;
+  }
+  return;
+}
+
+void CodeGen::ConcatenateBasicBlocks(const BasicBlocks& basic_blocks,
+                                     Sandbox::Program *program) {
+  // Our basic blocks have been sorted and relative jump offsets have been
+  // computed. The last remaining step is for all the instructions in our
+  // basic blocks to be concatenated into a BPF program.
+  program->clear();
+  for (BasicBlocks::const_iterator bb_iter = basic_blocks.begin();
+       bb_iter != basic_blocks.end();
+       ++bb_iter) {
+    const BasicBlock& bb = **bb_iter;
+    for (Instructions::const_iterator insn_iter = bb.instructions.begin();
+         insn_iter != bb.instructions.end();
+         ++insn_iter) {
+      const Instruction& insn = **insn_iter;
+      program->push_back(
+        (struct sock_filter) { insn.code, insn.jt, insn.jf, insn.k });
+    }
+  }
+  return;
+}
+
+void CodeGen::Compile(Instruction *instructions, Sandbox::Program *program) {
+  if (compiled_) {
+    SANDBOX_DIE("Cannot call Compile() multiple times. Create a new code "
+                "generator instead");
+  }
+  compiled_ = true;
+
+  BranchTargets branch_targets;
+  FindBranchTargets(*instructions, &branch_targets);
+  TargetsToBlocks all_blocks;
+  BasicBlock *first_block =
+    CutGraphIntoBasicBlocks(instructions, branch_targets, &all_blocks);
+  MergeTails(&all_blocks);
+  BasicBlocks basic_blocks;
+  TopoSortBasicBlocks(first_block, all_blocks, &basic_blocks);
+  ComputeRelativeJumps(&basic_blocks, all_blocks);
+  ConcatenateBasicBlocks(basic_blocks, program);
+  return;
+}
+
+}  // namespace
diff --git a/sandbox/linux/seccomp-bpf/codegen.h b/sandbox/linux/seccomp-bpf/codegen.h
new file mode 100644
index 0000000..b7d1d39
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/codegen.h
@@ -0,0 +1,147 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SECCOMP_BPF_CODEGEN_H__
+#define SANDBOX_LINUX_SECCOMP_BPF_CODEGEN_H__
+
+#include <map>
+#include <set>
+#include <vector>
+
+#include "sandbox/linux/seccomp-bpf/basicblock.h"
+#include "sandbox/linux/seccomp-bpf/instruction.h"
+#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
+
+
+namespace playground2 {
+
+typedef std::vector<Instruction *> Instructions;
+typedef std::vector<BasicBlock *> BasicBlocks;
+typedef std::map<const Instruction *, int> BranchTargets;
+typedef std::map<const Instruction *, BasicBlock *> TargetsToBlocks;
+typedef std::map<const BasicBlock *, int> IncomingBranches;
+
+// The code generator instantiates a basic compiler that can convert a
+// graph of BPF instructions into a well-formed stream of BPF instructions.
+// Most notably, it ensures that jumps are always forward and don't exceed
+// the limit of 255 instructions imposed by the instruction set.
+//
+// Callers would typically create a new CodeGen object and then use it to
+// build a DAG of Instructions. They'll eventually call Compile() to convert
+// this DAG to a Sandbox::Program.
+//
+// Instructions can be chained at the time when they are created, or they
+// can be joined later by calling JoinInstructions().
+//
+//   CodeGen gen;
+//   Instruction *dag, *branch;
+//   dag =
+//     gen.MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
+//                         offsetof(struct arch_seccomp_data, nr),
+//   branch =
+//     gen.MakeInstruction(BPF_JMP+BPF_EQ+BPF_K, __NR_getpid,
+//                         Trap(GetPidHandler, NULL), NULL);
+//   gen.JoinInstructions(branch,
+//     gen.MakeInstruction(BPF_RET+BPF_K, ErrorCode(ErrorCode::ERR_ALLOWED)));
+//
+//   // Simplified code follows; in practice, it is important to avoid calling
+//   // any C++ destructors after starting the sandbox.
+//   Sandbox::Program program;
+//   gen.Compile(dag, program);
+//   const struct sock_fprog prog = {
+//     static_cast<unsigned short>(program->size()), &program[0] };
+//   prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+//
+class CodeGen {
+ public:
+  CodeGen();
+  ~CodeGen();
+
+  // This is a helper method that can be used for debugging purposes. It is
+  // not normally called.
+  static void PrintProgram(const Sandbox::Program& program);
+
+  // Create a new instruction. Instructions form a DAG. The instruction objects
+  // are owned by the CodeGen object. They do not need to be explicitly
+  // deleted.
+  // For details on the possible parameters refer to <linux/filter.h>
+  Instruction *MakeInstruction(uint16_t code, uint32_t k,
+                               Instruction *next = NULL);
+  Instruction *MakeInstruction(uint16_t code, const ErrorCode& err);
+  Instruction *MakeInstruction(uint16_t code, uint32_t k,
+                               Instruction *jt, Instruction *jf);
+
+  // Join two (sequences of) instructions. This is useful, if the "next"
+  // parameter had not originally been given in the call to MakeInstruction(),
+  // or if a (conditional) jump still has an unsatisfied target.
+  void JoinInstructions(Instruction *head, Instruction *tail);
+
+  // Compiles the graph of instructions into a BPF program that can be passed
+  // to the kernel. Please note that this function modifies the graph in place
+  // and must therefore only be called once per graph.
+  void Compile(Instruction *instructions, Sandbox::Program *program);
+
+ private:
+  friend class CodeGenUnittestHelper;
+
+  // Find all the instructions that are the target of BPF_JMPs.
+  void FindBranchTargets(const Instruction& instructions,
+                         BranchTargets *branch_targets);
+
+  // Combine instructions between "head" and "tail" into a new basic block.
+  // Basic blocks are defined as sequences of instructions whose only branch
+  // target is the very first instruction; furthermore, any BPF_JMP or BPF_RET
+  // instruction must be at the very end of the basic block.
+  BasicBlock *MakeBasicBlock(Instruction *head, Instruction *tail);
+
+  // Creates a basic block and adds it to "basic_blocks"; sets "first_block"
+  // if it is still NULL.
+  void AddBasicBlock(Instruction *head, Instruction *tail,
+                     const BranchTargets& branch_targets,
+                     TargetsToBlocks *basic_blocks, BasicBlock **first_block);
+
+  // Cuts the DAG of instructions into basic blocks.
+  BasicBlock *CutGraphIntoBasicBlocks(Instruction *instructions,
+                                      const BranchTargets& branch_targets,
+                                      TargetsToBlocks *blocks);
+
+  // Find common tail sequences of basic blocks and coalesce them.
+  void MergeTails(TargetsToBlocks *blocks);
+
+  // For each basic block, compute the number of incoming branches.
+  void ComputeIncomingBranches(BasicBlock *block,
+                               const TargetsToBlocks& targets_to_blocks,
+                               IncomingBranches *incoming_branches);
+
+  // Topologically sort the basic blocks so that all jumps are forward jumps.
+  // This is a requirement for any well-formed BPF program.
+  void TopoSortBasicBlocks(BasicBlock *first_block,
+                           const TargetsToBlocks& blocks,
+                           BasicBlocks *basic_blocks);
+
+  // Convert jt_ptr_ and jf_ptr_ fields in BPF_JMP instructions to valid
+  // jt_ and jf_ jump offsets. This can result in BPF_JA instructions being
+  // inserted, if we need to jump over more than 256 instructions.
+  void ComputeRelativeJumps(BasicBlocks *basic_blocks,
+                            const TargetsToBlocks& targets_to_blocks);
+
+  // Concatenate instructions from all basic blocks into a BPF program that
+  // can be passed to the kernel.
+  void ConcatenateBasicBlocks(const BasicBlocks&, Sandbox::Program *program);
+
+  // We stick all instructions and basic blocks into pools that get destroyed
+  // when the CodeGen object is destroyed. This way, we neither need to worry
+  // about explicitly managing ownership, nor do we need to worry about using
+  // smart pointers in the presence of circular references.
+  Instructions instructions_;
+  BasicBlocks  basic_blocks_;
+
+  // Compile() must only ever be called once as it makes destructive changes
+  // to the DAG.
+  bool compiled_;
+};
+
+}  // namespace
+
+#endif  // SANDBOX_LINUX_SECCOMP_BPF_CODEGEN_H__
diff --git a/sandbox/linux/seccomp-bpf/codegen_unittest.cc b/sandbox/linux/seccomp-bpf/codegen_unittest.cc
new file mode 100644
index 0000000..d24bcf2
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/codegen_unittest.cc
@@ -0,0 +1,445 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <algorithm>
+#include <set>
+#include <vector>
+
+#include "sandbox/linux/seccomp-bpf/codegen.h"
+#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
+#include "sandbox/linux/tests/unit_tests.h"
+
+namespace playground2 {
+
+class SandboxUnittestHelper : public Sandbox {
+ public:
+  typedef Sandbox::Program Program;
+};
+
+// We want to access some of the private methods in the code generator. We
+// do so by defining a "friend" that makes these methods public for us.
+class CodeGenUnittestHelper : public CodeGen {
+ public:
+  void FindBranchTargets(const Instruction& instructions,
+                         BranchTargets *branch_targets) {
+    CodeGen::FindBranchTargets(instructions, branch_targets);
+  }
+
+  BasicBlock *CutGraphIntoBasicBlocks(Instruction *insns,
+                                      const BranchTargets& branch_targets,
+                                      TargetsToBlocks *blocks) {
+    return CodeGen::CutGraphIntoBasicBlocks(insns, branch_targets, blocks);
+  }
+
+  void MergeTails(TargetsToBlocks *blocks) {
+    CodeGen::MergeTails(blocks);
+  }
+};
+
+enum { NO_FLAGS            = 0x0000,
+       HAS_MERGEABLE_TAILS = 0x0001,
+};
+
+Instruction *SampleProgramOneInstruction(CodeGen *codegen, int *flags) {
+  // Create the most basic valid BPF program:
+  //    RET ERR_ALLOWED
+  *flags = NO_FLAGS;
+  return codegen->MakeInstruction(BPF_RET+BPF_K,
+                                  ErrorCode(ErrorCode::ERR_ALLOWED));
+}
+
+Instruction *SampleProgramSimpleBranch(CodeGen *codegen, int *flags) {
+  // Create a program with a single branch:
+  //    JUMP if eq 42 then $0 else $1
+  // 0: RET EPERM
+  // 1: RET ERR_ALLOWED
+  *flags = NO_FLAGS;
+  return codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42,
+         codegen->MakeInstruction(BPF_RET+BPF_K,
+                                  ErrorCode(EPERM)),
+         codegen->MakeInstruction(BPF_RET+BPF_K,
+                                  ErrorCode(ErrorCode::ERR_ALLOWED)));
+}
+
+Instruction *SampleProgramAtypicalBranch(CodeGen *codegen, int *flags) {
+  // Create a program with a single branch:
+  //    JUMP if eq 42 then $0 else $0
+  // 0: RET ERR_ALLOWED
+
+  // N.B.: As the instructions in both sides of the branch are already
+  //       the same object, we do not actually have any "mergeable" branches.
+  //       This needs to be reflected in our choice of "flags".
+  *flags = NO_FLAGS;
+
+  Instruction *ret =
+    codegen->MakeInstruction(BPF_RET+BPF_K,
+                             ErrorCode(ErrorCode::ERR_ALLOWED));
+  return codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42, ret, ret);
+}
+
+Instruction *SampleProgramComplex(CodeGen *codegen, int *flags) {
+  // Creates a basic BPF program that we'll use to test some of the code:
+  //    JUMP if eq 42 the $0 else $1     (insn6)
+  // 0: LD 23                            (insn5)
+  // 1: JUMP if eq 42 then $2 else $4    (insn4)
+  // 2: JUMP to $3                       (insn1)
+  // 3: LD 42                            (insn0)
+  //    RET ErrorCode(42)                (insn2)
+  // 4: LD 42                            (insn3)
+  //    RET ErrorCode(42)                (insn3+)
+  *flags = HAS_MERGEABLE_TAILS;
+
+  Instruction *insn0 = codegen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, 42);
+  SANDBOX_ASSERT(insn0);
+  SANDBOX_ASSERT(insn0->code == BPF_LD+BPF_W+BPF_ABS);
+  SANDBOX_ASSERT(insn0->k == 42);
+  SANDBOX_ASSERT(insn0->next == NULL);
+
+  Instruction *insn1 = codegen->MakeInstruction(BPF_JMP+BPF_JA, 0, insn0);
+  SANDBOX_ASSERT(insn1);
+  SANDBOX_ASSERT(insn1->code == BPF_JMP+BPF_JA);
+  SANDBOX_ASSERT(insn1->jt_ptr == insn0);
+
+  Instruction *insn2 = codegen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(42));
+  SANDBOX_ASSERT(insn2);
+  SANDBOX_ASSERT(insn2->code == BPF_RET+BPF_K);
+  SANDBOX_ASSERT(insn2->next == NULL);
+
+  // We explicitly duplicate instructions so that MergeTails() can coalesce
+  // them later.
+  Instruction *insn3 = codegen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, 42,
+    codegen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(42)));
+
+  Instruction *insn4 = codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42,
+                                                insn1, insn3);
+  SANDBOX_ASSERT(insn4);
+  SANDBOX_ASSERT(insn4->code == BPF_JMP+BPF_JEQ+BPF_K);
+  SANDBOX_ASSERT(insn4->k == 42);
+  SANDBOX_ASSERT(insn4->jt_ptr == insn1);
+  SANDBOX_ASSERT(insn4->jf_ptr == insn3);
+
+  codegen->JoinInstructions(insn0, insn2);
+  SANDBOX_ASSERT(insn0->next == insn2);
+
+  Instruction *insn5 = codegen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
+                                                23, insn4);
+  SANDBOX_ASSERT(insn5);
+  SANDBOX_ASSERT(insn5->code == BPF_LD+BPF_W+BPF_ABS);
+  SANDBOX_ASSERT(insn5->k == 23);
+  SANDBOX_ASSERT(insn5->next == insn4);
+
+  // Force a basic block that ends in neither a jump instruction nor a return
+  // instruction. It only contains "insn5". This exercises one of the less
+  // common code paths in the topo-sort algorithm.
+  // This also gives us a diamond-shaped pattern in our graph, which stresses
+  // another aspect of the topo-sort algorithm (namely, the ability to
+  // correctly count the incoming branches for subtrees that are not disjunct).
+  Instruction *insn6 = codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42,
+                                                insn5, insn4);
+
+  return insn6;
+}
+
+void ForAllPrograms(void (*test)(CodeGenUnittestHelper *, Instruction *, int)){
+  Instruction *(*function_table[])(CodeGen *codegen, int *flags) = {
+    SampleProgramOneInstruction,
+    SampleProgramSimpleBranch,
+    SampleProgramAtypicalBranch,
+    SampleProgramComplex,
+  };
+
+  for (size_t i = 0; i < arraysize(function_table); ++i) {
+    CodeGenUnittestHelper codegen;
+    int flags = NO_FLAGS;
+    Instruction *prg = function_table[i](&codegen, &flags);
+    test(&codegen, prg, flags);
+  }
+}
+
+void MakeInstruction(CodeGenUnittestHelper *codegen,
+                     Instruction *program, int) {
+  // Nothing to do here
+}
+
+SANDBOX_TEST(CodeGen, MakeInstruction) {
+  ForAllPrograms(MakeInstruction);
+}
+
+void FindBranchTargets(CodeGenUnittestHelper *codegen, Instruction *prg, int) {
+  BranchTargets branch_targets;
+  codegen->FindBranchTargets(*prg, &branch_targets);
+
+  // Verifying the general properties that should be true for every
+  // well-formed BPF program.
+  // Perform a depth-first traversal of the BPF program an verify that all
+  // targets of BPF_JMP instructions are represented in the "branch_targets".
+  // At the same time, compute a set of both the branch targets and all the
+  // instructions in the program.
+  std::vector<Instruction *> stack;
+  std::set<Instruction *> all_instructions;
+  std::set<Instruction *> target_instructions;
+  BranchTargets::const_iterator end = branch_targets.end();
+  for (Instruction *insn = prg;;) {
+    all_instructions.insert(insn);
+    if (BPF_CLASS(insn->code) == BPF_JMP) {
+      target_instructions.insert(insn->jt_ptr);
+      SANDBOX_ASSERT(insn->jt_ptr != NULL);
+      SANDBOX_ASSERT(branch_targets.find(insn->jt_ptr) != end);
+      if (BPF_OP(insn->code) != BPF_JA) {
+        target_instructions.insert(insn->jf_ptr);
+        SANDBOX_ASSERT(insn->jf_ptr != NULL);
+        SANDBOX_ASSERT(branch_targets.find(insn->jf_ptr) != end);
+        stack.push_back(insn->jf_ptr);
+      }
+      insn = insn->jt_ptr;
+    } else if (BPF_CLASS(insn->code) == BPF_RET) {
+      SANDBOX_ASSERT(insn->next == NULL);
+      if (stack.empty()) {
+        break;
+      }
+      insn = stack.back();
+      stack.pop_back();
+    } else {
+      SANDBOX_ASSERT(insn->next != NULL);
+      insn = insn->next;
+    }
+  }
+  SANDBOX_ASSERT(target_instructions.size() == branch_targets.size());
+
+  // We can now subtract the set of the branch targets from the set of all
+  // instructions. This gives us a set with the instructions that nobody
+  // ever jumps to. Verify that they are no included in the
+  // "branch_targets" that FindBranchTargets() computed for us.
+  Instructions non_target_instructions(all_instructions.size() -
+                                       target_instructions.size());
+  set_difference(all_instructions.begin(), all_instructions.end(),
+                 target_instructions.begin(), target_instructions.end(),
+                 non_target_instructions.begin());
+  for (Instructions::const_iterator iter = non_target_instructions.begin();
+       iter != non_target_instructions.end();
+       ++iter) {
+    SANDBOX_ASSERT(branch_targets.find(*iter) == end);
+  }
+}
+
+SANDBOX_TEST(CodeGen, FindBranchTargets) {
+  ForAllPrograms(FindBranchTargets);
+}
+
+void CutGraphIntoBasicBlocks(CodeGenUnittestHelper *codegen,
+                             Instruction *prg, int) {
+  BranchTargets branch_targets;
+  codegen->FindBranchTargets(*prg, &branch_targets);
+  TargetsToBlocks all_blocks;
+  BasicBlock *first_block =
+    codegen->CutGraphIntoBasicBlocks(prg, branch_targets, &all_blocks);
+  SANDBOX_ASSERT(first_block != NULL);
+  SANDBOX_ASSERT(first_block->instructions.size() > 0);
+  Instruction *first_insn = first_block->instructions[0];
+
+  // Basic blocks are supposed to start with a branch target and end with
+  // either a jump or a return instruction. It can also end, if the next
+  // instruction forms the beginning of a new basic block. There should be
+  // no other jumps or return instructions in the middle of a basic block.
+  for (TargetsToBlocks::const_iterator bb_iter = all_blocks.begin();
+       bb_iter != all_blocks.end();
+       ++bb_iter) {
+    BasicBlock *bb = bb_iter->second;
+    SANDBOX_ASSERT(bb != NULL);
+    SANDBOX_ASSERT(bb->instructions.size() > 0);
+    Instruction *insn = bb->instructions[0];
+    SANDBOX_ASSERT(insn == first_insn ||
+                   branch_targets.find(insn) != branch_targets.end());
+    for (Instructions::const_iterator insn_iter = bb->instructions.begin();;){
+      insn = *insn_iter;
+      if (++insn_iter != bb->instructions.end()) {
+        SANDBOX_ASSERT(BPF_CLASS(insn->code) != BPF_JMP);
+        SANDBOX_ASSERT(BPF_CLASS(insn->code) != BPF_RET);
+      } else {
+        SANDBOX_ASSERT(BPF_CLASS(insn->code) == BPF_JMP ||
+                       BPF_CLASS(insn->code) == BPF_RET ||
+                       branch_targets.find(insn->next) !=
+                         branch_targets.end());
+        break;
+      }
+      SANDBOX_ASSERT(branch_targets.find(*insn_iter) == branch_targets.end());
+    }
+  }
+}
+
+SANDBOX_TEST(CodeGen, CutGraphIntoBasicBlocks) {
+  ForAllPrograms(CutGraphIntoBasicBlocks);
+}
+
+void MergeTails(CodeGenUnittestHelper *codegen, Instruction *prg,
+                int flags) {
+  BranchTargets branch_targets;
+  codegen->FindBranchTargets(*prg, &branch_targets);
+  TargetsToBlocks all_blocks;
+  BasicBlock *first_block =
+    codegen->CutGraphIntoBasicBlocks(prg, branch_targets, &all_blocks);
+
+  // The shape of our graph and thus the function of our program should
+  // still be unchanged after we run MergeTails(). We verify this by
+  // serializing the graph and verifying that it is still the same.
+  // We also verify that at least some of the edges changed because of
+  // tail merging.
+  std::string graph[2];
+  std::string edges[2];
+
+  // The loop executes twice. After the first run, we call MergeTails() on
+  // our graph.
+  for (int i = 0;;) {
+    // Traverse the entire program in depth-first order.
+    std::vector<BasicBlock *> stack;
+    for (BasicBlock *bb = first_block;;) {
+      // Serialize the instructions in this basic block. In general, we only
+      // need to serialize "code" and "k"; except for a BPF_JA instruction
+      // where "k" isn't set.
+      // The stream of instructions should be unchanged after MergeTails().
+      for (Instructions::const_iterator iter = bb->instructions.begin();
+           iter != bb->instructions.end();
+           ++iter) {
+        graph[i].append(reinterpret_cast<char *>(&(*iter)->code),
+                        sizeof((*iter)->code));
+        if (BPF_CLASS((*iter)->code) != BPF_JMP ||
+            BPF_OP((*iter)->code) != BPF_JA) {
+          graph[i].append(reinterpret_cast<char *>(&(*iter)->k),
+                          sizeof((*iter)->k));
+        }
+      }
+
+      // Also serialize the addresses the basic blocks as we encounter them.
+      // This will change as basic blocks are coalesed by MergeTails().
+      edges[i].append(reinterpret_cast<char *>(&bb), sizeof(bb));
+
+      // Depth-first traversal of the graph. We only ever need to look at the
+      // very last instruction in the basic block, as that is the only one that
+      // can change code flow.
+      Instruction *insn = bb->instructions.back();
+      if (BPF_CLASS(insn->code) == BPF_JMP) {
+        // For jump instructions, we need to remember the "false" branch while
+        // traversing the "true" branch. This is not necessary for BPF_JA which
+        // only has a single branch.
+        if (BPF_OP(insn->code) != BPF_JA) {
+          stack.push_back(all_blocks[insn->jf_ptr]);
+        }
+        bb = all_blocks[insn->jt_ptr];
+      } else if (BPF_CLASS(insn->code) == BPF_RET) {
+        // After a BPF_RET, see if we need to back track.
+        if (stack.empty()) {
+          break;
+        }
+        bb = stack.back();
+        stack.pop_back();
+      } else {
+        // For "normal" instructions, just follow to the next basic block.
+        bb = all_blocks[insn->next];
+      }
+    }
+
+    // Our loop runs exactly two times.
+    if (++i > 1) {
+      break;
+    }
+    codegen->MergeTails(&all_blocks);
+  }
+  SANDBOX_ASSERT(graph[0] == graph[1]);
+  if (flags & HAS_MERGEABLE_TAILS) {
+    SANDBOX_ASSERT(edges[0] != edges[1]);
+  } else {
+    SANDBOX_ASSERT(edges[0] == edges[1]);
+  }
+}
+
+SANDBOX_TEST(CodeGen, MergeTails) {
+  ForAllPrograms(MergeTails);
+}
+
+void CompileAndCompare(CodeGenUnittestHelper *codegen, Instruction *prg, int) {
+  // TopoSortBasicBlocks() has internal checks that cause it to fail, if it
+  // detects a problem. Typically, if anything goes wrong, this looks to the
+  // TopoSort algorithm as if there had been cycles in the input data.
+  // This provides a pretty good unittest.
+  // We hand-crafted the program returned by SampleProgram() to exercise
+  // several of the more interesting code-paths. See comments in
+  // SampleProgram() for details.
+  // In addition to relying on the internal consistency checks in the compiler,
+  // we also serialize the graph and the resulting BPF program and compare
+  // them. With the exception of BPF_JA instructions that might have been
+  // inserted, both instruction streams should be equivalent.
+  // As Compile() modifies the instructions, we have to serialize the graph
+  // before calling Compile().
+  std::string source;
+  Instructions source_stack;
+  for (const Instruction *insn = prg, *next; insn; insn = next) {
+    if (BPF_CLASS(insn->code) == BPF_JMP) {
+      if (BPF_OP(insn->code) == BPF_JA) {
+        // Do not serialize BPF_JA instructions (see above).
+        next = insn->jt_ptr;
+        continue;
+      } else {
+        source_stack.push_back(insn->jf_ptr);
+        next = insn->jt_ptr;
+      }
+    } else if (BPF_CLASS(insn->code) == BPF_RET) {
+      if (source_stack.empty()) {
+        next = NULL;
+      } else {
+        next = source_stack.back();
+        source_stack.pop_back();
+      }
+    } else {
+      next = insn->next;
+    }
+    // Only serialize "code" and "k". That's all the information we need to
+    // compare. The rest of the information is encoded in the order of
+    // instructions.
+    source.append(reinterpret_cast<const char *>(&insn->code),
+                  sizeof(insn->code));
+    source.append(reinterpret_cast<const char *>(&insn->k),
+                  sizeof(insn->k));
+  }
+
+  // Compile the program
+  SandboxUnittestHelper::Program bpf;
+  codegen->Compile(prg, &bpf);
+
+  // Serialize the resulting BPF instructions.
+  std::string assembly;
+  std::vector<int> assembly_stack;
+  for (int idx = 0; idx >= 0;) {
+    SANDBOX_ASSERT(idx < (int)bpf.size());
+    struct sock_filter& insn = bpf[idx];
+    if (BPF_CLASS(insn.code) == BPF_JMP) {
+      if (BPF_OP(insn.code) == BPF_JA) {
+        // Do not serialize BPF_JA instructions (see above).
+        idx += insn.k + 1;
+        continue;
+      } else {
+        assembly_stack.push_back(idx + insn.jf + 1);
+        idx += insn.jt + 1;
+      }
+    } else if (BPF_CLASS(insn.code) == BPF_RET) {
+      if (assembly_stack.empty()) {
+        idx = -1;
+      } else {
+        idx = assembly_stack.back();
+        assembly_stack.pop_back();
+      }
+    } else {
+      ++idx;
+    }
+    // Serialize the same information that we serialized before compilation.
+    assembly.append(reinterpret_cast<char *>(&insn.code), sizeof(insn.code));
+    assembly.append(reinterpret_cast<char *>(&insn.k),    sizeof(insn.k));
+  }
+  SANDBOX_ASSERT(source == assembly);
+}
+
+SANDBOX_TEST(CodeGen, All) {
+  ForAllPrograms(CompileAndCompare);
+}
+
+}  // namespace playground2
diff --git a/sandbox/linux/seccomp-bpf/errorcode.h b/sandbox/linux/seccomp-bpf/errorcode.h
index 53622c4..2b941ee 100644
--- a/sandbox/linux/seccomp-bpf/errorcode.h
+++ b/sandbox/linux/seccomp-bpf/errorcode.h
@@ -83,6 +83,7 @@ class ErrorCode {
   };
 
  private:
+  friend class CodeGen;
   friend class Sandbox;
   friend class Verifier;
 
diff --git a/sandbox/linux/seccomp-bpf/instruction.h b/sandbox/linux/seccomp-bpf/instruction.h
new file mode 100644
index 0000000..0fc8123
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/instruction.h
@@ -0,0 +1,63 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SECCOMP_BPF_INSTRUCTION_H__
+#define SANDBOX_LINUX_SECCOMP_BPF_INSTRUCTION_H__
+
+#include <stdint.h>
+
+
+namespace playground2 {
+
+// The fields in this structure have the same meaning as the corresponding
+// fields in "struct sock_filter". See <linux/filter.h> for a lot more
+// detail.
+// code     -- Opcode of the instruction. This is typically a bitwise
+//             combination BPF_XXX values.
+// k        -- Operand; BPF instructions take zero or one operands. Operands
+//             are 32bit-wide constants, if present. They can be immediate
+//             values (if BPF_K is present in "code_"), addresses (if BPF_ABS
+//             is present in "code_"), or relative jump offsets (if BPF_JMP
+//             and BPF_JA are present in "code_").
+// jt, jf   -- all conditional jumps have a 8bit-wide jump offset that allows
+//             jumps of up to 256 instructions forward. Conditional jumps are
+//             identified by BPF_JMP in "code_", but the lack of BPF_JA.
+//             Conditional jumps have a "t"rue and "f"alse branch.
+struct Instruction {
+  // Constructor for an non-jumping instruction or for an unconditional
+  // "always" jump.
+  Instruction(uint16_t c, uint32_t parm, Instruction *n) :
+    code(c), next(n), k(parm) { }
+
+  // Constructor for a conditional jump instruction.
+  Instruction(uint16_t c, uint32_t parm, Instruction *jt, Instruction *jf) :
+    code(c), jt_ptr(jt), jf_ptr(jf), k(parm) { }
+
+  uint16_t code;
+  union {
+    // When code generation is complete, we will have computed relative
+    // branch targets that are in the range 0..255.
+    struct {
+      uint8_t jt, jf;
+    };
+
+    // While assembling the BPF program, we use pointers for branch targets.
+    // Once we have computed basic blocks, these pointers will be entered as
+    // keys in a TargetsToBlocks map and should no longer be dereferenced
+    // directly.
+    struct {
+      Instruction *jt_ptr, *jf_ptr;
+    };
+
+    // While assembling the BPF program, non-jumping instructions are linked
+    // by the "next_" pointer. This field is no longer needed when we have
+    // computed basic blocks.
+    Instruction *next;
+  };
+  uint32_t k;
+};
+
+}  // namespace
+
+#endif  // SANDBOX_LINUX_SECCOMP_BPF_INSTRUCTION_H__
diff --git a/sandbox/linux/seccomp-bpf/sandbox_bpf.cc b/sandbox/linux/seccomp-bpf/sandbox_bpf.cc
index 643eacb..6926b5d 100644
--- a/sandbox/linux/seccomp-bpf/sandbox_bpf.cc
+++ b/sandbox/linux/seccomp-bpf/sandbox_bpf.cc
@@ -2,8 +2,7 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
-#include <time.h>
-
+#include "sandbox/linux/seccomp-bpf/codegen.h"
 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
 #include "sandbox/linux/seccomp-bpf/syscall_iterator.h"
 #include "sandbox/linux/seccomp-bpf/verifier.h"
@@ -330,40 +329,41 @@ void Sandbox::installFilter(bool quiet) {
   }
 
   // Assemble the BPF filter program.
-  Program *program = new Program();
-  if (!program) {
+  CodeGen *gen = new CodeGen();
+  if (!gen) {
     SANDBOX_DIE("Out of memory");
   }
 
   // If the architecture doesn't match SECCOMP_ARCH, disallow the
   // system call.
-  program->push_back((struct sock_filter)
-    BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct arch_seccomp_data, arch)));
-  program->push_back((struct sock_filter)
-    BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH, 1, 0));
-
-  program->push_back((struct sock_filter)
-    BPF_STMT(BPF_RET+BPF_K,
-             Kill("Invalid audit architecture in BPF filter").err()));
-
-  // Grab the system call number, so that we can implement jump tables.
-  program->push_back((struct sock_filter)
-    BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct arch_seccomp_data, nr)));
+  Instruction *tail;
+  Instruction *head =
+    gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
+                         offsetof(struct arch_seccomp_data, arch),
+    gen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_ARCH,
+  tail =
+    // Grab the system call number, so that we can implement jump tables.
+    gen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
+                         offsetof(struct arch_seccomp_data, nr)),
+    gen->MakeInstruction(BPF_RET+BPF_K,
+                         Kill(
+                           "Invalid audit architecture in BPF filter").err_)));
 
   // On Intel architectures, verify that system call numbers are in the
   // expected number range. The older i386 and x86-64 APIs clear bit 30
-  // on all system calls. The newer x86-32 API always sets bit 30.
+  // on all system calls. The newer x32 API always sets bit 30.
 #if defined(__i386__) || defined(__x86_64__)
+  Instruction *invalidX32 =
+    gen->MakeInstruction(BPF_RET+BPF_K,
+                         Kill("Illegal mixing of system call ABIs").err_);
+  Instruction *checkX32 =
 #if defined(__x86_64__) && defined(__ILP32__)
-  program->push_back((struct sock_filter)
-    BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 1, 0));
+    gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, invalidX32);
 #else
-  program->push_back((struct sock_filter)
-    BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, 0, 1));
+    gen->MakeInstruction(BPF_JMP+BPF_JSET+BPF_K, 0x40000000, invalidX32, 0);
 #endif
-  program->push_back((struct sock_filter)
-    BPF_STMT(BPF_RET+BPF_K,
-             Kill("Illegal mixing of system call ABIs").err()));
+    gen->JoinInstructions(tail, checkX32);
+    tail = checkX32;
 #endif
 
 
@@ -373,18 +373,25 @@ void Sandbox::installFilter(bool quiet) {
     Ranges ranges;
     findRanges(&ranges);
 
-    // Compile the system call ranges to an optimized BPF program
-    RetInsns rets;
-    emitJumpStatements(program, &rets, ranges.begin(), ranges.end());
-    emitReturnStatements(program, rets);
+    // Compile the system call ranges to an optimized BPF jumptable
+    Instruction *jumptable =
+      assembleJumpTable(gen, ranges.begin(), ranges.end());
+
+    // Append jump table to our pre-amble
+    gen->JoinInstructions(tail, jumptable);
   }
 
+  // Turn the DAG into a vector of instructions.
+  Program *program = new Program();
+  gen->Compile(head, program);
+  delete gen;
+
   // Make sure compilation resulted in BPF program that executes
   // correctly. Otherwise, there is an internal error in our BPF compiler.
   // There is really nothing the caller can do until the bug is fixed.
 #ifndef NDEBUG
   const char *err = NULL;
-  if (!Verifier::verifyBPF(*program, evaluators_, &err)) {
+  if (!Verifier::VerifyBPF(*program, evaluators_, &err)) {
     SANDBOX_DIE(err);
   }
 #endif
@@ -458,15 +465,19 @@ void Sandbox::findRanges(Ranges *ranges) {
   }
 }
 
-void Sandbox::emitJumpStatements(Program *program, RetInsns *rets,
-                                 Ranges::const_iterator start,
-                                 Ranges::const_iterator stop) {
+Instruction *Sandbox::assembleJumpTable(CodeGen *gen,
+                                        Ranges::const_iterator start,
+                                        Ranges::const_iterator stop) {
   // We convert the list of system call ranges into jump table that performs
   // a binary search over the ranges.
-  // As a sanity check, we need to have at least two distinct ranges for us
+  // As a sanity check, we need to have at least one distinct ranges for us
   // to be able to build a jump table.
-  if (stop - start <= 1) {
+  if (stop - start <= 0) {
     SANDBOX_DIE("Invalid set of system call ranges");
+  } else if (stop - start == 1) {
+    // If we have narrowed things down to a single range object, we can
+    // return from the BPF filter program.
+    return gen->MakeInstruction(BPF_RET+BPF_K, start->err);
   }
 
   // Pick the range object that is located at the mid point of our list.
@@ -474,88 +485,11 @@ void Sandbox::emitJumpStatements(Program *program, RetInsns *rets,
   // number in this range object. If our number is lower, it is outside of
   // this range object. If it is greater or equal, it might be inside.
   Ranges::const_iterator mid = start + (stop - start)/2;
-  Program::size_type jmp = program->size();
-  if (jmp >= SECCOMP_MAX_PROGRAM_SIZE) {
-  compiler_err:
-    SANDBOX_DIE("Internal compiler error; failed to compile jump table");
-  }
-  program->push_back((struct sock_filter)
-    BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, mid->from,
-             // Jump targets are place-holders that will be fixed up later.
-             0, 0));
-
-  // The comparison turned out to be false; i.e. our system call number is
-  // less than the range object at the mid point of the list.
-  if (mid - start == 1) {
-    // If we have narrowed things down to a single range object, we can
-    // return from the BPF filter program.
-    // Instead of emitting a BPF_RET statement, we want to coalesce all
-    // identical BPF_RET statements into a single instance. This results in
-    // a more efficient BPF program that uses less CPU cache.
-    // Since all branches in BPF programs have to be forward branches, we
-    // keep track of our current instruction pointer and then fix up the
-    // branch when we emit the BPF_RET statement in emitReturnStatements().
-    (*rets)[start->err.err()].push_back(FixUp(jmp, false));
-  } else {
-    // Sub-divide the list of ranges and continue recursively.
-    emitJumpStatements(program, rets, start, mid);
-  }
 
-  // The comparison turned out to be true; i.e. our system call number is
-  // greater or equal to the range object at the mid point of the list.
-  if (stop - mid == 1) {
-    // We narrowed things down to a single range object. Remember instruction
-    // pointer and exit code, so that we can patch up the target of the jump
-    // instruction in emitReturnStatements().
-    (*rets)[mid->err.err()].push_back(FixUp(jmp, true));
-  } else {
-    // We now know where the block of instructions for the "true" comparison
-    // starts. Patch up the jump target of the BPF_JMP instruction that we
-    // emitted earlier.
-    int distance = program->size() - jmp - 1;
-    if (distance < 0 || distance > 255) {
-      goto compiler_err;
-    }
-    (*program)[jmp].jt = distance;
-
-    // Sub-divide the list of ranges and continue recursively.
-    emitJumpStatements(program, rets, mid, stop);
-  }
-}
-
-void Sandbox::emitReturnStatements(Program *program, const RetInsns& rets) {
-  // Iterate over the list of distinct exit codes from our BPF filter
-  // program and emit the BPF_RET statements.
-  for (RetInsns::const_iterator ret_iter = rets.begin();
-       ret_iter != rets.end();
-       ++ret_iter) {
-    Program::size_type ip = program->size();
-    if (ip >= SECCOMP_MAX_PROGRAM_SIZE) {
-      SANDBOX_DIE("Internal compiler error; failed to compile jump table");
-    }
-    program->push_back((struct sock_filter)
-      BPF_STMT(BPF_RET+BPF_K, ret_iter->first));
-
-    // Iterate over the instruction pointers for the BPF_JMP instructions
-    // that need to be patched up.
-    for (std::vector<FixUp>::const_iterator insn_iter=ret_iter->second.begin();
-         insn_iter != ret_iter->second.end();
-         ++insn_iter) {
-      // Jumps are always relative and they are always forward.
-      int distance = ip - insn_iter->addr - 1;
-      if (distance < 0 || distance > 255) {
-        SANDBOX_DIE("Internal compiler error; failed to compile jump table");
-      }
-
-      // Decide whether we need to patch up the "true" or the "false" jump
-      // target.
-      if (insn_iter->jt) {
-        (*program)[insn_iter->addr].jt = distance;
-      } else {
-        (*program)[insn_iter->addr].jf = distance;
-      }
-    }
-  }
+  // Sub-divide the list of ranges and continue recursively.
+  Instruction *jf = assembleJumpTable(gen, start, mid);
+  Instruction *jt = assembleJumpTable(gen, mid, stop);
+  return gen->MakeInstruction(BPF_JMP+BPF_JGE+BPF_K, mid->from, jt, jf);
 }
 
 void Sandbox::sigSys(int nr, siginfo_t *info, void *void_context) {
diff --git a/sandbox/linux/seccomp-bpf/sandbox_bpf.h b/sandbox/linux/seccomp-bpf/sandbox_bpf.h
index 8cc3b7b..d0764dd 100644
--- a/sandbox/linux/seccomp-bpf/sandbox_bpf.h
+++ b/sandbox/linux/seccomp-bpf/sandbox_bpf.h
@@ -31,6 +31,7 @@
 #include <sys/types.h>
 #include <sys/uio.h>
 #include <sys/wait.h>
+#include <time.h>
 #include <unistd.h>
 
 #include <algorithm>
@@ -184,6 +185,10 @@ struct arch_sigsys {
   unsigned int arch;
 };
 
+class CodeGen;
+class SandboxUnittestHelper;
+struct Instruction;
+
 class Sandbox {
  public:
   enum SandboxStatus {
@@ -271,9 +276,12 @@ class Sandbox {
 
  private:
   friend class ErrorCode;
+  friend class CodeGen;
+  friend class SandboxUnittestHelper;
   friend class Util;
   friend class Verifier;
 
+  typedef std::vector<struct sock_filter> Program;
 
   struct Range {
     Range(uint32_t f, uint32_t t, const ErrorCode& e) :
@@ -284,15 +292,7 @@ class Sandbox {
     uint32_t  from, to;
     ErrorCode err;
   };
-  struct FixUp {
-    FixUp(unsigned int a, bool j) :
-      jt(j), addr(a) { }
-    bool     jt:1;
-    unsigned addr:31;
-  };
   typedef std::vector<Range> Ranges;
-  typedef std::map<uint32_t, std::vector<FixUp> > RetInsns;
-  typedef std::vector<struct sock_filter> Program;
   typedef std::map<uint32_t, ErrorCode> ErrMap;
   typedef std::vector<ErrorCode> Traps;
   typedef std::map<std::pair<TrapFnc, const void *>, int> TrapIds;
@@ -316,10 +316,9 @@ class Sandbox {
                                       EvaluateArguments argumentEvaluator);
   static void      installFilter(bool quiet);
   static void      findRanges(Ranges *ranges);
-  static void      emitJumpStatements(Program *program, RetInsns *rets,
-                                      Ranges::const_iterator start,
-                                      Ranges::const_iterator stop);
-  static void      emitReturnStatements(Program *prog, const RetInsns& rets);
+  static Instruction *assembleJumpTable(CodeGen *gen,
+                                        Ranges::const_iterator start,
+                                        Ranges::const_iterator stop);
   static void      sigSys(int nr, siginfo_t *info, void *void_context);
   static intptr_t  bpfFailure(const struct arch_seccomp_data& data, void *aux);
   static int       getTrapId(TrapFnc fnc, const void *aux);
diff --git a/sandbox/linux/seccomp-bpf/sandbox_bpf_unittest.cc b/sandbox/linux/seccomp-bpf/sandbox_bpf_unittest.cc
index 60554b3..91d30ae 100644
--- a/sandbox/linux/seccomp-bpf/sandbox_bpf_unittest.cc
+++ b/sandbox/linux/seccomp-bpf/sandbox_bpf_unittest.cc
@@ -157,10 +157,7 @@ ErrorCode SyntheticPolicy(int sysno) {
   }
 #endif
 
-  // TODO(markus): allow calls to write(). This should start working as soon
-  //   as we switch to the new code generator. Currently we are blowing up,
-  //   because our jumptable is getting too big.
-  if (sysno == __NR_exit_group /* || sysno == __NR_write */) {
+  if (sysno == __NR_exit_group || sysno == __NR_write) {
     // exit_group() is special, we really need it to work.
     // write() is needed for BPF_ASSERT() to report a useful error message.
     return ErrorCode(ErrorCode::ERR_ALLOWED);
diff --git a/sandbox/linux/seccomp-bpf/verifier.cc b/sandbox/linux/seccomp-bpf/verifier.cc
index 343a6b4..95494e6 100644
--- a/sandbox/linux/seccomp-bpf/verifier.cc
+++ b/sandbox/linux/seccomp-bpf/verifier.cc
@@ -9,7 +9,7 @@
 
 namespace playground2 {
 
-bool Verifier::verifyBPF(const std::vector<struct sock_filter>& program,
+bool Verifier::VerifyBPF(const std::vector<struct sock_filter>& program,
                          const Sandbox::Evaluators& evaluators,
                          const char **err) {
   *err = NULL;
@@ -17,7 +17,7 @@ bool Verifier::verifyBPF(const std::vector<struct sock_filter>& program,
     *err = "Not implemented";
     return false;
   }
-  Sandbox::EvaluateSyscall evaluateSyscall = evaluators.begin()->first;
+  Sandbox::EvaluateSyscall evaluate_syscall = evaluators.begin()->first;
   for (SyscallIterator iter(false); !iter.Done(); ) {
     uint32_t sysnum = iter.Next();
     // We ideally want to iterate over the full system call range and values
@@ -39,11 +39,11 @@ bool Verifier::verifyBPF(const std::vector<struct sock_filter>& program,
     }
 #endif
 #endif
-    ErrorCode code = evaluateSyscall(sysnum);
-    uint32_t computedRet = evaluateBPF(program, data, err);
+    ErrorCode code = evaluate_syscall(sysnum);
+    uint32_t computed_ret = EvaluateBPF(program, data, err);
     if (*err) {
       return false;
-    } else if (computedRet != code.err()) {
+    } else if (computed_ret != code.err()) {
       *err = "Exit code from BPF program doesn't match";
       return false;
     }
@@ -51,7 +51,7 @@ bool Verifier::verifyBPF(const std::vector<struct sock_filter>& program,
   return true;
 }
 
-uint32_t Verifier::evaluateBPF(const std::vector<struct sock_filter>& program,
+uint32_t Verifier::EvaluateBPF(const std::vector<struct sock_filter>& program,
                                const struct arch_seccomp_data& data,
                                const char **err) {
   *err = NULL;
@@ -67,13 +67,13 @@ uint32_t Verifier::evaluateBPF(const std::vector<struct sock_filter>& program,
     const struct sock_filter& insn = program[state.ip];
     switch (BPF_CLASS(insn.code)) {
     case BPF_LD:
-      ld(&state, insn, err);
+      Ld(&state, insn, err);
       break;
     case BPF_JMP:
-      jmp(&state, insn, err);
+      Jmp(&state, insn, err);
       break;
     case BPF_RET:
-      return ret(&state, insn, err);
+      return Ret(&state, insn, err);
     default:
       *err = "Unexpected instruction in BPF program";
       break;
@@ -82,7 +82,7 @@ uint32_t Verifier::evaluateBPF(const std::vector<struct sock_filter>& program,
   return 0;
 }
 
-void Verifier::ld(State *state, const struct sock_filter& insn,
+void Verifier::Ld(State *state, const struct sock_filter& insn,
                   const char **err) {
   if (BPF_SIZE(insn.code) != BPF_W ||
       BPF_MODE(insn.code) != BPF_ABS) {
@@ -98,11 +98,11 @@ void Verifier::ld(State *state, const struct sock_filter& insn,
     *err = "Invalid operand in BPF_LD instruction";
     return;
   }
-  state->accIsValid = true;
+  state->acc_is_valid = true;
   return;
 }
 
-void Verifier::jmp(State *state, const struct sock_filter& insn,
+void Verifier::Jmp(State *state, const struct sock_filter& insn,
                    const char **err) {
   if (BPF_OP(insn.code) == BPF_JA) {
     if (state->ip + insn.k + 1 >= state->program.size() ||
@@ -114,7 +114,7 @@ void Verifier::jmp(State *state, const struct sock_filter& insn,
     state->ip += insn.k;
   } else {
     if (BPF_SRC(insn.code) != BPF_K ||
-        !state->accIsValid ||
+        !state->acc_is_valid ||
         state->ip + insn.jt + 1 >= state->program.size() ||
         state->ip + insn.jf + 1 >= state->program.size()) {
       goto compilation_failure;
@@ -154,7 +154,7 @@ void Verifier::jmp(State *state, const struct sock_filter& insn,
   }
 }
 
-uint32_t Verifier::ret(State *, const struct sock_filter& insn,
+uint32_t Verifier::Ret(State *, const struct sock_filter& insn,
                        const char **err) {
   if (BPF_SRC(insn.code) != BPF_K) {
     *err = "Invalid BPF_RET instruction";
diff --git a/sandbox/linux/seccomp-bpf/verifier.h b/sandbox/linux/seccomp-bpf/verifier.h
index c8128ec..505015e 100644
--- a/sandbox/linux/seccomp-bpf/verifier.h
+++ b/sandbox/linux/seccomp-bpf/verifier.h
@@ -24,7 +24,7 @@ class Verifier {
   // set by the "evaluators".
   // Upon success, "err" is set to NULL. Upon failure, it contains a static
   // error message that does not need to be free()'d.
-  static bool verifyBPF(const std::vector<struct sock_filter>& program,
+  static bool VerifyBPF(const std::vector<struct sock_filter>& program,
                         const Sandbox::Evaluators& evaluators,
                         const char **err);
 
@@ -36,7 +36,7 @@ class Verifier {
   // parts that can actually be generated by our BPF compiler. If this code
   // is used for purposes other than verifying the output of the sandbox's
   // BPF compiler, we might have to extend this BPF interpreter.
-  static uint32_t evaluateBPF(const std::vector<struct sock_filter>& program,
+  static uint32_t EvaluateBPF(const std::vector<struct sock_filter>& program,
                               const struct arch_seccomp_data& data,
                               const char **err);
 
@@ -48,23 +48,23 @@ class Verifier {
       data(d),
       ip(0),
       accumulator(0),
-      accIsValid(false) {
+      acc_is_valid(false) {
     }
     const std::vector<struct sock_filter>& program;
     const struct arch_seccomp_data&        data;
     unsigned int                           ip;
     uint32_t                               accumulator;
-    bool                                   accIsValid;
+    bool                                   acc_is_valid;
 
    private:
     DISALLOW_IMPLICIT_CONSTRUCTORS(State);
   };
 
-  static void     ld (State *state, const struct sock_filter& insn,
+  static void     Ld (State *state, const struct sock_filter& insn,
                       const char **err);
-  static void     jmp(State *state, const struct sock_filter& insn,
+  static void     Jmp(State *state, const struct sock_filter& insn,
                       const char **err);
-  static uint32_t ret(State *state, const struct sock_filter& insn,
+  static uint32_t Ret(State *state, const struct sock_filter& insn,
                       const char **err);
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(Verifier);
author	markus@chromium.org <markus@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2012-10-19 03:43:55 +0000
committer	markus@chromium.org <markus@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2012-10-19 03:43:55 +0000
commit	772c04797682f3936e8323dbd04c311268ee01fe (patch)
tree	dd4e52496a4c2feaaabee5b362fea349a9cc5af7 /sandbox/linux
parent	02b98be97669cd18d4e60084446aa42973fc12e3 (diff)
download	chromium_src-772c04797682f3936e8323dbd04c311268ee01fe.zip chromium_src-772c04797682f3936e8323dbd04c311268ee01fe.tar.gz chromium_src-772c04797682f3936e8323dbd04c311268ee01fe.tar.bz2