Move Courgette

from src\third_party\courgette to src\courgette and src\courgette\third_party Fixed #includes Added properties to ignore generated files: C:\c5\src>svn pg svn:ignore courgette courgette.xcodeproj courgette.sln courgette_fuzz.vcproj courgette_lib.vcproj courgette_minimal_tool.vcproj courgette_tool.vcproj courgette.vcproj courgette_unittests.vcproj SConstruct courgette_fuzz.scons courgette_lib.scons courgette_main.scons courgette_minimal_tool.scons courgette.scons courgette_tool.scons courgette_unittests.scons Review URL: http://codereview.chromium.org/115062 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@15692 0039d316-1c4b-4281-b951-d872f2087c98
author: sra@chromium.org <sra@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-05-08 23:00:29 +0000
committer: sra@chromium.org <sra@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-05-08 23:00:29 +0000
commit: 04ca1bc65afb76ea30698c25f24599d20119e3d2 (patch)
tree: 2bb65e974d478f4d607b83f6a84a56c01f501ac0 /courgette
parent: 9adf1dcf3281408560267787eddcc767566b425f (diff)
download: chromium_src-04ca1bc65afb76ea30698c25f24599d20119e3d2.zip
chromium_src-04ca1bc65afb76ea30698c25f24599d20119e3d2.tar.gz
chromium_src-04ca1bc65afb76ea30698c25f24599d20119e3d2.tar.bz2
48 files changed, 8027 insertions, 0 deletions
diff --git a/courgette/adjustment_method.cc b/courgette/adjustment_method.cc
new file mode 100644
index 0000000..48e7cc0
--- /dev/null
+++ b/courgette/adjustment_method.cc
@@ -0,0 +1,703 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "courgette/adjustment_method.h"
+
+#include <algorithm>
+#include <list>
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "base/basictypes.h"
+#include "base/logging.h"
+#include "base/string_util.h"
+
+#include "courgette/assembly_program.h"
+#include "courgette/courgette.h"
+#include "courgette/encoded_program.h"
+#include "courgette/image_info.h"
+
+namespace courgette {
+
+// We have three discretionary information logging levels for algorithm
+// development.  For now just configure with #defines.
+// TODO(sra): make dependent of some configurable setting.
+#define NO_LOG DLOG_IF(INFO, false)
+// #define ALOG1 LOG(INFO)
+// #define ALOG2 LOG(INFO)
+// #define ALOG3 LOG(INFO)
+#define ALOG1 NO_LOG
+#define ALOG2 NO_LOG
+#define ALOG3 NO_LOG
+
+////////////////////////////////////////////////////////////////////////////////
+
+class NullAdjustmentMethod : public AdjustmentMethod {
+  bool Adjust(const AssemblyProgram& model, AssemblyProgram* program) {
+    return true;
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+
+// The purpose of adjustment is to assign indexes to Labels of a program 'p' to
+// make the sequence of indexes similar to a 'model' program 'm'.  Labels
+// themselves don't have enough information to do this job, so we work with a
+// LabelInfo surrogate for each label.
+//
+class LabelInfo {
+ public:
+  Label* label_;              // The label that this info a surrogate for.
+
+  // Information used only in debugging messages.
+  uint32 is_model_ : 1;       // Is the label in the model?
+  uint32 debug_index_ : 31;   // An unique small number for naming the label.
+
+  uint32 refs_;               // Number of times this Label is referenced.
+
+  LabelInfo* assignment_;     // Label from other program corresponding to this.
+
+  // LabelInfos are in a doubly linked list ordered by address (label_->rva_) so
+  // we can quickly find Labels adjacent in address order.
+  LabelInfo* next_addr_;      // Label(Info) at next highest address.
+  LabelInfo* prev_addr_;      // Label(Info) at next lowest address.
+
+  std::vector<uint32> positions_;  // Offsets into the trace of references.
+
+  // Just a no-argument constructor and copy constructor.  Actual LabelInfo
+  // objects are allocated in std::pair structs in a std::map.
+  LabelInfo()
+      : label_(NULL), is_model_(false), debug_index_(0), refs_(0),
+        assignment_(NULL),
+        next_addr_(NULL),
+        prev_addr_(NULL) {}
+
+ private:
+  void operator=(const LabelInfo*);  // Disallow assignment only.
+
+  // Public compiler generated copy constructor is needed to constuct
+  // std::pair<Label*, LabelInfo> so that fresh LabelInfos can be allocated
+  // inside a std::map.
+};
+
+struct OrderLabelInfoByAddressAscending {
+  bool operator()(const LabelInfo* a, const LabelInfo* b) const {
+    return a->label_->rva_ < b->label_->rva_;
+  }
+};
+
+static std::string ToString(LabelInfo* info) {
+  std::string s;
+  StringAppendF(&s, "%c%d", "pm"[info->is_model_], info->debug_index_);
+  if (info->label_->index_ != Label::kNoIndex)
+    StringAppendF(&s, " (%d)", info->label_->index_);
+
+  StringAppendF(&s, " #%u", info->refs_);
+  return s;
+}
+
+// General graph matching is exponential, essentially trying all permutations.
+// The exponential algorithm can be made faster by avoiding consideration of
+// impossible or unlikely matches.  We can make the matching practical by eager
+// matching - by looking for likely matches and commiting to them, and using the
+// committed assignment as the basis for further matching.
+//
+// The basic eager graph-matching assignment is based on several ideas:
+//
+//  * The strongest match will be for parts of the program that have not
+//    changed.  If part of a program has not changed, then the number of
+//    references to a label will be the same, and corresponding pairs of
+//    adjacent labels will have the same RVA difference.
+//
+//  * Some assignments are 'obvious' if you look at the distribution.  Example:
+//    if both the program and the model have a label that is referred to much
+//    more often than the next most refered-to label, it is likely the two
+//    labels correspond.
+//
+//  * If a label from the program corresponds to a label in the model, it is
+//    likely that the labels near the corresponding labels also match.  A
+//    conservative way of extending the match is to assign only those labels
+//    which have exactly the same address offset and reference count.
+//
+//  * If two labels correspond, then we can try to match up the references
+//    before and after the labels in the reference stream.  For this to be
+//    practical, the number of references has to be small, e.g. each label has
+//    exactly one reference.
+//
+
+// Note: we also tried a completely different approach: random assignment
+// followed by simulated annealing.  This produced similar results.  The results
+// were not as good for very small differences because the simulated annealing
+// never quite hit the groove.  And simulated annealing was several orders of
+// magnitude slower.
+
+
+// TRIE node for suffix strings in the label reference sequence.
+//
+// We dynamically build a trie for both the program and model, growing the trie
+// as necessary.  The trie node for a (possibly) empty string of label
+// references contains the distribution of labels following the string.  The
+// roots node (for the empty string) thus contains the simple distribution of
+// labels within the label reference stream.
+
+struct Node {
+  Node(LabelInfo* in_edge, Node* prev)
+      : in_edge_(in_edge), prev_(prev), count_(0),
+        in_queue_(false) {
+    length_ = 1 + (prev_ ? prev_->length_ : 0);
+  }
+  LabelInfo* in_edge_;  //
+  Node* prev_;          // Node at shorter length.
+  int count_;           // Frequency of this path in Trie.
+  int length_;
+  typedef std::map<LabelInfo*, Node*> Edges;
+  Edges edges_;
+  std::vector<int> places_;   // Indexes into sequence of this item.
+  std::list<Node*> edges_in_frequency_order;
+
+  bool in_queue_;
+  bool Extended() const { return edges_.size() > 0; }
+
+  uint32 Weight() const {
+    return  edges_in_frequency_order.front()->count_;
+  }
+};
+
+static std::string ToString(Node* node) {
+  std::vector<std::string> prefix;
+  for (Node* n = node;  n->prev_;  n = n->prev_)
+    prefix.push_back(ToString(n->in_edge_));
+
+  std::string s;
+  s += "{";
+  const char* sep = "";
+  while (!prefix.empty()) {
+    s += sep;
+    sep = ",";
+    s += prefix.back();
+    prefix.pop_back();
+  }
+
+  s += StringPrintf("%u", node->count_);
+  s += " @";
+  s += Uint64ToString(node->edges_in_frequency_order.size());
+  s += "}";
+  return s;
+}
+
+typedef std::vector<LabelInfo*> Trace;
+
+struct OrderNodeByCountDecreasing {
+  bool operator()(Node* a, Node* b) const {
+    if (a->count_ != b->count_)
+      return  (a->count_) > (b->count_);
+    return a->places_.at(0) < b->places_.at(0);  // Prefer first occuring.
+  }
+};
+
+struct OrderNodeByWeightDecreasing {
+  bool operator()(Node* a, Node* b) const {
+    // (Maybe tie-break on total count, followed by lowest assigned node indexes
+    // in path.)
+    uint32 a_weight = a->Weight();
+    uint32 b_weight = b->Weight();
+    if (a_weight != b_weight)
+      return a_weight > b_weight;
+    if (a->length_ != b->length_)
+      return a->length_ > b->length_;            // Prefer longer.
+    return a->places_.at(0) < b->places_.at(0);  // Prefer first occuring.
+  }
+};
+
+typedef std::set<Node*, OrderNodeByWeightDecreasing> NodeQueue;
+
+class AssignmentProblem {
+ public:
+  AssignmentProblem(const Trace& model,
+                    const Trace& problem)
+      : m_trace_(model),
+        p_trace_(problem) {
+  }
+
+  ~AssignmentProblem() {
+    for (size_t i = 0;  i < all_nodes_.size();  ++i)
+      delete all_nodes_[i];
+  }
+
+  bool Solve() {
+    m_root_ = MakeRootNode(m_trace_);
+    p_root_ = MakeRootNode(p_trace_);
+    AddToQueue(p_root_);
+
+    while (!worklist_.empty()) {
+      Node* node = *worklist_.begin();
+      node->in_queue_ = false;
+      worklist_.erase(node);
+      TrySolveNode(node);
+    }
+
+    ALOG1 << unsolved_.size() << " unsolved items";
+    return true;
+  }
+
+ private:
+  void AddToQueue(Node* node) {
+    if (node->length_ >= 10) {
+      ALOG3 << "Length clipped " << ToString(node->prev_);
+      return;
+    }
+    if (node->in_queue_) {
+      LOG(ERROR) << "Double add " << ToString(node);
+      return;
+    }
+    // just to be sure data for prioritizing is available
+    ExtendNode(node, p_trace_);
+    // SkipCommittedLabels(node);
+    if (node->edges_in_frequency_order.empty())
+      return;
+    node->in_queue_ = true;
+    worklist_.insert(node);
+  }
+
+  void SkipCommittedLabels(Node* node) {
+    ExtendNode(node, p_trace_);
+    uint32 skipped = 0;
+    while (!node->edges_in_frequency_order.empty() &&
+           node->edges_in_frequency_order.front()->in_edge_->assignment_) {
+      ++skipped;
+      node->edges_in_frequency_order.pop_front();
+    }
+    if (skipped > 0)
+      ALOG3 << "Skipped " << skipped << " at " << ToString(node);
+  }
+
+  void TrySolveNode(Node* p_node) {
+    Node* front = p_node->edges_in_frequency_order.front();
+    if (front->in_edge_->assignment_) {
+      p_node->edges_in_frequency_order.pop_front();
+      AddToQueue(front);
+      AddToQueue(p_node);
+      return;
+    }
+
+    // Compare frequencies of unassigned edges, and either make
+    // assignment(s) or move node to unsolved list
+
+    Node* m_node = FindModelNode(p_node);
+
+    if (m_node == NULL) {
+      ALOG1 << "Can't find model node";
+      unsolved_.insert(p_node);
+      return;
+    }
+    ExtendNode(m_node, m_trace_);
+
+    // Lets just try greedy
+
+    SkipCommittedLabels(m_node);
+    if (m_node->edges_in_frequency_order.empty()) {
+      ALOG3 << "Punting, no elements left in model vs "
+            << p_node->edges_in_frequency_order.size();
+      unsolved_.insert(p_node);
+      return;
+    }
+    Node* m_match = m_node->edges_in_frequency_order.front();
+    Node* p_match = p_node->edges_in_frequency_order.front();
+
+    if (p_match->count_ > 1.1 * m_match->count_  ||
+        m_match->count_ > 1.1 * p_match->count_) {
+      ALOG2 << "Tricky distribution "
+            << p_match->count_ << ":" << m_match->count_ << "  "
+            << ToString(p_match) << " vs " << ToString(m_match);
+      return;
+    }
+
+    m_node->edges_in_frequency_order.pop_front();
+    p_node->edges_in_frequency_order.pop_front();
+
+    LabelInfo* p_label_info = p_match->in_edge_;
+    LabelInfo* m_label_info = m_match->in_edge_;
+    int m_index = p_label_info->label_->index_;
+    if (m_index != Label::kNoIndex) {
+      ALOG1 << "Cant use unassigned label from model " << m_index;
+      unsolved_.insert(p_node);
+      return;
+    }
+
+    Assign(p_label_info, m_label_info);
+
+    AddToQueue(p_match);  // find matches within new match
+    AddToQueue(p_node);   // and more matches within this node
+  }
+
+  void Assign(LabelInfo* p_info, LabelInfo* m_info) {
+    AssignOne(p_info, m_info);
+    ALOG3 << "Assign " << ToString(p_info) << " := " << ToString(m_info);
+    // Now consider unassigned adjacent addresses
+    TryExtendAssignment(p_info, m_info);
+  }
+
+  void AssignOne(LabelInfo* p_info, LabelInfo* m_info) {
+    p_info->label_->index_ = m_info->label_->index_;
+
+    // Mark as assigned
+    m_info->assignment_ = p_info;
+    p_info->assignment_ = m_info;
+  }
+
+  void TryExtendAssignment(LabelInfo* p_info, LabelInfo* m_info) {
+    RVA m_rva_base = m_info->label_->rva_;
+    RVA p_rva_base = p_info->label_->rva_;
+
+    LabelInfo* m_info_next = m_info->next_addr_;
+    LabelInfo* p_info_next = p_info->next_addr_;
+    for ( ; m_info_next && p_info_next; ) {
+      if (m_info_next->assignment_)
+        break;
+
+      RVA m_rva = m_info_next->label_->rva_;
+      RVA p_rva = p_info_next->label_->rva_;
+
+      if (m_rva - m_rva_base != p_rva - p_rva_base) {
+        // previous label was pointing to something that is different size
+        break;
+      }
+      LabelInfo* m_info_next_next = m_info_next->next_addr_;
+      LabelInfo* p_info_next_next = p_info_next->next_addr_;
+      if (m_info_next_next && p_info_next_next) {
+        RVA m_rva_next = m_info_next_next->label_->rva_;
+        RVA p_rva_next = p_info_next_next->label_->rva_;
+        if (m_rva_next - m_rva != p_rva_next - p_rva) {
+          // Since following labels are no longer in address lockstep, assume
+          // this address has a difference.
+          break;
+        }
+      }
+
+      // The label has inconsistent numbers of references, it is probably not
+      // the same thing.
+      if (m_info_next->refs_ != p_info_next->refs_) {
+        break;
+      }
+
+      ALOG3 << "  Extending assignment -> "
+            << ToString(p_info_next) << " := " << ToString(m_info_next);
+
+      AssignOne(p_info_next, m_info_next);
+
+      if (p_info_next->refs_ == m_info_next->refs_ &&
+          p_info_next->refs_ == 1) {
+        TryExtendSequence(p_info_next->positions_[0],
+                          m_info_next->positions_[0]);
+        TryExtendSequenceBackwards(p_info_next->positions_[0],
+                                   m_info_next->positions_[0]);
+      }
+
+      p_info_next = p_info_next_next;
+      m_info_next = m_info_next_next;
+    }
+
+    LabelInfo* m_info_prev = m_info->prev_addr_;
+    LabelInfo* p_info_prev = p_info->prev_addr_;
+    for ( ; m_info_prev && p_info_prev; ) {
+      if (m_info_prev->assignment_)
+        break;
+
+      RVA m_rva = m_info_prev->label_->rva_;
+      RVA p_rva = p_info_prev->label_->rva_;
+
+      if (m_rva - m_rva_base != p_rva - p_rva_base) {
+        // previous label was pointing to something that is different size
+        break;
+      }
+      LabelInfo* m_info_prev_prev = m_info_prev->prev_addr_;
+      LabelInfo* p_info_prev_prev = p_info_prev->prev_addr_;
+
+      // The the label has inconsistent numbers of references, it is
+      // probably not the same thing
+      if (m_info_prev->refs_ != p_info_prev->refs_) {
+        break;
+      }
+
+      AssignOne(p_info_prev, m_info_prev);
+      ALOG3 << "  Extending assignment <- " << ToString(p_info_prev) << " := "
+            << ToString(m_info_prev);
+
+      p_info_prev = p_info_prev_prev;
+      m_info_prev = m_info_prev_prev;
+    }
+  }
+
+  uint32 TryExtendSequence(uint32 p_pos_start, uint32 m_pos_start) {
+    uint32 p_pos = p_pos_start + 1;
+    uint32 m_pos = m_pos_start + 1;
+
+    while (p_pos < p_trace_.size()  &&  m_pos < m_trace_.size()) {
+      LabelInfo* p_info = p_trace_[p_pos];
+      LabelInfo* m_info = m_trace_[m_pos];
+
+      // To match, either (1) both are assigned or (2) both are unassigned.
+      if ((p_info->assignment_ == NULL) != (m_info->assignment_ == NULL))
+        break;
+
+      // If they are assigned, it needs to be consistent (same index).
+      if (p_info->assignment_ && m_info->assignment_) {
+        if (p_info->label_->index_ != m_info->label_->index_)
+          break;
+        ++p_pos;
+        ++m_pos;
+        continue;
+      }
+
+      if (p_info->refs_ != m_info->refs_)
+        break;
+
+      AssignOne(p_info, m_info);
+      ALOG3 << "    Extending assignment seq"
+            << "[+" << p_pos - p_pos_start << "]"
+            << " -> "
+            << ToString(p_info) << " := " << ToString(m_info);
+
+      ++p_pos;
+      ++m_pos;
+    }
+
+    return p_pos - p_pos_start;
+  }
+
+  uint32 TryExtendSequenceBackwards(uint32 p_pos_start, uint32 m_pos_start) {
+    if (p_pos_start == 0  ||  m_pos_start == 0)
+      return 0;
+
+    uint32 p_pos = p_pos_start - 1;
+    uint32 m_pos = m_pos_start - 1;
+
+    while (p_pos > 0  &&  m_pos > 0) {
+      LabelInfo* p_info = p_trace_[p_pos];
+      LabelInfo* m_info = m_trace_[m_pos];
+
+      if ((p_info->assignment_ == NULL) != (m_info->assignment_ == NULL))
+        break;
+
+      if (p_info->assignment_ && m_info->assignment_) {
+        if (p_info->label_->index_ != m_info->label_->index_)
+          break;
+        --p_pos;
+        --m_pos;
+        continue;
+      }
+
+      if (p_info->refs_ != m_info->refs_)
+        break;
+
+      AssignOne(p_info, m_info);
+      ALOG3 << "    Extending assignment seq"
+            << "[-" << p_pos_start - p_pos << "]"
+            << " <- "
+            << ToString(p_info) << " := " << ToString(m_info);
+
+      --p_pos;
+      --m_pos;
+    }
+
+    return p_pos - p_pos_start;
+  }
+
+  Node* FindModelNode(Node* node) {
+    if (node->prev_ == NULL)
+      return m_root_;
+
+    Node* m_parent = FindModelNode(node->prev_);
+    if (m_parent == NULL) {
+      return NULL;
+    }
+
+    ExtendNode(m_parent, m_trace_);
+
+    LabelInfo* p_label = node->in_edge_;
+    LabelInfo* m_label = p_label->assignment_;
+    if (m_label == NULL) {
+      ALOG1 << "Expected assigned prefix";
+      return NULL;
+    }
+
+    Node::Edges::iterator e = m_parent->edges_.find(m_label);
+    if (e == m_parent->edges_.end()) {
+      ALOG2 << "Expected defined edge in parent";
+      return NULL;
+    }
+
+    return e->second;
+  }
+
+  Node* MakeRootNode(const Trace& trace) {
+    Node* node = new Node(NULL, NULL);
+    all_nodes_.push_back(node);
+    for (size_t i = 0;  i < trace.size();  ++i) {
+      ++node->count_;
+      node->places_.push_back(i);
+    }
+    return node;
+  }
+
+  void ExtendNode(Node* node, const Trace& trace) {
+    // Make sure trie is filled in at this node.
+    if (node->Extended())
+      return;
+    for (size_t i = 0; i < node->places_.size();  ++i) {
+      uint32 index = node->places_.at(i);
+      if (index < trace.size()) {
+        LabelInfo* item = trace.at(index);
+        Node*& slot = node->edges_[item];
+        if (slot == NULL) {
+          slot = new Node(item, node);
+          all_nodes_.push_back(slot);
+          node->edges_in_frequency_order.push_back(slot);
+        }
+        slot->places_.push_back(index + 1);
+        ++slot->count_;
+      }
+    }
+    node->edges_in_frequency_order.sort(OrderNodeByCountDecreasing());
+  }
+
+  const Trace& m_trace_;
+  const Trace& p_trace_;
+  Node* m_root_;
+  Node* p_root_;
+
+  NodeQueue worklist_;
+  NodeQueue unsolved_;
+
+  std::vector<Node*> all_nodes_;
+
+  DISALLOW_COPY_AND_ASSIGN(AssignmentProblem);
+};
+
+class GraphAdjuster : public AdjustmentMethod {
+ public:
+  GraphAdjuster() {}
+  ~GraphAdjuster() {}
+
+  bool Adjust(const AssemblyProgram& model, AssemblyProgram* program) {
+    LOG(INFO) << "GraphAdjuster::Adjust";
+    prog_ = program;
+    model_ = &model;
+    debug_label_index_gen_ = 0;
+    return Finish();
+  }
+
+  bool Finish() {
+    prog_->UnassignIndexes();
+    CollectTraces(model_, &model_abs32_, &model_rel32_, true);
+    CollectTraces(prog_,  &prog_abs32_,  &prog_rel32_,  false);
+    Solve(model_abs32_, prog_abs32_);
+    Solve(model_rel32_, prog_rel32_);
+    prog_->AssignRemainingIndexes();
+    return true;
+  }
+
+ private:
+
+  void CollectTraces(const AssemblyProgram* program, Trace* abs32, Trace* rel32,
+                     bool is_model) {
+    const std::vector<Instruction*>& instructions = program->instructions();
+    for (size_t i = 0;  i < instructions.size();  ++i) {
+      Instruction* instruction = instructions.at(i);
+      if (Label* label = program->InstructionAbs32Label(instruction))
+        ReferenceLabel(abs32, label, is_model);
+      if (Label* label = program->InstructionRel32Label(instruction))
+        ReferenceLabel(rel32, label, is_model);
+    }
+    // TODO(sra): we could simply append all the labels in index order to
+    // incorporate some costing for entropy (bigger deltas) that will be
+    // introduced into the label address table by non-monotonic ordering.  This
+    // would have some knock-on effects to parts of the algorithm that work on
+    // single-occurence labels.
+  }
+
+  void Solve(const Trace& model, const Trace& problem) {
+    LinkLabelInfos(model);
+    LinkLabelInfos(problem);
+    AssignmentProblem a(model, problem);
+    a.Solve();
+  }
+
+  void LinkLabelInfos(const Trace& trace) {
+    typedef std::set<LabelInfo*, OrderLabelInfoByAddressAscending> Ordered;
+    Ordered ordered;
+    for (Trace::const_iterator p = trace.begin();  p != trace.end();  ++p)
+      ordered.insert(*p);
+    LabelInfo* prev = NULL;
+    for (Ordered::iterator p = ordered.begin();  p != ordered.end();  ++p) {
+      LabelInfo* curr = *p;
+      if (prev) prev->next_addr_ = curr;
+      curr->prev_addr_ = prev;
+      prev = curr;
+
+      if (curr->positions_.size() != curr->refs_)
+        NOTREACHED();
+    }
+  }
+
+  void ReferenceLabel(Trace* trace, Label* label, bool is_model) {
+    trace->push_back(MakeLabelInfo(label, is_model, trace->size()));
+  }
+
+  LabelInfo* MakeLabelInfo(Label* label, bool is_model, uint32 position) {
+    LabelInfo& slot = label_infos_[label];
+    if (slot.label_ == NULL) {
+      slot.label_ = label;
+      slot.is_model_ = is_model;
+      slot.debug_index_ = ++debug_label_index_gen_;
+    }
+    slot.positions_.push_back(position);
+    ++slot.refs_;
+    return &slot;
+  }
+
+  AssemblyProgram* prog_;         // Program to be adjusted, owned by caller.
+  const AssemblyProgram* model_;  // Program to be mimicked, owned by caller.
+
+  Trace model_abs32_;
+  Trace model_rel32_;
+  Trace prog_abs32_;
+  Trace prog_rel32_;
+
+  int debug_label_index_gen_;
+
+  // Note LabelInfo is allocated inside map, so the LabelInfo lifetimes are
+  // managed by the map.
+  std::map<Label*, LabelInfo> label_infos_;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(GraphAdjuster);
+};
+
+
+////////////////////////////////////////////////////////////////////////////////
+
+void AdjustmentMethod::Destroy() { delete this; }
+
+AdjustmentMethod* AdjustmentMethod::MakeNullAdjustmentMethod() {
+  return new NullAdjustmentMethod();
+}
+
+AdjustmentMethod* AdjustmentMethod::MakeProductionAdjustmentMethod() {
+  return new GraphAdjuster();
+}
+
+Status Adjust(const AssemblyProgram& model, AssemblyProgram* program) {
+  AdjustmentMethod* method = AdjustmentMethod::MakeProductionAdjustmentMethod();
+  bool ok = method->Adjust(model, program);
+  method->Destroy();
+  if (ok)
+    return C_OK;
+  else
+    return C_ADJUSTMENT_FAILED;
+}
+
+}  // namespace courgette
diff --git a/courgette/adjustment_method.h b/courgette/adjustment_method.h
new file mode 100644
index 0000000..8ca71e8
--- /dev/null
+++ b/courgette/adjustment_method.h
@@ -0,0 +1,42 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COURGETTE_ADJUSTMENT_METHOD_H_
+#define COURGETTE_ADJUSTMENT_METHOD_H_
+
+#include "base/basictypes.h"
+
+namespace courgette {
+
+class AssemblyProgram;
+
+class AdjustmentMethod {
+ public:
+  // Factory methods for making adjusters.
+
+  // Returns the adjustment method used in production.
+  static AdjustmentMethod* MakeProductionAdjustmentMethod();
+
+  // Returns and adjustement method that makes no adjustments.
+  static AdjustmentMethod* MakeNullAdjustmentMethod();
+
+
+  // Adjusts |program| to increase similarity to |model|.  |program| can be
+  // changed in any way provided that it still produces the same output when
+  // assembled.
+  virtual bool Adjust(const AssemblyProgram& model,
+                      AssemblyProgram* program) = 0;
+
+  // Deletes 'this' adjustment method.
+  virtual void Destroy();
+
+ protected:
+  AdjustmentMethod() {}
+  virtual ~AdjustmentMethod() {}
+
+  DISALLOW_COPY_AND_ASSIGN(AdjustmentMethod);
+};
+
+}  // namespace courgette
+#endif  // COURGETTE_ADJUSTMENT_METHOD_H_
diff --git a/courgette/adjustment_method_unittest.cc b/courgette/adjustment_method_unittest.cc
new file mode 100644
index 0000000..661b23d
--- /dev/null
+++ b/courgette/adjustment_method_unittest.cc
@@ -0,0 +1,108 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <string>
+
+#include "base/path_service.h"
+#include "base/file_util.h"
+#include "base/string_util.h"
+
+#include "courgette/assembly_program.h"
+#include "courgette/courgette.h"
+#include "courgette/streams.h"
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+class AdjustmentMethodTest : public testing::Test {
+ public:
+  void Test1() const;
+
+ private:
+  void SetUp() {
+  }
+
+  void TearDown() {
+  }
+
+  // Returns one of two similar a simple programs.  They differ only in the
+  // label assignment, so that it is possible to make them look identical.
+  courgette::AssemblyProgram* MakeProgram(int kind) const {
+    courgette::AssemblyProgram* prog = new courgette::AssemblyProgram();
+    prog->set_image_base(0x00400000);
+
+    courgette::Label* labelA = prog->FindOrMakeAbs32Label(0x00410000);
+    courgette::Label* labelB = prog->FindOrMakeAbs32Label(0x00410004);
+
+    prog->EmitAbs32(labelA);
+    prog->EmitAbs32(labelA);
+    prog->EmitAbs32(labelB);
+    prog->EmitAbs32(labelA);
+    prog->EmitAbs32(labelA);
+    prog->EmitAbs32(labelB);
+
+    if (kind == 0) {
+      labelA->index_ = 0;
+      labelB->index_ = 1;
+    } else {
+      labelA->index_ = 1;
+      labelB->index_ = 0;
+    }
+    prog->AssignRemainingIndexes();
+
+    return prog;
+  }
+
+  courgette::AssemblyProgram* MakeProgramA() const { return MakeProgram(0); }
+  courgette::AssemblyProgram* MakeProgramB() const { return MakeProgram(1); }
+
+  // Returns a string that is the serialized version of |program|.
+  // Deletes |program|.
+  std::string Serialize(courgette::AssemblyProgram *program) const {
+    courgette::EncodedProgram* encoded = NULL;
+
+    const courgette::Status encode_status = Encode(program, &encoded);
+    EXPECT_EQ(courgette::C_OK, encode_status);
+
+    DeleteAssemblyProgram(program);
+
+    courgette::SinkStreamSet sinks;
+    const courgette::Status write_status = WriteEncodedProgram(encoded, &sinks);
+    EXPECT_EQ(courgette::C_OK, write_status);
+
+    DeleteEncodedProgram(encoded);
+
+    courgette::SinkStream sink;
+    bool can_collect = sinks.CopyTo(&sink);
+    EXPECT_TRUE(can_collect);
+
+    return std::string(reinterpret_cast<const char *>(sink.Buffer()),
+                       sink.Length());
+  }
+};
+
+
+void AdjustmentMethodTest::Test1() const {
+  courgette::AssemblyProgram* prog1 = MakeProgramA();
+  courgette::AssemblyProgram* prog2 = MakeProgramB();
+  std::string s1 = Serialize(prog1);
+  std::string s2 = Serialize(prog2);
+
+  // Don't use EXPECT_EQ because strings are unprintable.
+  EXPECT_FALSE(s1 == s2);  // Unadjusted A and B differ.
+
+  courgette::AssemblyProgram* prog5 = MakeProgramA();
+  courgette::AssemblyProgram* prog6 = MakeProgramB();
+  courgette::Status can_adjust = Adjust(*prog5, prog6);
+  EXPECT_EQ(courgette::C_OK, can_adjust);
+  std::string s5 = Serialize(prog5);
+  std::string s6 = Serialize(prog6);
+
+  EXPECT_TRUE(s1 == s5);  // Adjustment did not change A (prog5)
+  EXPECT_TRUE(s5 == s6);  // Adjustment did change B into A
+}
+
+
+TEST_F(AdjustmentMethodTest, All) {
+  Test1();
+}
diff --git a/courgette/assembly_program.cc b/courgette/assembly_program.cc
new file mode 100644
index 0000000..e40f38f
--- /dev/null
+++ b/courgette/assembly_program.cc
@@ -0,0 +1,372 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "courgette/assembly_program.h"
+
+#include <memory.h>
+#include <algorithm>
+#include <map>
+#include <set>
+#include <sstream>
+#include <vector>
+
+#include "base/logging.h"
+
+#include "courgette/courgette.h"
+#include "courgette/encoded_program.h"
+
+namespace courgette {
+
+// Opcodes of simple assembly language
+enum OP {
+  ORIGIN,         // ORIGIN <rva> - set current address for assembly.
+  MAKERELOCS,     // Generates a base relocation table.
+  DEFBYTE,        // DEFBYTE <value> - emit a byte literal.
+  REL32,          // REL32 <label> - emit a rel32 encoded reference to 'label'.
+  ABS32,          // REL32 <label> - emit am abs32 encoded reference to 'label'.
+  LAST_OP
+};
+
+// Base class for instructions.  Because we have so many instructions we want to
+// keep them as small as possible.  For this reason we avoid virtual functions.
+//
+class Instruction {
+ public:
+  OP op() const { return static_cast<OP>(op_); }
+
+ protected:
+  explicit Instruction(OP op) : op_(op), info_(0) {}
+  Instruction(OP op, unsigned int info) : op_(op), info_(info) {}
+
+  uint32 op_   : 4;    // A few bits to store the OP code.
+  uint32 info_ : 28;   // Remaining bits in first word available to subclass.
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(Instruction);
+};
+
+namespace {
+
+// Sets the current address for the emitting instructions.
+class OriginInstruction : public Instruction {
+ public:
+  explicit OriginInstruction(RVA rva) : Instruction(ORIGIN, 0), rva_(rva) {}
+  RVA origin_rva() const { return rva_; }
+ private:
+  RVA  rva_;
+};
+
+// Emits an entire base relocation table.
+class MakeRelocsInstruction : public Instruction {
+ public:
+  MakeRelocsInstruction() : Instruction(MAKERELOCS) {}
+};
+
+// Emits a single byte.
+class ByteInstruction : public Instruction {
+ public:
+  explicit ByteInstruction(uint8 value) : Instruction(DEFBYTE, value) {}
+  uint8 byte_value() const { return info_; }
+};
+
+// A ABS32 to REL32 instruction emits a reference to a label's address.
+class InstructionWithLabel : public Instruction {
+ public:
+  InstructionWithLabel(OP op, Label* label)
+    : Instruction(op, 0), label_(label) {
+    if (label == NULL) NOTREACHED();
+  }
+  Label* label() const { return label_; }
+ private:
+  Label* label_;
+};
+
+}  // namespace
+
+AssemblyProgram::AssemblyProgram()
+  : image_base_(0),
+    byte_instruction_cache_(NULL) {
+}
+
+static void DeleteContainedLabels(const RVAToLabel& labels) {
+  for (RVAToLabel::const_iterator p = labels.begin();  p != labels.end();  ++p)
+    delete p->second;
+}
+
+AssemblyProgram::~AssemblyProgram() {
+  for (size_t i = 0;  i < instructions_.size();  ++i) {
+    Instruction* instruction = instructions_[i];
+    if (instruction->op() != DEFBYTE)  // Will be in byte_instruction_cache_.
+      delete instruction;
+  }
+  if (byte_instruction_cache_) {
+    for (size_t i = 0;  i < 256;  ++i)
+      delete byte_instruction_cache_[i];
+    delete[] byte_instruction_cache_;
+  }
+  DeleteContainedLabels(rel32_labels_);
+  DeleteContainedLabels(abs32_labels_);
+}
+
+void AssemblyProgram::EmitMakeRelocsInstruction() {
+  Emit(new MakeRelocsInstruction());
+}
+
+void AssemblyProgram::EmitOriginInstruction(RVA rva) {
+  Emit(new OriginInstruction(rva));
+}
+
+void AssemblyProgram::EmitByteInstruction(uint8 byte) {
+  Emit(GetByteInstruction(byte));
+}
+
+void AssemblyProgram::EmitRel32(Label* label) {
+  Emit(new InstructionWithLabel(REL32, label));
+}
+
+void AssemblyProgram::EmitAbs32(Label* label) {
+  Emit(new InstructionWithLabel(ABS32, label));
+}
+
+Label* AssemblyProgram::FindOrMakeAbs32Label(RVA rva) {
+  return FindLabel(rva, &abs32_labels_);
+}
+
+Label* AssemblyProgram::FindOrMakeRel32Label(RVA rva) {
+  return FindLabel(rva, &rel32_labels_);
+}
+
+void AssemblyProgram::DefaultAssignIndexes() {
+  DefaultAssignIndexes(&abs32_labels_);
+  DefaultAssignIndexes(&rel32_labels_);
+}
+
+void AssemblyProgram::UnassignIndexes() {
+  UnassignIndexes(&abs32_labels_);
+  UnassignIndexes(&rel32_labels_);
+}
+
+void AssemblyProgram::AssignRemainingIndexes() {
+  AssignRemainingIndexes(&abs32_labels_);
+  AssignRemainingIndexes(&rel32_labels_);
+}
+
+Label* AssemblyProgram::InstructionAbs32Label(
+    const Instruction* instruction) const {
+  if (instruction->op() == ABS32)
+    return static_cast<const InstructionWithLabel*>(instruction)->label();
+  return NULL;
+}
+
+Label* AssemblyProgram::InstructionRel32Label(
+    const Instruction* instruction) const {
+  if (instruction->op() == REL32)
+    return static_cast<const InstructionWithLabel*>(instruction)->label();
+  return NULL;
+}
+
+Label* AssemblyProgram::FindLabel(RVA rva, RVAToLabel* labels) {
+  Label*& slot = (*labels)[rva];
+  if (slot == 0) {
+    slot = new Label(rva);
+  }
+  return slot;
+}
+
+void AssemblyProgram::UnassignIndexes(RVAToLabel* labels) {
+  for (RVAToLabel::iterator p = labels->begin();  p != labels->end();  ++p) {
+    Label* current = p->second;
+    current->index_ = Label::kNoIndex;
+  }
+}
+
+// DefaultAssignIndexes takes a set of labels and assigns indexes in increasing
+// address order.
+//
+void AssemblyProgram::DefaultAssignIndexes(RVAToLabel* labels) {
+  int index = 0;
+  for (RVAToLabel::iterator p = labels->begin();  p != labels->end();  ++p) {
+    Label* current = p->second;
+    if (current->index_ != Label::kNoIndex)
+      NOTREACHED();
+    current->index_ = index;
+    ++index;
+  }
+}
+
+// AssignRemainingIndexes assigns indexes to any addresses (labels) that are not
+// yet assigned an index.
+//
+void AssemblyProgram::AssignRemainingIndexes(RVAToLabel* labels) {
+  // An address table compresses best when each index is associated with an
+  // address that is slight larger than the previous index.
+
+  // First see which indexes have not been used.  The 'available' vector could
+  // grow even bigger, but the number of addresses is a better starting size
+  // than empty.
+  std::vector<bool> available(labels->size(), true);
+  int used = 0;
+
+  for (RVAToLabel::iterator p = labels->begin();  p != labels->end();  ++p) {
+    size_t index = p->second->index_;
+    if (index != Label::kNoIndex) {
+      while (index >= available.size())
+        available.push_back(true);
+      available.at(index) = false;
+      ++used;
+    }
+  }
+
+  LOG(INFO) << used << " of " << labels->size() << " labels pre-assigned";
+
+  // Are there any unused labels that happen to be adjacent following a used
+  // label?
+  //
+  int fill_forward_count = 0;
+  Label* prev = 0;
+  for (RVAToLabel::iterator p = labels->begin();  p != labels->end();  ++p) {
+    Label* current = p->second;
+    if (current->index_ == Label::kNoIndex) {
+      size_t index = 0;
+      if (prev  &&  prev->index_ != Label::kNoIndex)
+        index = prev->index_ + 1;
+      if (index < available.size() && available.at(index)) {
+        current->index_ = index;
+        available.at(index) = false;
+        ++fill_forward_count;
+      }
+    }
+    prev = current;
+  }
+
+  // Are there any unused labels that happen to be adjacent preceeding a used
+  // label?
+  //
+  int fill_backward_count = 0;
+  int backward_refs = 0;
+  prev = 0;
+  for (RVAToLabel::reverse_iterator p = labels->rbegin();
+       p != labels->rend();
+       ++p) {
+    Label* current = p->second;
+    if (current->index_ == Label::kNoIndex) {
+      int prev_index;
+      if (prev)
+        prev_index = prev->index_;
+      else
+        prev_index = available.size();
+      if (prev_index != 0  &&
+          prev_index != Label::kNoIndex  &&
+          available.at(prev_index - 1)) {
+        current->index_ = prev_index - 1;
+        available.at(current->index_) = false;
+        ++fill_backward_count;
+      }
+    }
+    prev = current;
+  }
+
+  // Fill in any remaining indexes
+  int fill_infill_count = 0;
+  int index = 0;
+  for (RVAToLabel::iterator p = labels->begin();  p != labels->end();  ++p) {
+    Label* current = p->second;
+    if (current->index_ == Label::kNoIndex) {
+      while (!available.at(index)) {
+        ++index;
+      }
+      current->index_ = index;
+      available.at(index) = false;
+      ++index;
+      ++fill_infill_count;
+    }
+  }
+
+  LOG(INFO) << "  fill"
+            << " forward " << fill_forward_count << " "
+            << " backward " << fill_backward_count << " "
+            << " infill " << fill_infill_count;
+}
+
+typedef void (EncodedProgram::*DefineLabelMethod)(int index, RVA value);
+
+static void DefineLabels(const RVAToLabel& labels,
+                         EncodedProgram* encoded_format,
+                         DefineLabelMethod define_label) {
+  for (RVAToLabel::const_iterator p = labels.begin(); p != labels.end(); ++p) {
+    Label* label = p->second;
+    (encoded_format->*define_label)(label->index_, label->rva_);
+  }
+}
+
+EncodedProgram* AssemblyProgram::Encode() const {
+  EncodedProgram* encoded = new EncodedProgram();
+
+  encoded->set_image_base(image_base_);
+  DefineLabels(abs32_labels_, encoded, &EncodedProgram::DefineAbs32Label);
+  DefineLabels(rel32_labels_, encoded, &EncodedProgram::DefineRel32Label);
+  encoded->EndLabels();
+
+  for (size_t i = 0;  i < instructions_.size();  ++i) {
+    Instruction* instruction = instructions_[i];
+
+    switch (instruction->op()) {
+      case ORIGIN: {
+        OriginInstruction* org = static_cast<OriginInstruction*>(instruction);
+        encoded->AddOrigin(org->origin_rva());
+        break;
+      }
+      case DEFBYTE: {
+        uint8 b = static_cast<ByteInstruction*>(instruction)->byte_value();
+        encoded->AddCopy(1, &b);
+        break;
+      }
+      case REL32: {
+        Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
+        encoded->AddRel32(label->index_);
+        break;
+      }
+      case ABS32: {
+        Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
+        encoded->AddAbs32(label->index_);
+        break;
+      }
+      case MAKERELOCS: {
+        encoded->AddMakeRelocs();
+        break;
+      }
+      default: {
+        NOTREACHED() << "Unknown Insn OP kind";
+      }
+    }
+  }
+
+  return encoded;
+}
+
+Instruction* AssemblyProgram::GetByteInstruction(uint8 byte) {
+  if (!byte_instruction_cache_) {
+    byte_instruction_cache_ = new Instruction*[256];
+    for (int i = 0; i < 256; ++i) {
+      byte_instruction_cache_[i] = new ByteInstruction(static_cast<uint8>(i));
+    }
+  }
+
+  return byte_instruction_cache_[byte];
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+Status Encode(AssemblyProgram* program, EncodedProgram** output) {
+  *output = NULL;
+  EncodedProgram *encoded = program->Encode();
+  if (encoded) {
+    *output = encoded;
+    return C_OK;
+  } else {
+    return C_GENERAL_ERROR;
+  }
+}
+
+}  // namespace courgette
diff --git a/courgette/assembly_program.h b/courgette/assembly_program.h
new file mode 100644
index 0000000..e726f81
--- /dev/null
+++ b/courgette/assembly_program.h
@@ -0,0 +1,133 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COURGETTE_ASSEMBLY_PROGRAM_H_
+#define COURGETTE_ASSEMBLY_PROGRAM_H_
+
+#include <map>
+#include <set>
+#include <vector>
+
+#include "base/basictypes.h"
+
+#include "courgette/image_info.h"
+
+namespace courgette {
+
+class EncodedProgram;
+class Instruction;
+
+// A Label is a symbolic reference to an address.  Unlike a conventional
+// assembly language, we always know the address.  The address will later be
+// stored in a table and the Label will be replaced with the index into the
+// table.
+//
+// TODO(sra): Make fields private and add setters and getters.
+class Label {
+ public:
+  static const int kNoIndex = -1;
+  Label() : rva_(0), index_(kNoIndex) {}
+  explicit Label(RVA rva) : rva_(rva), index_(kNoIndex) {}
+
+  RVA rva_;    // Address refered to by the label.
+  int index_;  // Index of address in address table, kNoIndex until assigned.
+};
+
+typedef std::map<RVA, Label*> RVAToLabel;
+
+// An AssemblyProgram is the result of disassembling an executable file.
+//
+// * The disassembler creates labels in the AssemblyProgram and emits
+//   'Instructions'.
+// * The disassembler then calls DefaultAssignIndexes to assign
+//   addresses to positions in the address tables.
+// * [Optional step]
+// * At this point the AssemblyProgram can be converted into an
+//   EncodedProgram and serialized to an output stream.
+// * Later, the EncodedProgram can be deserialized and assembled into
+//   the original file.
+//
+// The optional step is to modify the AssemblyProgram.  One form of modification
+// is to assign indexes in such a way as to make the EncodedProgram for this
+// AssemblyProgram look more like the EncodedProgram for some other
+// AssemblyProgram.  The modification process should call UnassignIndexes, do
+// its own assignment, and then call AssignRemainingIndexes to ensure all
+// indexes are assigned.
+//
+class AssemblyProgram {
+ public:
+  AssemblyProgram();
+  ~AssemblyProgram();
+
+  void set_image_base(uint64 image_base) { image_base_ = image_base; }
+
+  // Instructions will be assembled in the order they are emitted.
+
+  // Generates an entire base relocation table.
+  void EmitMakeRelocsInstruction();
+
+  // Following instruction will be assembled at address 'rva'.
+  void EmitOriginInstruction(RVA rva);
+
+  // Generates a single byte of data or machine instruction.
+  void EmitByteInstruction(uint8 byte);
+
+  // Generates 4-byte relative reference to address of 'label'.
+  void EmitRel32(Label* label);
+
+  // Generates 4-byte absolute reference to address of 'label'.
+  void EmitAbs32(Label* label);
+
+  Label* FindOrMakeAbs32Label(RVA rva);
+  Label* FindOrMakeRel32Label(RVA rva);
+
+  void DefaultAssignIndexes();
+  void UnassignIndexes();
+  void AssignRemainingIndexes();
+
+  EncodedProgram* Encode() const;
+
+  // Accessor for instruction list.
+  const std::vector<Instruction*>& instructions() const {
+    return instructions_;
+  }
+
+  // Returns the label if the instruction contains and absolute address,
+  // otherwise returns NULL.
+  Label* InstructionAbs32Label(const Instruction* instruction) const;
+
+  // Returns the label if the instruction contains and rel32 offset,
+  // otherwise returns NULL.
+  Label* InstructionRel32Label(const Instruction* instruction) const;
+
+
+ private:
+  void Emit(Instruction* instruction) { instructions_.push_back(instruction); }
+
+  Label* FindLabel(RVA rva, RVAToLabel* labels);
+
+  // Helper methods for the public versions.
+  static void UnassignIndexes(RVAToLabel* labels);
+  static void DefaultAssignIndexes(RVAToLabel* labels);
+  static void AssignRemainingIndexes(RVAToLabel* labels);
+
+  // Sharing instructions that emit a single byte saves a lot of space.
+  Instruction* GetByteInstruction(uint8 byte);
+  Instruction** byte_instruction_cache_;
+
+  uint64 image_base_;  // Desired or mandated base address of image.
+
+  std::vector<Instruction*> instructions_;  // All the instructions in program.
+
+  // These are lookup maps to find the label associated with a given address.
+  // We have separate label spaces for addresses referenced by rel32 labels and
+  // abs32 labels.  This is somewhat arbitrary.
+  RVAToLabel rel32_labels_;
+  RVAToLabel abs32_labels_;
+
+  DISALLOW_COPY_AND_ASSIGN(AssemblyProgram);
+};
+
+}  // namespace courgette
+#endif  // COURGETTE_ASSEMBLY_PROGRAM_H_
diff --git a/courgette/bsdiff_memory_unittest.cc b/courgette/bsdiff_memory_unittest.cc
new file mode 100644
index 0000000..97da726
--- /dev/null
+++ b/courgette/bsdiff_memory_unittest.cc
@@ -0,0 +1,111 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "courgette/third_party/bsdiff.h"
+
+#include <string>
+
+#include "base/file_util.h"
+#include "base/path_service.h"
+#include "base/string_util.h"
+
+#include "courgette/courgette.h"
+#include "courgette/streams.h"
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+class BSDiffMemoryTest : public testing::Test {
+ public:
+  std::string FileContents(const char *file_name) const;
+  void GenerateAndTestPatch(const std::string& a, const std::string& b) const;
+
+ private:
+  void SetUp() {
+    PathService::Get(base::DIR_SOURCE_ROOT, &test_dir_);
+    file_util::AppendToPath(&test_dir_, L"courgette");
+    file_util::AppendToPath(&test_dir_, L"testdata");
+  }
+
+  std::wstring test_dir_;
+};
+
+//  Reads a test file into a string.
+std::string BSDiffMemoryTest::FileContents(const char *file_name) const {
+  std::wstring file_path = test_dir_;
+  file_util::AppendToPath(&file_path, UTF8ToWide(file_name));
+  std::string file_bytes;
+  if (!file_util::ReadFileToString(file_path, &file_bytes)) {
+    EXPECT_TRUE(!"Could not read test data");
+  }
+  return file_bytes;
+}
+
+void BSDiffMemoryTest::GenerateAndTestPatch(const std::string& old_text,
+                                            const std::string& new_text) const {
+  courgette::SourceStream old1;
+  courgette::SourceStream new1;
+  old1.Init(old_text.c_str(), old_text.length());
+  new1.Init(new_text.c_str(), new_text.length());
+
+  courgette::SinkStream patch1;
+  courgette::BSDiffStatus status = CreateBinaryPatch(&old1, &new1, &patch1);
+  EXPECT_EQ(courgette::OK, status);
+
+  courgette::SourceStream old2;
+  courgette::SourceStream patch2;
+  old2.Init(old_text.c_str(), old_text.length());
+  patch2.Init(patch1);
+
+  courgette::SinkStream new2;
+  status = ApplyBinaryPatch(&old2, &patch2, &new2);
+  EXPECT_EQ(courgette::OK, status);
+  EXPECT_EQ(new_text.length(), new2.Length());
+  EXPECT_EQ(0, memcmp(new_text.c_str(), new2.Buffer(), new_text.length()));
+}
+
+TEST_F(BSDiffMemoryTest, TestEmpty) {
+  GenerateAndTestPatch("", "");
+}
+
+TEST_F(BSDiffMemoryTest, TestEmptyVsNonempty) {
+  GenerateAndTestPatch("", "xxx");
+}
+
+TEST_F(BSDiffMemoryTest, TestNonemptyVsEmpty) {
+  GenerateAndTestPatch("xxx", "");
+}
+
+TEST_F(BSDiffMemoryTest, TestSmallInputsWithSmallChanges) {
+  std::string file1 =
+      "I would not, could not, in a box.\n"
+      "I could not, would not, with a fox.\n"
+      "I will not eat them with a mouse.\n"
+      "I will not eat them in a house.\n"
+      "I will not eat them here or there.\n"
+      "I will not eat them anywhere.\n"
+      "I do not eat green eggs and ham.\n"
+      "I do not like them, Sam-I-am.\n";
+  std::string file2 =
+      "I would not, could not, in a BOX.\n"
+      "I could not, would not, with a FOX.\n"
+      "I will not eat them with a MOUSE.\n"
+      "I will not eat them in a HOUSE.\n"
+      "I will not eat them in a HOUSE.\n"     // Extra line.
+      "I will not eat them here or THERE.\n"
+      "I will not eat them ANYWHERE.\n"
+      "I do not eat green eggs and HAM.\n"
+      "I do not like them, Sam-I-am.\n";
+  GenerateAndTestPatch(file1, file2);
+}
+
+TEST_F(BSDiffMemoryTest, TestIndenticalDlls) {
+  std::string file1 = FileContents("en-US.dll");
+  GenerateAndTestPatch(file1, file1);
+}
+
+TEST_F(BSDiffMemoryTest, TestDifferentExes) {
+  std::string file1 = FileContents("setup1.exe");
+  std::string file2 = FileContents("setup2.exe");
+  GenerateAndTestPatch(file1, file2);
+}
diff --git a/courgette/courgette.gyp b/courgette/courgette.gyp
new file mode 100644
index 0000000..03de462
--- /dev/null
+++ b/courgette/courgette.gyp
@@ -0,0 +1,111 @@
+# Copyright (c) 2009 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+{
+  'variables': {
+    'chromium_code': 1,
+  },
+  'includes': [
+    '../build/common.gypi',
+  ],
+  'targets': [
+    {
+      'target_name': 'courgette_lib',
+      'type': '<(library)',
+      'dependencies': [
+        '../base/base.gyp:base',
+        #'../third_party/lzma_sdk/7z_C.vcproj',
+      ],
+      'msvs_guid': '9A72A362-E617-4205-B9F2-43C6FB280FA1',
+      'sources': [
+        'adjustment_method.cc',
+        'adjustment_method.h',
+        'assembly_program.cc',
+        'assembly_program.h',
+        'third_party/bsdiff.h',
+        'third_party/bsdiff_apply.cc',
+        'third_party/bsdiff_create.cc',
+        'courgette.h',
+        'crc.cc',
+        'crc.h',
+        'difference_estimator.cc',
+        'difference_estimator.h',
+        'disassembler.cc',
+        'disassembler.h',
+        'encoded_program.cc',
+        'encoded_program.h',
+        'ensemble.cc',
+        'ensemble.h',
+        'ensemble_apply.cc',
+        'ensemble_create.cc',
+        'image_info.cc',
+        'image_info.h',
+        'region.h',
+        'simple_delta.cc',
+        'simple_delta.h',
+        'streams.cc',
+        'streams.h',
+        'win32_x86_generator.h',
+        'win32_x86_patcher.h',
+      ],
+    },
+   {
+      'target_name': 'courgette',
+      'type': 'executable',
+      'msvs_guid': '4EA8CE12-9C6F-45E5-9D08-720383FE3685',
+      'sources': [
+        'courgette_tool.cc',
+       ],
+      'dependencies': [
+        'courgette_lib',
+        '../base/base.gyp:base',
+      ],
+    },
+   {
+      'target_name': 'courgette_minimal_tool',
+      'type': 'executable',
+      'msvs_guid': 'EB79415F-2F17-4BDC-AADD-4CA4C2D21B73',
+      'sources': [
+        'courgette_minimal_tool.cc',
+       ],
+      'dependencies': [
+        'courgette_lib',
+        '../base/base.gyp:base',
+      ],
+    },
+    {
+      'target_name': 'courgette_unittests',
+      'type': 'executable',
+      'msvs_guid': '24309F1A-4035-46F9-A3D8-F47DC4BCC2B8',
+      'sources': [
+        'adjustment_method_unittest.cc',
+        'bsdiff_memory_unittest.cc',
+        'difference_estimator_unittest.cc',
+        'encoded_program_unittest.cc',
+        'encode_decode_unittest.cc',
+        'image_info_unittest.cc',
+        'run_all_unittests.cc',
+        'streams_unittest.cc',
+       ],
+      'dependencies': [
+        'courgette_lib',
+        '../base/base.gyp:base',
+        '../testing/gtest.gyp:gtest',
+      ],
+    },
+   {
+      'target_name': 'courgette_fuzz',
+      'type': 'executable',
+      'msvs_guid': '57C27529-8CA9-4FC3-9C02-DA05B172F785',
+      'sources': [
+        'encoded_program_fuzz_unittest.cc',
+       ],
+      'dependencies': [
+        'courgette_lib',
+        '../base/base.gyp:base',
+        '../testing/gtest.gyp:gtest',
+      ],
+    },
+  ],
+}
diff --git a/courgette/courgette.h b/courgette/courgette.h
new file mode 100644
index 0000000..0ddded5
--- /dev/null
+++ b/courgette/courgette.h
@@ -0,0 +1,118 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COURGETTE_COURGETTE_H_
+#define COURGETTE_COURGETTE_H_
+
+namespace courgette {
+
+// Status codes for Courgette APIs.
+//
+// Client code should only rely on the distintion between C_OK and the other
+// status codes.
+//
+enum Status {
+  C_OK = 1,                       // Successful operation.
+
+  C_GENERAL_ERROR = 2,            // Error other than listed below.
+
+  C_READ_OPEN_ERROR = 3,          // Could not open input file for reading.
+  C_READ_ERROR = 4,               // Could not read from opened input file.
+
+  C_WRITE_OPEN_ERROR = 3,         // Could not open output file for writing.
+  C_WRITE_ERROR = 4,              // Could not write to opened output file.
+
+  C_BAD_ENSEMBLE_MAGIC = 5,       // Ensemble patch has bad magic.
+  C_BAD_ENSEMBLE_VERSION = 6,     // Ensemble patch has wrong version.
+  C_BAD_ENSEMBLE_HEADER = 7,      // Ensemble patch has corrupt header.
+  C_BAD_ENSEMBLE_CRC = 8,         // Ensemble patch has corrupt header.
+
+  C_BAD_TRANSFORM = 12,           // Transform mis-specified.
+  C_BAD_BASE = 13,                // Base for transform malformed.
+
+  C_BINARY_DIFF_CRC_ERROR = 14,   // Internal diff input doesn't have expected
+                                  // CRC.
+
+  // Internal errors.
+  C_STREAM_ERROR = 20,            // Unexpected error from streams.h.
+  C_STREAM_NOT_CONSUMED = 21,     // Stream has extra data, is expected to be
+                                  // used up.
+  C_SERIALIZATION_FAILED = 22,    //
+  C_DESERIALIZATION_FAILED = 23,  //
+  C_INPUT_NOT_RECOGNIZED = 24,    // Unrecognized input (not an executable).
+  C_DISASSEMBLY_FAILED = 25,      //
+  C_ASSEMBLY_FAILED = 26,         //
+  C_ADJUSTMENT_FAILED = 27,       //
+
+
+};
+
+class SinkStream;
+class SinkStreamSet;
+class SourceStream;
+class SourceStreamSet;
+
+class AssemblyProgram;
+class EncodedProgram;
+
+// Applies the patch to the bytes in |old| and writes the transformed ensemble
+// to |output|.
+// Returns C_OK unless something went wrong.
+Status ApplyEnsemblePatch(SourceStream* old, SourceStream* patch,
+                          SinkStream* output);
+
+// Applies the patch in |patch_file_name| to the bytes in |old_file_name| and
+// writes the transformed ensemble to |new_file_name|.
+// Returns C_OK unless something went wrong.
+// This function first validates that the patch file has a proper header, so the
+// function can be used to 'try' a patch.
+Status ApplyEnsemblePatch(const wchar_t* old_file_name,
+                          const wchar_t* patch_file_name,
+                          const wchar_t* new_file_name);
+
+// Generates a patch that will transform the bytes in |old| into the bytes in
+// |target|.
+// Returns C_OK unless something when wrong (unexpected).
+Status GenerateEnsemblePatch(SourceStream* old, SourceStream* target,
+                             SinkStream* patch);
+
+// Parses a Windows 32-bit 'Portable Executable' format file from memory,
+// storing the pointer to the AssemblyProgram in |*output|.
+// Returns C_OK if successful, otherwise returns an error status and sets
+// |*output| to NULL.
+Status ParseWin32X86PE(const void* buffer, size_t length,
+                       AssemblyProgram** output);
+
+// Converts |program| into encoded form, returning it as |*output|.
+// Returns C_OK if succeeded, otherwise returns an error status and
+// sets |*output| to NULL
+Status Encode(AssemblyProgram* program, EncodedProgram** output);
+
+
+// Serializes |encoded| into the stream set.
+// Returns C_OK if succeeded, otherwise returns an error status.
+Status WriteEncodedProgram(EncodedProgram* encoded, SinkStreamSet* sink);
+
+// Assembles |encoded|, emitting the bytes into |buffer|.
+// Returns C_OK if succeeded, otherwise returns an error status and leaves
+// |buffer| in an undefined state.
+Status Assemble(EncodedProgram* encoded, SinkStream* buffer);
+
+// Deserializes program from the stream set.
+// Returns C_OK if succeeded, otherwise returns an error status and
+// sets |*output| to NULL
+Status ReadEncodedProgram(SourceStreamSet* source, EncodedProgram** output);
+
+// Used to free an AssemblyProgram returned by other APIs.
+void DeleteAssemblyProgram(AssemblyProgram* program);
+
+// Used to free an EncodedProgram returned by other APIs.
+void DeleteEncodedProgram(EncodedProgram* encoded);
+
+// Adjusts |program| to look more like |model|.
+//
+Status Adjust(const AssemblyProgram& model, AssemblyProgram *program);
+
+}  // namespace courgette
+#endif  // COURGETTE_COURGETTE_H_
diff --git a/courgette/courgette.sln b/courgette/courgette.sln
new file mode 100644
index 0000000..38e1f59
--- /dev/null
+++ b/courgette/courgette.sln
@@ -0,0 +1,111 @@
+Microsoft Visual Studio Solution File, Format Version 9.00
+# Visual Studio 2005
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "courgette_lib", "courgette_lib.vcproj", "{9A72A362-E617-4205-B9F2-43C6FB280FA1}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "courgette_unittests", "courgette_unittests.vcproj", "{24309F1A-4035-46F9-A3D8-F47DC4BCC2B8}"
+	ProjectSection(ProjectDependencies) = postProject
+		{9A72A362-E617-4205-B9F2-43C6FB280FA1} = {9A72A362-E617-4205-B9F2-43C6FB280FA1}
+		{1832A374-8A74-4F9E-B536-69A699B3E165} = {1832A374-8A74-4F9E-B536-69A699B3E165}
+		{BFE8E2A7-3B3B-43B0-A994-3058B852DB8B} = {BFE8E2A7-3B3B-43B0-A994-3058B852DB8B}
+		{F22022F0-2D3B-5610-4E80-C674A8E01C5A} = {F22022F0-2D3B-5610-4E80-C674A8E01C5A}
+		{8C27D792-2648-4F5E-9ED0-374276327308} = {8C27D792-2648-4F5E-9ED0-374276327308}
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "courgette_fuzz", "courgette_fuzz.vcproj", "{57C27529-8CA9-4FC3-9C02-DA05B172F785}"
+	ProjectSection(ProjectDependencies) = postProject
+		{9A72A362-E617-4205-B9F2-43C6FB280FA1} = {9A72A362-E617-4205-B9F2-43C6FB280FA1}
+		{1832A374-8A74-4F9E-B536-69A699B3E165} = {1832A374-8A74-4F9E-B536-69A699B3E165}
+		{BFE8E2A7-3B3B-43B0-A994-3058B852DB8B} = {BFE8E2A7-3B3B-43B0-A994-3058B852DB8B}
+		{F22022F0-2D3B-5610-4E80-C674A8E01C5A} = {F22022F0-2D3B-5610-4E80-C674A8E01C5A}
+		{8C27D792-2648-4F5E-9ED0-374276327308} = {8C27D792-2648-4F5E-9ED0-374276327308}
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "courgette_minimal_tool", "courgette_minimal_tool.vcproj", "{EB79415F-2F17-4BDC-AADD-4CA4C2D21B73}"
+	ProjectSection(ProjectDependencies) = postProject
+		{9A72A362-E617-4205-B9F2-43C6FB280FA1} = {9A72A362-E617-4205-B9F2-43C6FB280FA1}
+		{1832A374-8A74-4F9E-B536-69A699B3E165} = {1832A374-8A74-4F9E-B536-69A699B3E165}
+		{F22022F0-2D3B-5610-4E80-C674A8E01C5A} = {F22022F0-2D3B-5610-4E80-C674A8E01C5A}
+		{8C27D792-2648-4F5E-9ED0-374276327308} = {8C27D792-2648-4F5E-9ED0-374276327308}
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "courgette", "courgette.vcproj", "{4EA8CE12-9C6F-45E5-9D08-720383FE3685}"
+	ProjectSection(ProjectDependencies) = postProject
+		{9A72A362-E617-4205-B9F2-43C6FB280FA1} = {9A72A362-E617-4205-B9F2-43C6FB280FA1}
+		{1832A374-8A74-4F9E-B536-69A699B3E165} = {1832A374-8A74-4F9E-B536-69A699B3E165}
+		{F22022F0-2D3B-5610-4E80-C674A8E01C5A} = {F22022F0-2D3B-5610-4E80-C674A8E01C5A}
+		{8C27D792-2648-4F5E-9ED0-374276327308} = {8C27D792-2648-4F5E-9ED0-374276327308}
+	EndProjectSection
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "dependencies", "dependencies", "{3C050794-ECB2-85EB-D650-18D00BF2A989}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "icuuc", "..\third_party\icu38\icuuc.vcproj", "{8C27D792-2648-4F5E-9ED0-374276327308}"
+	ProjectSection(ProjectDependencies) = postProject
+		{A0D94973-D355-47A5-A1E2-3456F321F010} = {A0D94973-D355-47A5-A1E2-3456F321F010}
+	EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "icui18n", "..\third_party\icu38\icui18n.vcproj", "{F22022F0-2D3B-5610-4E80-C674A8E01C5A}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "icudata", "..\third_party\icu38\icudata.vcproj", "{A0D94973-D355-47A5-A1E2-3456F321F010}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "base", "..\base\base.vcproj", "{1832A374-8A74-4F9E-B536-69A699B3E165}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gtest", "..\testing\gtest.vcproj", "{BFE8E2A7-3B3B-43B0-A994-3058B852DB8B}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Win32 = Debug|Win32
+		Release|Win32 = Release|Win32
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{1832A374-8A74-4F9E-B536-69A699B3E165}.Debug|Win32.ActiveCfg = Debug|Win32
+		{1832A374-8A74-4F9E-B536-69A699B3E165}.Debug|Win32.Build.0 = Debug|Win32
+		{1832A374-8A74-4F9E-B536-69A699B3E165}.Release|Win32.ActiveCfg = Release|Win32
+		{1832A374-8A74-4F9E-B536-69A699B3E165}.Release|Win32.Build.0 = Release|Win32
+		{24309F1A-4035-46F9-A3D8-F47DC4BCC2B8}.Debug|Win32.ActiveCfg = Debug|Win32
+		{24309F1A-4035-46F9-A3D8-F47DC4BCC2B8}.Debug|Win32.Build.0 = Debug|Win32
+		{24309F1A-4035-46F9-A3D8-F47DC4BCC2B8}.Release|Win32.ActiveCfg = Release|Win32
+		{24309F1A-4035-46F9-A3D8-F47DC4BCC2B8}.Release|Win32.Build.0 = Release|Win32
+		{4EA8CE12-9C6F-45E5-9D08-720383FE3685}.Debug|Win32.ActiveCfg = Debug|Win32
+		{4EA8CE12-9C6F-45E5-9D08-720383FE3685}.Debug|Win32.Build.0 = Debug|Win32
+		{4EA8CE12-9C6F-45E5-9D08-720383FE3685}.Release|Win32.ActiveCfg = Release|Win32
+		{4EA8CE12-9C6F-45E5-9D08-720383FE3685}.Release|Win32.Build.0 = Release|Win32
+		{57C27529-8CA9-4FC3-9C02-DA05B172F785}.Debug|Win32.ActiveCfg = Debug|Win32
+		{57C27529-8CA9-4FC3-9C02-DA05B172F785}.Debug|Win32.Build.0 = Debug|Win32
+		{57C27529-8CA9-4FC3-9C02-DA05B172F785}.Release|Win32.ActiveCfg = Release|Win32
+		{57C27529-8CA9-4FC3-9C02-DA05B172F785}.Release|Win32.Build.0 = Release|Win32
+		{8C27D792-2648-4F5E-9ED0-374276327308}.Debug|Win32.ActiveCfg = Debug|Win32
+		{8C27D792-2648-4F5E-9ED0-374276327308}.Debug|Win32.Build.0 = Debug|Win32
+		{8C27D792-2648-4F5E-9ED0-374276327308}.Release|Win32.ActiveCfg = Release|Win32
+		{8C27D792-2648-4F5E-9ED0-374276327308}.Release|Win32.Build.0 = Release|Win32
+		{9A72A362-E617-4205-B9F2-43C6FB280FA1}.Debug|Win32.ActiveCfg = Debug|Win32
+		{9A72A362-E617-4205-B9F2-43C6FB280FA1}.Debug|Win32.Build.0 = Debug|Win32
+		{9A72A362-E617-4205-B9F2-43C6FB280FA1}.Release|Win32.ActiveCfg = Release|Win32
+		{9A72A362-E617-4205-B9F2-43C6FB280FA1}.Release|Win32.Build.0 = Release|Win32
+		{A0D94973-D355-47A5-A1E2-3456F321F010}.Debug|Win32.ActiveCfg = Debug|Win32
+		{A0D94973-D355-47A5-A1E2-3456F321F010}.Debug|Win32.Build.0 = Debug|Win32
+		{A0D94973-D355-47A5-A1E2-3456F321F010}.Release|Win32.ActiveCfg = Release|Win32
+		{A0D94973-D355-47A5-A1E2-3456F321F010}.Release|Win32.Build.0 = Release|Win32
+		{BFE8E2A7-3B3B-43B0-A994-3058B852DB8B}.Debug|Win32.ActiveCfg = Debug|Win32
+		{BFE8E2A7-3B3B-43B0-A994-3058B852DB8B}.Debug|Win32.Build.0 = Debug|Win32
+		{BFE8E2A7-3B3B-43B0-A994-3058B852DB8B}.Release|Win32.ActiveCfg = Release|Win32
+		{BFE8E2A7-3B3B-43B0-A994-3058B852DB8B}.Release|Win32.Build.0 = Release|Win32
+		{EB79415F-2F17-4BDC-AADD-4CA4C2D21B73}.Debug|Win32.ActiveCfg = Debug|Win32
+		{EB79415F-2F17-4BDC-AADD-4CA4C2D21B73}.Debug|Win32.Build.0 = Debug|Win32
+		{EB79415F-2F17-4BDC-AADD-4CA4C2D21B73}.Release|Win32.ActiveCfg = Release|Win32
+		{EB79415F-2F17-4BDC-AADD-4CA4C2D21B73}.Release|Win32.Build.0 = Release|Win32
+		{F22022F0-2D3B-5610-4E80-C674A8E01C5A}.Debug|Win32.ActiveCfg = Debug|Win32
+		{F22022F0-2D3B-5610-4E80-C674A8E01C5A}.Debug|Win32.Build.0 = Debug|Win32
+		{F22022F0-2D3B-5610-4E80-C674A8E01C5A}.Release|Win32.ActiveCfg = Release|Win32
+		{F22022F0-2D3B-5610-4E80-C674A8E01C5A}.Release|Win32.Build.0 = Release|Win32
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+	GlobalSection(NestedProjects) = preSolution
+		{8C27D792-2648-4F5E-9ED0-374276327308} = {3C050794-ECB2-85EB-D650-18D00BF2A989}
+		{F22022F0-2D3B-5610-4E80-C674A8E01C5A} = {3C050794-ECB2-85EB-D650-18D00BF2A989}
+		{A0D94973-D355-47A5-A1E2-3456F321F010} = {3C050794-ECB2-85EB-D650-18D00BF2A989}
+		{1832A374-8A74-4F9E-B536-69A699B3E165} = {3C050794-ECB2-85EB-D650-18D00BF2A989}
+		{BFE8E2A7-3B3B-43B0-A994-3058B852DB8B} = {3C050794-ECB2-85EB-D650-18D00BF2A989}
+	EndGlobalSection
+EndGlobal
diff --git a/courgette/courgette_minimal_tool.cc b/courgette/courgette_minimal_tool.cc
new file mode 100644
index 0000000..84de068
--- /dev/null
+++ b/courgette/courgette_minimal_tool.cc
@@ -0,0 +1,50 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// 'courgette_minimal_tool' is not meant to be a serious command-line tool.  It
+// has the minimum logic to apply a Courgette patch to a file.  The main purpose
+// is to monitor the code size of the patcher.
+
+#include <string>
+
+#include "base/file_util.h"
+
+#include "courgette/third_party/bsdiff.h"
+#include "courgette/courgette.h"
+#include "courgette/streams.h"
+
+void PrintHelp() {
+  fprintf(stderr,
+    "Usage:\n"
+    "  courgette_minimal_tool <old-file-input> <patch-file-input>"
+    " <new-file-output>\n"
+    "\n");
+}
+
+void UsageProblem(const char* message) {
+  fprintf(stderr, "%s\n", message);
+  PrintHelp();
+  exit(1);
+}
+
+void Problem(const char* message) {
+  fprintf(stderr, "%s\n", message);
+  exit(1);
+}
+
+int wmain(int argc, const wchar_t* argv[]) {
+  if (argc != 4)
+    UsageProblem("bad args");
+
+  courgette::Status status =
+      courgette::ApplyEnsemblePatch(argv[1], argv[2], argv[3]);
+
+  if (status != courgette::C_OK) {
+    if (status == courgette::C_READ_OPEN_ERROR) Problem("Can't open file.");
+    if (status == courgette::C_WRITE_OPEN_ERROR) Problem("Can't open file.");
+    if (status == courgette::C_READ_ERROR) Problem("Can't read from file.");
+    if (status == courgette::C_WRITE_ERROR) Problem("Can't write to file.");
+    Problem("patch failed.");
+  }
+}
diff --git a/courgette/courgette_tool.cc b/courgette/courgette_tool.cc
new file mode 100644
index 0000000..c07f2da
--- /dev/null
+++ b/courgette/courgette_tool.cc
@@ -0,0 +1,404 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <vector>
+#include <string>
+
+#include "base/at_exit.h"
+#include "base/basictypes.h"
+#include "base/command_line.h"
+#include "base/file_util.h"
+#include "base/logging.h"
+#include "base/string_util.h"
+
+#include "courgette/third_party/bsdiff.h"
+#include "courgette/courgette.h"
+#include "courgette/streams.h"
+
+
+void PrintHelp() {
+  fprintf(stderr,
+    "Usage:\n"
+    "  courgette -dis <executable_file> <binary_assembly_file>\n"
+    "  courgette -asm <binary_assembly_file> <executable_file>\n"
+    "  courgette -disadj <executable_file> <reference> <binary_assembly_file>\n"
+    "  courgette -gen <v1> <v2> <patch>\n"
+    "  courgette -apply <v1> <patch> <v2>\n"
+    "\n");
+}
+
+void UsageProblem(const char* message) {
+  fprintf(stderr, "%s", message);
+  fprintf(stderr, "\n");
+  PrintHelp();
+  exit(1);
+}
+
+void Problem(const char* format, ...) {
+  va_list args;
+  va_start(args, format);
+  vfprintf(stderr, format, args);
+  fprintf(stderr, "\n");
+  va_end(args);
+  exit(1);
+}
+
+std::string ReadOrFail(const std::wstring& file_name, const char* kind) {
+  FilePath file_path(file_name);
+  std::string buffer;
+  if (!file_util::ReadFileToString(file_path, &buffer))
+    Problem("Can't read %s file.", kind);
+  return buffer;
+}
+
+void WriteSinkToFile(const courgette::SinkStream *sink,
+                     const std::wstring& output_file) {
+  FilePath output_path(output_file);
+  int count =
+      file_util::WriteFile(output_path,
+                           reinterpret_cast<const char*>(sink->Buffer()),
+                           sink->Length());
+  if (count == -1)
+    Problem("Cant write output.");
+  if (count != sink->Length())
+    Problem("Incomplete write.");
+}
+
+void Disassemble(const std::wstring& input_file,
+                 const std::wstring& output_file) {
+  std::string buffer = ReadOrFail(input_file, "input");
+
+  courgette::AssemblyProgram* program = NULL;
+  const courgette::Status parse_status =
+      courgette::ParseWin32X86PE(buffer.c_str(), buffer.length(), &program);
+
+  if (parse_status != courgette::C_OK)
+    Problem("Can't parse input.");
+
+  courgette::EncodedProgram* encoded = NULL;
+  const courgette::Status encode_status = Encode(program, &encoded);
+
+  courgette::DeleteAssemblyProgram(program);
+
+  if (encode_status != courgette::C_OK)
+    Problem("Can't encode program.");
+
+  courgette::SinkStreamSet sinks;
+
+  const courgette::Status write_status =
+    courgette::WriteEncodedProgram(encoded, &sinks);
+  if (write_status != courgette::C_OK)
+    Problem("Can't serialize encoded program.");
+
+  courgette::DeleteEncodedProgram(encoded);
+
+  courgette::SinkStream sink;
+  sinks.CopyTo(&sink);
+
+  WriteSinkToFile(&sink, output_file);
+}
+
+void DisassembleAndAdjust(const std::wstring& program_file,
+                          const std::wstring& model_file,
+                          const std::wstring& output_file) {
+  std::string program_buffer = ReadOrFail(program_file, "program");
+  std::string model_buffer = ReadOrFail(model_file, "reference");
+
+  courgette::AssemblyProgram* program = NULL;
+  const courgette::Status parse_program_status =
+      courgette::ParseWin32X86PE(program_buffer.c_str(),
+                                 program_buffer.length(),
+                                 &program);
+  if (parse_program_status != courgette::C_OK)
+    Problem("Can't parse program input.");
+
+  courgette::AssemblyProgram* model = NULL;
+  const courgette::Status parse_model_status =
+      courgette::ParseWin32X86PE(model_buffer.c_str(),
+                                 model_buffer.length(),
+                                 &model);
+  if (parse_model_status != courgette::C_OK)
+    Problem("Can't parse model input.");
+
+  const courgette::Status adjust_status = Adjust(*model, program);
+  if (adjust_status != courgette::C_OK)
+    Problem("Can't adjust program.");
+
+  courgette::EncodedProgram* encoded = NULL;
+  const courgette::Status encode_status = Encode(program, &encoded);
+
+  courgette::DeleteAssemblyProgram(program);
+
+  if (encode_status != courgette::C_OK)
+    Problem("Can't encode program.");
+
+  courgette::SinkStreamSet sinks;
+
+  const courgette::Status write_status =
+    courgette::WriteEncodedProgram(encoded, &sinks);
+  if (write_status != courgette::C_OK)
+    Problem("Can't serialize encoded program.");
+
+  courgette::DeleteEncodedProgram(encoded);
+
+  courgette::SinkStream sink;
+  sinks.CopyTo(&sink);
+
+  WriteSinkToFile(&sink, output_file);
+}
+
+// Diffs two executable files, write a set of files for the diff, one file per
+// stream of the EncodedProgram format.  Each file is the bsdiff between the
+// original file's stream and the new file's stream.  This is completely
+// uninteresting to users, but it is handy for seeing how much each which
+// streams are contributing to the final file size.  Adjustment is optional.
+void DisassembleAdjustDiff(const std::wstring& model_file,
+                           const std::wstring& program_file,
+                           const std::wstring& output_file_root,
+                           bool adjust) {
+  std::string model_buffer = ReadOrFail(model_file, "'old'");
+  std::string program_buffer = ReadOrFail(program_file, "'new'");
+
+  courgette::AssemblyProgram* model = NULL;
+  const courgette::Status parse_model_status =
+      courgette::ParseWin32X86PE(model_buffer.c_str(),
+                                 model_buffer.length(),
+                                 &model);
+  if (parse_model_status != courgette::C_OK)
+    Problem("Can't parse model input.");
+
+  courgette::AssemblyProgram* program = NULL;
+  const courgette::Status parse_program_status =
+      courgette::ParseWin32X86PE(program_buffer.c_str(),
+                                 program_buffer.length(),
+                                 &program);
+  if (parse_program_status != courgette::C_OK)
+    Problem("Can't parse program input.");
+
+  if (adjust) {
+    const courgette::Status adjust_status = Adjust(*model, program);
+    if (adjust_status != courgette::C_OK)
+      Problem("Can't adjust program.");
+  }
+
+  courgette::EncodedProgram* encoded_program = NULL;
+  const courgette::Status encode_program_status =
+      Encode(program, &encoded_program);
+  courgette::DeleteAssemblyProgram(program);
+  if (encode_program_status != courgette::C_OK)
+    Problem("Can't encode program.");
+
+  courgette::EncodedProgram* encoded_model = NULL;
+  const courgette::Status encode_model_status = Encode(model, &encoded_model);
+  courgette::DeleteAssemblyProgram(model);
+  if (encode_model_status != courgette::C_OK)
+    Problem("Can't encode model.");
+
+  courgette::SinkStreamSet program_sinks;
+  const courgette::Status write_program_status =
+    courgette::WriteEncodedProgram(encoded_program, &program_sinks);
+  if (write_program_status != courgette::C_OK)
+    Problem("Can't serialize encoded program.");
+  courgette::DeleteEncodedProgram(encoded_program);
+
+  courgette::SinkStreamSet model_sinks;
+  const courgette::Status write_model_status =
+    courgette::WriteEncodedProgram(encoded_model, &model_sinks);
+  if (write_model_status != courgette::C_OK)
+    Problem("Can't serialize encoded model.");
+  courgette::DeleteEncodedProgram(encoded_program);
+
+  for (int i = 0;  ; ++i) {
+    courgette::SinkStream* old_stream = model_sinks.stream(i);
+    courgette::SinkStream* new_stream = program_sinks.stream(i);
+    if (old_stream == NULL  && new_stream == NULL)
+      break;
+
+    courgette::SourceStream old_source;
+    courgette::SourceStream new_source;
+    old_source.Init(*old_stream);
+    new_source.Init(*new_stream);
+    courgette::SinkStream patch_stream;
+    courgette::BSDiffStatus status =
+        courgette::CreateBinaryPatch(&old_source, &new_source, &patch_stream);
+    if (status != courgette::OK) Problem("-xxx failed.");
+
+    WriteSinkToFile(&patch_stream,
+                    output_file_root + L"-" + IntToWString(i));
+  }
+}
+
+void Assemble(const std::wstring& input_file,
+              const std::wstring& output_file) {
+  std::string buffer = ReadOrFail(input_file, "input");
+
+  courgette::SourceStreamSet sources;
+  if (!sources.Init(buffer.c_str(), buffer.length()))
+    Problem("Bad input file.");
+
+  courgette::EncodedProgram* encoded = NULL;
+  const courgette::Status read_status = ReadEncodedProgram(&sources, &encoded);
+  if (read_status != courgette::C_OK)
+    Problem("Bad encoded program.");
+
+  courgette::SinkStream sink;
+
+  const courgette::Status assemble_status = courgette::Assemble(encoded, &sink);
+  if (assemble_status != courgette::C_OK)
+    Problem("Can't assemble.");
+
+  WriteSinkToFile(&sink, output_file);
+}
+
+void GenerateEnsemblePatch(const std::wstring& old_file,
+                           const std::wstring& new_file,
+                           const std::wstring& patch_file) {
+  std::string old_buffer = ReadOrFail(old_file, "'old' input");
+  std::string new_buffer = ReadOrFail(new_file, "'new' input");
+
+  courgette::SourceStream old_stream;
+  courgette::SourceStream new_stream;
+  old_stream.Init(old_buffer);
+  new_stream.Init(new_buffer);
+
+  courgette::SinkStream patch_stream;
+  courgette::Status status =
+      courgette::GenerateEnsemblePatch(&old_stream, &new_stream, &patch_stream);
+
+  if (status != courgette::C_OK) Problem("-gen failed.");
+
+  WriteSinkToFile(&patch_stream, patch_file);
+}
+
+void ApplyEnsemblePatch(const std::wstring& old_file,
+                        const std::wstring& patch_file,
+                        const std::wstring& new_file) {
+  std::string old_buffer = ReadOrFail(old_file, "'old' input");
+  std::string patch_buffer = ReadOrFail(patch_file, "'patch' input");
+
+  courgette::SourceStream old_stream;
+  courgette::SourceStream patch_stream;
+  old_stream.Init(old_buffer);
+  patch_stream.Init(patch_buffer);
+  courgette::SinkStream new_stream;
+  courgette::Status status =
+      courgette::ApplyEnsemblePatch(&old_stream, &patch_stream, &new_stream);
+
+  if (status != courgette::C_OK) Problem("-apply failed.");
+
+  WriteSinkToFile(&new_stream, new_file);
+}
+
+void GenerateBSDiffPatch(const std::wstring& old_file,
+                         const std::wstring& new_file,
+                         const std::wstring& patch_file) {
+  std::string old_buffer = ReadOrFail(old_file, "'old' input");
+  std::string new_buffer = ReadOrFail(new_file, "'new' input");
+
+  courgette::SourceStream old_stream;
+  courgette::SourceStream new_stream;
+  old_stream.Init(old_buffer);
+  new_stream.Init(new_buffer);
+
+  courgette::SinkStream patch_stream;
+  courgette::BSDiffStatus status =
+      courgette::CreateBinaryPatch(&old_stream, &new_stream, &patch_stream);
+
+  if (status != courgette::OK) Problem("-genbsdiff failed.");
+
+  WriteSinkToFile(&patch_stream, patch_file);
+}
+
+void ApplyBSDiffPatch(const std::wstring& old_file,
+                      const std::wstring& patch_file,
+                      const std::wstring& new_file) {
+  std::string old_buffer = ReadOrFail(old_file, "'old' input");
+  std::string patch_buffer = ReadOrFail(patch_file, "'patch' input");
+
+  courgette::SourceStream old_stream;
+  courgette::SourceStream patch_stream;
+  old_stream.Init(old_buffer);
+  patch_stream.Init(patch_buffer);
+
+  courgette::SinkStream new_stream;
+  courgette::BSDiffStatus status =
+      courgette::ApplyBinaryPatch(&old_stream, &patch_stream, &new_stream);
+
+  if (status != courgette::OK) Problem("-applybsdiff failed.");
+
+  WriteSinkToFile(&new_stream, new_file);
+}
+
+int main(int argc, const char* argv[]) {
+  base::AtExitManager at_exit_manager;
+  CommandLine::Init(argc, argv);
+  const CommandLine& command_line = *CommandLine::ForCurrentProcess();
+
+  bool cmd_dis = command_line.HasSwitch(L"dis");
+  bool cmd_asm = command_line.HasSwitch(L"asm");
+  bool cmd_disadj = command_line.HasSwitch(L"disadj");
+  bool cmd_make_patch = command_line.HasSwitch(L"gen");
+  bool cmd_apply_patch = command_line.HasSwitch(L"apply");
+  bool cmd_make_bsdiff_patch = command_line.HasSwitch(L"genbsdiff");
+  bool cmd_apply_bsdiff_patch = command_line.HasSwitch(L"applybsdiff");
+  bool cmd_spread_1_adjusted = command_line.HasSwitch(L"gen1a");
+  bool cmd_spread_1_unadjusted = command_line.HasSwitch(L"gen1u");
+
+  std::vector<std::wstring> values = command_line.GetLooseValues();
+
+  // '-repeat=N' is for debugging.  Running many iterations can reveal leaks and
+  // bugs in cleanup.
+  int repeat_count = 1;
+  std::wstring repeat_switch = command_line.GetSwitchValue(L"repeat");
+  if (!repeat_switch.empty())
+    if (!StringToInt(repeat_switch, &repeat_count))
+      repeat_count = 1;
+
+  if (cmd_dis + cmd_asm + cmd_disadj + cmd_make_patch + cmd_apply_patch +
+      cmd_make_bsdiff_patch + cmd_apply_bsdiff_patch +
+      cmd_spread_1_adjusted + cmd_spread_1_unadjusted
+      != 1)
+    UsageProblem(
+        "Must have exactly one of:\n"
+        "  -asm, -dis, -disadj, -gen or -apply, -genbsdiff or -applybsdiff.");
+
+  while (repeat_count-- > 0) {
+    if (cmd_dis) {
+      if (values.size() != 2)
+        UsageProblem("-dis <executable_file> <courgette_file>");
+      Disassemble(values[0], values[1]);
+    } else if (cmd_asm) {
+      if (values.size() != 2)
+        UsageProblem("-asm <courgette_file_input> <executable_file_output>");
+      Assemble(values[0], values[1]);
+    } else if (cmd_disadj) {
+      if (values.size() != 3)
+        UsageProblem("-disadj <executable_file> <model> <courgette_file>");
+      DisassembleAndAdjust(values[0], values[1], values[2]);
+    } else if (cmd_make_patch) {
+      if (values.size() != 3)
+        UsageProblem("-gen <old_file> <new_file> <patch_file>");
+      GenerateEnsemblePatch(values[0], values[1], values[2]);
+    } else if (cmd_apply_patch) {
+      if (values.size() != 3)
+        UsageProblem("-apply <old_file> <patch_file> <new_file>");
+      ApplyEnsemblePatch(values[0], values[1], values[2]);
+    } else if (cmd_make_bsdiff_patch) {
+      if (values.size() != 3)
+        UsageProblem("-genbsdiff <old_file> <new_file> <patch_file>");
+      GenerateBSDiffPatch(values[0], values[1], values[2]);
+    } else if (cmd_apply_bsdiff_patch) {
+      if (values.size() != 3)
+        UsageProblem("-applybsdiff <old_file> <patch_file> <new_file>");
+      ApplyBSDiffPatch(values[0], values[1], values[2]);
+    } else if (cmd_spread_1_adjusted || cmd_spread_1_unadjusted) {
+      if (values.size() != 3)
+        UsageProblem("-gen1[au] <old_file> <new_file> <patch_files_root>");
+      DisassembleAdjustDiff(values[0], values[1], values[2],
+                            cmd_spread_1_adjusted);
+    } else {
+      UsageProblem("No operation specified");
+    }
+  }
+}
diff --git a/courgette/crc.cc b/courgette/crc.cc
new file mode 100644
index 0000000..60c8c93
--- /dev/null
+++ b/courgette/crc.cc
@@ -0,0 +1,22 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Calculate Crc by calling CRC method in LZMA SDK
+
+#include "courgette/crc.h"
+
+extern "C" {
+#include "third_party/lzma_sdk/7zCrc.h"
+}
+
+namespace courgette {
+
+uint32 CalculateCrc(const uint8* buffer, size_t size) {
+  // CrcGenerateTable();
+  uint32 crc = 0xffffffffL;
+  // crc = ~CrcCalc(buffer, size);
+  return crc;
+}
+
+}  // namespace
diff --git a/courgette/crc.h b/courgette/crc.h
new file mode 100644
index 0000000..c3662bb
--- /dev/null
+++ b/courgette/crc.h
@@ -0,0 +1,17 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COURGETTE_CRC_H_
+#define COURGETTE_CRC_H_
+
+#include "base/basictypes.h"
+
+namespace courgette {
+
+// Calculates Crc of the given buffer by calling CRC method in LZMA SDK
+//
+uint32 CalculateCrc(const uint8* buffer, size_t size);
+
+}  // namespace courgette
+#endif  // COURGETTE_CRC_H_
diff --git a/courgette/difference_estimator.cc b/courgette/difference_estimator.cc
new file mode 100644
index 0000000..e591f9b
--- /dev/null
+++ b/courgette/difference_estimator.cc
@@ -0,0 +1,118 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// We want to measure the similarity of two sequences of bytes as a surrogate
+// for measuring how well a second sequence will compress differentially to the
+// first sequence.
+
+#include "courgette/difference_estimator.h"
+
+#include "base/hash_tables.h"
+
+namespace courgette {
+
+// Our difference measure is the number of k-tuples that occur in Subject that
+// don't occur in Base.
+const int kTupleSize = 4;
+
+namespace {
+
+COMPILE_ASSERT(kTupleSize >= 4 && kTupleSize <= 8, kTupleSize_between_4_and_8);
+
+size_t HashTuple(const uint8* source) {
+  size_t hash1 = *reinterpret_cast<const uint32*>(source);
+  size_t hash2 = *reinterpret_cast<const uint32*>(source + kTupleSize - 4);
+  size_t hash = ((hash1 * 17 + hash2 * 37) + (hash1 >> 17)) ^ (hash2 >> 23);
+  return hash;
+}
+
+bool RegionsEqual(const Region& a, const Region& b) {
+  if (a.length() != b.length())
+    return false;
+  return memcmp(a.start(), b.start(), a.length()) == 0;
+}
+
+}  // anonymous namepace
+
+class DifferenceEstimator::Base {
+ public:
+  explicit Base(const Region& region) : region_(region) { }
+
+  void Init() {
+    const uint8* start = region_.start();
+    const uint8* end = region_.end() - (kTupleSize - 1);
+    for (const uint8* p = start;  p < end;  ++p) {
+      size_t hash = HashTuple(p);
+      hashes_.insert(hash);
+    }
+  }
+
+  const Region& region() const { return region_; }
+
+ private:
+  Region region_;
+  base::hash_set<size_t> hashes_;
+
+  friend class DifferenceEstimator;
+  DISALLOW_COPY_AND_ASSIGN(Base);
+};
+
+class DifferenceEstimator::Subject {
+ public:
+  explicit Subject(const Region& region) : region_(region) {}
+
+  const Region& region() const { return region_; }
+
+ private:
+  Region region_;
+
+  DISALLOW_COPY_AND_ASSIGN(Subject);
+};
+
+DifferenceEstimator::DifferenceEstimator() {}
+
+DifferenceEstimator::~DifferenceEstimator() {
+  for (size_t i = 0;  i < owned_bases_.size();  ++i)
+    delete owned_bases_[i];
+  for (size_t i = 0;  i < owned_subjects_.size();  ++i)
+    delete owned_subjects_[i];
+}
+
+DifferenceEstimator::Base* DifferenceEstimator::MakeBase(const Region& region) {
+  Base* base = new Base(region);
+  base->Init();
+  owned_bases_.push_back(base);
+  return base;
+}
+
+DifferenceEstimator::Subject* DifferenceEstimator::MakeSubject(
+    const Region& region) {
+  Subject* subject = new Subject(region);
+  owned_subjects_.push_back(subject);
+  return subject;
+}
+
+size_t DifferenceEstimator::Measure(Base* base, Subject* subject) {
+  size_t mismatches = 0;
+  const uint8* start = subject->region().start();
+  const uint8* end = subject->region().end() - (kTupleSize - 1);
+
+  const uint8* p = start;
+  while (p < end) {
+    size_t hash = HashTuple(p);
+    if (base->hashes_.find(hash) == base->hashes_.end()) {
+      ++mismatches;
+    }
+    p += 1;
+  }
+
+  if (mismatches == 0) {
+    if (RegionsEqual(base->region(), subject->region()))
+      return 0;
+  }
+  ++mismatches;  // Guarantee not zero.
+  return mismatches;
+}
+
+}  // namespace
diff --git a/courgette/difference_estimator.h b/courgette/difference_estimator.h
new file mode 100644
index 0000000..295ff93
--- /dev/null
+++ b/courgette/difference_estimator.h
@@ -0,0 +1,58 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// A DifferenceEstimator class provides a means for quickly estimating the
+// difference between two regions of memory.
+
+#ifndef COURGETTE_DIFFERENCE_ESTIMATOR_H_
+#define COURGETTE_DIFFERENCE_ESTIMATOR_H_
+
+#include <vector>
+
+#include "courgette/region.h"
+
+namespace courgette {
+
+// A DifferenceEstimator simplifies the task of determining which 'Subject' byte
+// strings (stored in regions of memory) are good matches to existing 'Base'
+// regions.  The ultimate measure would be to try full differential compression
+// and measure the output size, but an estimate that correlates well with the
+// full compression is more efficient.
+//
+// The measure is asymmetric, if the Subject is a small substring of the Base
+// then it should match very well.
+//
+// The comparison is staged: first make Base and Subject objects for the regions
+// and then call 'Measure' to get the estimate.  The staging allows multiple
+// comparisons to be more efficient by precomputing information used in the
+// comparison.
+//
+class DifferenceEstimator {
+ public:
+  DifferenceEstimator();
+  ~DifferenceEstimator();
+
+  class Base;
+  class Subject;
+
+  // This DifferenceEstimator owns the objects returned by MakeBase and
+  // MakeSubject.  Caller continues to own memory at |region| and must not free
+  // it until ~DifferenceEstimator has been called.
+  Base* MakeBase(const Region& region);
+  Subject* MakeSubject(const Region& region);
+
+  // Returns a value correlated with the size of the bsdiff or xdelta difference
+  // from |base| to |subject|.  Returns zero iff the base and subject regions
+  // are bytewise identical.
+  size_t Measure(Base* base,  Subject* subject);
+
+ private:
+  std::vector<Base*> owned_bases_;
+  std::vector<Subject*> owned_subjects_;
+  DISALLOW_COPY_AND_ASSIGN(DifferenceEstimator);
+};
+
+}  // namespace
+
+#endif  // COURGETTE_DIFFERENCE_ESTIMATOR_H_
diff --git a/courgette/difference_estimator_unittest.cc b/courgette/difference_estimator_unittest.cc
new file mode 100644
index 0000000..913752b
--- /dev/null
+++ b/courgette/difference_estimator_unittest.cc
@@ -0,0 +1,58 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "courgette/difference_estimator.h"
+
+#include <string>
+
+#include "testing/gtest/include/gtest/gtest.h"
+#include "courgette/region.h"
+
+using courgette::DifferenceEstimator;
+using courgette::Region;
+
+TEST(DifferenceEstimatorTest, TestSame) {
+  static const char kString1[] = "Hello world";
+  // kString2 is stack allocated to prevent string sharing.
+  const char kString2[] = "Hello world";
+  DifferenceEstimator difference_estimator;
+  DifferenceEstimator::Base* base =
+      difference_estimator.MakeBase(Region(kString1, sizeof(kString1)));
+  DifferenceEstimator::Subject* subject =
+      difference_estimator.MakeSubject(Region(kString2, sizeof(kString2)));
+  EXPECT_EQ(0, difference_estimator.Measure(base, subject));
+}
+
+TEST(DifferenceEstimatorTest, TestDifferent) {
+  static const char kString1[] = "Hello world";
+  static const char kString2[] = "Hello universe";
+  DifferenceEstimator difference_estimator;
+  DifferenceEstimator::Base* base =
+      difference_estimator.MakeBase(Region(kString1, sizeof(kString1)));
+  DifferenceEstimator::Subject* subject =
+      difference_estimator.MakeSubject(Region(kString2, sizeof(kString2)));
+  EXPECT_EQ(10, difference_estimator.Measure(base, subject));
+}
+
+TEST(DifferenceEstimatorTest, TestDifferentSuperstring) {
+  static const char kString1[] = "abcdabcdabcd";
+  static const char kString2[] = "abcdabcdabcdabcd";
+  DifferenceEstimator difference_estimator;
+  DifferenceEstimator::Base* base =
+      difference_estimator.MakeBase(Region(kString1, sizeof(kString1)-1));
+  DifferenceEstimator::Subject* subject =
+      difference_estimator.MakeSubject(Region(kString2, sizeof(kString2)-1));
+  EXPECT_EQ(1, difference_estimator.Measure(base, subject));
+}
+
+TEST(DifferenceEstimatorTest, TestDifferentSubstring) {
+  static const char kString1[] = "abcdabcdabcdabcd";
+  static const char kString2[] = "abcdabcdabcd";
+  DifferenceEstimator difference_estimator;
+  DifferenceEstimator::Base* base =
+      difference_estimator.MakeBase(Region(kString1, sizeof(kString1)-1));
+  DifferenceEstimator::Subject* subject =
+      difference_estimator.MakeSubject(Region(kString2, sizeof(kString2)-1));
+  EXPECT_EQ(1, difference_estimator.Measure(base, subject));
+}
diff --git a/courgette/disassembler.cc b/courgette/disassembler.cc
new file mode 100644
index 0000000..6395d18
--- /dev/null
+++ b/courgette/disassembler.cc
@@ -0,0 +1,436 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "courgette/disassembler.h"
+
+#include <algorithm>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "base/basictypes.h"
+#include "base/logging.h"
+
+#include "courgette/assembly_program.h"
+#include "courgette/courgette.h"
+#include "courgette/encoded_program.h"
+#include "courgette/image_info.h"
+
+// COURGETTE_HISTOGRAM_TARGETS prints out a histogram of how frequently
+// different target addresses are referenced.  Purely for debugging.
+#define COURGETTE_HISTOGRAM_TARGETS 0
+
+namespace courgette {
+
+class DisassemblerWin32X86 : public Disassembler {
+ public:
+  explicit DisassemblerWin32X86(PEInfo* pe_info)
+      : pe_info_(pe_info),
+        incomplete_disassembly_(false) {
+  }
+
+  virtual bool Disassemble(AssemblyProgram* target);
+
+  virtual void Destroy() { delete this; }
+
+ protected:
+  PEInfo& pe_info() { return *pe_info_; }
+
+  void ParseFile(AssemblyProgram* target);
+  bool ParseAbs32Relocs();
+  void ParseRel32RelocsFromSections();
+  void ParseRel32RelocsFromSection(const Section* section);
+
+  void ParseNonSectionFileRegion(uint32 start_file_offset,
+                                 uint32 end_file_offset,
+                                 AssemblyProgram* program);
+  void ParseFileRegion(const Section* section,
+                       uint32 start_file_offset, uint32 end_file_offset,
+                       AssemblyProgram* program);
+
+#if COURGETTE_HISTOGRAM_TARGETS
+  void HistogramTargets(const char* kind, const std::map<RVA, int>& map);
+#endif
+
+  PEInfo* pe_info_;
+  bool incomplete_disassembly_;  // 'true' if can leave out 'uninteresting' bits
+
+  std::vector<RVA> abs32_locations_;
+  std::vector<RVA> rel32_locations_;
+
+#if COURGETTE_HISTOGRAM_TARGETS
+  std::map<RVA, int> abs32_target_rvas_;
+  std::map<RVA, int> rel32_target_rvas_;
+#endif
+};
+
+bool DisassemblerWin32X86::Disassemble(AssemblyProgram* target) {
+  if (!pe_info().ok())
+    return false;
+
+  target->set_image_base(pe_info().image_base());
+
+  if (!ParseAbs32Relocs())
+    return false;
+
+  ParseRel32RelocsFromSections();
+
+  ParseFile(target);
+
+  target->DefaultAssignIndexes();
+  return true;
+}
+
+static uint32 Read32LittleEndian(const void* address) {
+  return *reinterpret_cast<const uint32*>(address);
+}
+
+bool DisassemblerWin32X86::ParseAbs32Relocs() {
+  abs32_locations_.clear();
+  if (!pe_info().ParseRelocs(&abs32_locations_))
+    return false;
+
+  std::sort(abs32_locations_.begin(), abs32_locations_.end());
+
+#if COURGETTE_HISTOGRAM_TARGETS
+  for (size_t i = 0;  i < abs32_locations_.size(); ++i) {
+    RVA rva = abs32_locations_[i];
+    // The 4 bytes at the relocation are a reference to some address.
+    uint32 target_address = Read32LittleEndian(pe_info().RVAToPointer(rva));
+    ++abs32_target_rvas_[target_address - pe_info().image_base()];
+  }
+#endif
+  return true;
+}
+
+void DisassemblerWin32X86::ParseRel32RelocsFromSections() {
+  uint32 file_offset = 0;
+  while (file_offset < pe_info().length()) {
+    const Section* section = pe_info().FindNextSection(file_offset);
+    if (section == NULL)
+      break;
+    if (file_offset < section->file_offset_of_raw_data)
+      file_offset = section->file_offset_of_raw_data;
+    ParseRel32RelocsFromSection(section);
+    file_offset += section->size_of_raw_data;
+  }
+  std::sort(rel32_locations_.begin(), rel32_locations_.end());
+
+#if COURGETTE_HISTOGRAM_TARGETS
+  LOG(INFO) << "abs32_locations_ " << abs32_locations_.size();
+  LOG(INFO) << "rel32_locations_ " << rel32_locations_.size();
+  LOG(INFO) << "abs32_target_rvas_ " << abs32_target_rvas_.size();
+  LOG(INFO) << "rel32_target_rvas_ " << rel32_target_rvas_.size();
+
+  int common = 0;
+  std::map<RVA, int>::iterator abs32_iter = abs32_target_rvas_.begin();
+  std::map<RVA, int>::iterator rel32_iter = rel32_target_rvas_.begin();
+  while (abs32_iter != abs32_target_rvas_.end() &&
+         rel32_iter != rel32_target_rvas_.end()) {
+    if (abs32_iter->first < rel32_iter->first)
+      ++abs32_iter;
+    else if (rel32_iter->first < abs32_iter->first)
+      ++rel32_iter;
+    else {
+      ++common;
+      ++abs32_iter;
+      ++rel32_iter;
+    }
+  }
+  LOG(INFO) << "common " << common;
+#endif
+}
+
+void DisassemblerWin32X86::ParseRel32RelocsFromSection(const Section* section) {
+  // TODO(sra): use characteristic.
+  bool isCode = strcmp(section->name, ".text") == 0;
+  if (!isCode)
+    return;
+
+  uint32 start_file_offset = section->file_offset_of_raw_data;
+  uint32 end_file_offset = start_file_offset + section->size_of_raw_data;
+  RVA relocs_start_rva = pe_info().base_relocation_table().address_;
+
+  const uint8* start_pointer = pe_info().FileOffsetToPointer(start_file_offset);
+  const uint8* end_pointer = pe_info().FileOffsetToPointer(end_file_offset);
+
+  RVA start_rva = pe_info().FileOffsetToRVA(start_file_offset);
+  RVA end_rva = start_rva + section->virtual_size;
+
+  // Quick way to convert from Pointer to RVA within a single Section is to
+  // subtract 'pointer_to_rva'.
+  const uint8* const adjust_pointer_to_rva = start_pointer - start_rva;
+
+  std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin();
+
+  // Find the rel32 relocations.
+  const uint8* p = start_pointer;
+  while (p < end_pointer) {
+    RVA current_rva = p - adjust_pointer_to_rva;
+    if (current_rva == relocs_start_rva) {
+      uint32 relocs_size = pe_info().base_relocation_table().size_;
+      if (relocs_size) {
+        p += relocs_size;
+        continue;
+      }
+    }
+
+    //while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva)
+    //  ++abs32_pos;
+
+    // Heuristic discovery of rel32 locations in instruction stream: are the
+    // next few bytes the start of an instruction containing a rel32
+    // addressing mode?
+    const uint8* rel32 = NULL;
+
+    if (p + 5 < end_pointer) {
+      if (*p == 0xE8 || *p == 0xE9) {  // jmp rel32 and call rel32
+        rel32 = p + 1;
+      }
+    }
+    if (p + 6 < end_pointer) {
+      if (*p == 0x0F  &&  (*(p+1) & 0xF0) == 0x80) {  // Jcc long form
+        if (p[1] != 0x8A && p[1] != 0x8B)  // JPE/JPO unlikely
+          rel32 = p + 2;
+      }
+    }
+    if (rel32) {
+      RVA rel32_rva = rel32 - adjust_pointer_to_rva;
+
+      // Is there an abs32 reloc overlapping the candidate?
+      while (abs32_pos != abs32_locations_.end() && *abs32_pos < rel32_rva - 3)
+        ++abs32_pos;
+      // Now: (*abs32_pos > rel32_rva - 4) i.e. the lowest addressed 4-byte
+      // region that could overlap rel32_rva.
+      if (abs32_pos != abs32_locations_.end()) {
+        if (*abs32_pos < rel32_rva + 4) {
+          // Beginning of abs32 reloc is before end of rel32 reloc so they
+          // overlap.  Skip four bytes past the abs32 reloc.
+          p += (*abs32_pos + 4) - current_rva;
+          continue;
+        }
+      }
+
+      RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32);
+      // To be valid, rel32 target must be within image, and within this
+      // section.
+      if (pe_info().IsValidRVA(target_rva) &&
+          start_rva <= target_rva && target_rva < end_rva) {
+        rel32_locations_.push_back(rel32_rva);
+#if COURGETTE_HISTOGRAM_TARGETS
+        ++rel32_target_rvas_[target_rva];
+#endif
+        p += 4;
+        continue;
+      }
+    }
+    p += 1;
+  }
+}
+
+void DisassemblerWin32X86::ParseFile(AssemblyProgram* program) {
+  // Walk all the bytes in the file, whether or not in a section.
+  uint32 file_offset = 0;
+  while (file_offset < pe_info().length()) {
+    const Section* section = pe_info().FindNextSection(file_offset);
+    if (section == NULL) {
+      // No more sections.  There should not be extra stuff following last
+      // section.
+      //   ParseNonSectionFileRegion(file_offset, pe_info().length(), program);
+      break;
+    }
+    if (file_offset < section->file_offset_of_raw_data) {
+      uint32 section_start_offset = section->file_offset_of_raw_data;
+      ParseNonSectionFileRegion(file_offset, section_start_offset, program);
+      file_offset = section_start_offset;
+    }
+    uint32 end = file_offset + section->size_of_raw_data;
+    ParseFileRegion(section, file_offset, end, program);
+    file_offset = end;
+  }
+
+#if COURGETTE_HISTOGRAM_TARGETS
+  HistogramTargets("abs32 relocs", abs32_target_rvas_);
+  HistogramTargets("rel32 relocs", rel32_target_rvas_);
+#endif
+}
+
+void DisassemblerWin32X86::ParseNonSectionFileRegion(
+    uint32 start_file_offset,
+    uint32 end_file_offset,
+    AssemblyProgram* program) {
+  if (incomplete_disassembly_)
+    return;
+
+  const uint8* start = pe_info().FileOffsetToPointer(start_file_offset);
+  const uint8* end = pe_info().FileOffsetToPointer(end_file_offset);
+
+  const uint8* p = start;
+
+  while (p < end) {
+    program->EmitByteInstruction(*p);
+    ++p;
+  }
+}
+
+void DisassemblerWin32X86::ParseFileRegion(
+    const Section* section,
+    uint32 start_file_offset, uint32 end_file_offset,
+    AssemblyProgram* program) {
+  RVA relocs_start_rva = pe_info().base_relocation_table().address_;
+
+  const uint8* start_pointer = pe_info().FileOffsetToPointer(start_file_offset);
+  const uint8* end_pointer = pe_info().FileOffsetToPointer(end_file_offset);
+
+  RVA start_rva = pe_info().FileOffsetToRVA(start_file_offset);
+  RVA end_rva = start_rva + section->virtual_size;
+
+  // Quick way to convert from Pointer to RVA within a single Section is to
+  // subtract 'pointer_to_rva'.
+  const uint8* const adjust_pointer_to_rva = start_pointer - start_rva;
+
+  std::vector<RVA>::iterator rel32_pos = rel32_locations_.begin();
+  std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin();
+
+  program->EmitOriginInstruction(start_rva);
+
+  const uint8* p = start_pointer;
+
+  while (p < end_pointer) {
+    RVA current_rva = p - adjust_pointer_to_rva;
+
+    // The base relocation table is usually in the .relocs section, but it could
+    // actually be anywhere.  Make sure we skip it because we will regenerate it
+    // during assembly.
+    if (current_rva == relocs_start_rva) {
+      program->EmitMakeRelocsInstruction();
+      uint32 relocs_size = pe_info().base_relocation_table().size_;
+      if (relocs_size) {
+        p += relocs_size;
+        continue;
+      }
+    }
+
+    while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva)
+      ++abs32_pos;
+
+    if (abs32_pos != abs32_locations_.end() && *abs32_pos == current_rva) {
+      uint32 target_address = Read32LittleEndian(p);
+      RVA target_rva = target_address - pe_info().image_base();
+      // TODO(sra): target could be Label+offset.  It is not clear how to guess
+      // which it might be.  We assume offset==0.
+      program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva));
+      p += 4;
+      continue;
+    }
+
+    while (rel32_pos != rel32_locations_.end() && *rel32_pos < current_rva)
+      ++rel32_pos;
+
+    if (rel32_pos != rel32_locations_.end() && *rel32_pos == current_rva) {
+      RVA target_rva = current_rva + 4 + Read32LittleEndian(p);
+      program->EmitRel32(program->FindOrMakeRel32Label(target_rva));
+      p += 4;
+      continue;
+    }
+
+    if (incomplete_disassembly_) {
+      if ((abs32_pos == abs32_locations_.end() || end_rva <= *abs32_pos) &&
+          (rel32_pos == rel32_locations_.end() || end_rva <= *rel32_pos) &&
+          (end_rva <= relocs_start_rva || current_rva >= relocs_start_rva)) {
+        // No more relocs in this section, don't bother encoding bytes.
+        break;
+      }
+    }
+
+    program->EmitByteInstruction(*p);
+    p += 1;
+  }
+}
+
+#if COURGETTE_HISTOGRAM_TARGETS
+// Histogram is printed to std::cout.  It is purely for debugging the algorithm
+// and is only enabled manually in 'exploration' builds.  I don't want to add
+// command-line configuration for this feature because this code has to be
+// small, which means compiled-out.
+void DisassemblerWin32X86::HistogramTargets(const char* kind,
+                                            const std::map<RVA, int>& map) {
+  int total = 0;
+  std::map<int, std::vector<RVA> > h;
+  for (std::map<RVA, int>::const_iterator p = map.begin();
+       p != map.end();
+       ++p) {
+    h[p->second].push_back(p->first);
+    total += p->second;
+  }
+
+  std::cout << total << " " << kind << " to "
+            << map.size() << " unique targets" << std::endl;
+
+  std::cout << "indegree: #targets-with-indegree (example)" << std::endl;
+  const int kFirstN = 15;
+  bool someSkipped = false;
+  int index = 0;
+  for (std::map<int, std::vector<RVA> >::reverse_iterator p = h.rbegin();
+       p != h.rend();
+       ++p) {
+    ++index;
+    if (index <= kFirstN || p->first <= 3) {
+      if (someSkipped) {
+        std::cout << "..." << std::endl;
+      }
+      size_t count = p->second.size();
+      std::cout << std::dec << p->first << ": " << count;
+      if (count <= 2) {
+        for (size_t i = 0;  i < count;  ++i)
+          std::cout << "  " << pe_info().DescribeRVA(p->second[i]);
+      }
+      std::cout << std::endl;
+      someSkipped = false;
+    } else {
+      someSkipped = true;
+    }
+  }
+}
+#endif  // COURGETTE_HISTOGRAM_TARGETS
+
+Disassembler* Disassembler::MakeDisassemberWin32X86(PEInfo* pe_info) {
+  return new DisassemblerWin32X86(pe_info);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+Status ParseWin32X86PE(const void* buffer, size_t length,
+                       AssemblyProgram** output) {
+  *output = NULL;
+
+  PEInfo* pe_info = new PEInfo();
+  pe_info->Init(buffer, length);
+
+  if (!pe_info->ParseHeader()) {
+    delete pe_info;
+    return C_INPUT_NOT_RECOGNIZED;
+  }
+
+  Disassembler* disassembler = Disassembler::MakeDisassemberWin32X86(pe_info);
+  AssemblyProgram* program = new AssemblyProgram();
+
+  if (!disassembler->Disassemble(program)) {
+    delete program;
+    disassembler->Destroy();
+    delete pe_info;
+    return C_DISASSEMBLY_FAILED;
+  }
+
+  disassembler->Destroy();
+  delete pe_info;
+  *output = program;
+  return C_OK;
+}
+
+void DeleteAssemblyProgram(AssemblyProgram* program) {
+  delete program;
+}
+
+}  // namespace courgette
diff --git a/courgette/disassembler.h b/courgette/disassembler.h
new file mode 100644
index 0000000..fa7c908
--- /dev/null
+++ b/courgette/disassembler.h
@@ -0,0 +1,38 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COURGETTE_DISASSEMBLER_H_
+#define COURGETTE_DISASSEMBLER_H_
+
+#include "base/basictypes.h"
+
+namespace courgette {
+
+class AssemblyProgram;
+class PEInfo;
+
+class Disassembler {
+ public:
+  // Factory methods for making disassemblers for various kinds of executables.
+  // We have only one so far.
+
+  static Disassembler* MakeDisassemberWin32X86(PEInfo* pe_info);
+
+  // Disassembles the item passed to the factory method into the output
+  // parameter 'program'.
+  virtual bool Disassemble(AssemblyProgram* program) = 0;
+
+  // Deletes 'this' disassembler.
+  virtual void Destroy() = 0;
+
+ protected:
+  Disassembler() {}
+  virtual ~Disassembler() {}
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(Disassembler);
+};
+
+}  // namespace courgette
+#endif  // COURGETTE_DISASSEMBLER_H_
diff --git a/courgette/encode_decode_unittest.cc b/courgette/encode_decode_unittest.cc
new file mode 100644
index 0000000..436479d8
--- /dev/null
+++ b/courgette/encode_decode_unittest.cc
@@ -0,0 +1,105 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <string>
+
+#include "base/path_service.h"
+#include "base/file_util.h"
+#include "base/string_util.h"
+
+#include "courgette/courgette.h"
+#include "courgette/streams.h"
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+class EncodeDecodeTest : public testing::Test {
+ public:
+  void TestExe(const char *) const;
+
+ private:
+  void SetUp() {
+    PathService::Get(base::DIR_SOURCE_ROOT, &testdata_dir_);
+    file_util::AppendToPath(&testdata_dir_, L"courgette");
+    file_util::AppendToPath(&testdata_dir_, L"testdata");
+  }
+
+  void TearDown() { }
+
+  // Returns contents of |file_name| as uninterprested bytes stored in a string.
+  std::string FileContents(const char* file_name) const;
+
+  std::wstring testdata_dir_;  // Full path name of testdata directory
+};
+
+//  Reads a test file into a string.
+std::string EncodeDecodeTest::FileContents(const char* file_name) const {
+  std::wstring file_path = testdata_dir_;
+  file_util::AppendToPath(&file_path, UTF8ToWide(file_name));
+  std::string file_contents;
+  if (!file_util::ReadFileToString(file_path, &file_contents)) {
+    EXPECT_TRUE(!"Could not read test data");
+  }
+  return file_contents;
+}
+
+void EncodeDecodeTest::TestExe(const char* file_name) const {
+  // Test top-level Courgette API for converting an a file to a binary
+  // assembly representation and back.
+  std::string file1 = FileContents(file_name);
+
+  const void* original_buffer = file1.c_str();
+  size_t original_length = file1.size();
+
+  courgette::AssemblyProgram* program = NULL;
+  const courgette::Status parse_status =
+    courgette::ParseWin32X86PE(original_buffer, original_length, &program);
+  EXPECT_EQ(courgette::C_OK, parse_status);
+
+  courgette::EncodedProgram* encoded = NULL;
+
+  const courgette::Status encode_status = Encode(program, &encoded);
+  EXPECT_EQ(courgette::C_OK, encode_status);
+
+  DeleteAssemblyProgram(program);
+
+  courgette::SinkStreamSet sinks;
+  const courgette::Status write_status = WriteEncodedProgram(encoded, &sinks);
+  EXPECT_EQ(courgette::C_OK, write_status);
+
+  DeleteEncodedProgram(encoded);
+
+  courgette::SinkStream sink;
+  bool can_collect = sinks.CopyTo(&sink);
+  EXPECT_TRUE(can_collect);
+
+  const void* buffer = sink.Buffer();
+  size_t length = sink.Length();
+
+  EXPECT_EQ(971850, length);
+
+  courgette::SourceStreamSet sources;
+  bool can_get_source_streams = sources.Init(buffer, length);
+  EXPECT_TRUE(can_get_source_streams);
+
+  courgette::EncodedProgram *encoded2 = NULL;
+  const courgette::Status read_status = ReadEncodedProgram(&sources, &encoded2);
+  EXPECT_EQ(courgette::C_OK, read_status);
+
+  courgette::SinkStream assembled;
+  const courgette::Status assemble_status = Assemble(encoded2, &assembled);
+  EXPECT_EQ(courgette::C_OK, assemble_status);
+
+  const void* assembled_buffer = assembled.Buffer();
+  size_t assembled_length = assembled.Length();
+
+  EXPECT_EQ(original_length, assembled_length);
+  EXPECT_EQ(0, memcmp(original_buffer, assembled_buffer, original_length));
+
+  DeleteEncodedProgram(encoded2);
+}
+
+
+TEST_F(EncodeDecodeTest, All) {
+  TestExe("setup1.exe");
+}
diff --git a/courgette/encoded_program.cc b/courgette/encoded_program.cc
new file mode 100644
index 0000000..f606f96
--- /dev/null
+++ b/courgette/encoded_program.cc
@@ -0,0 +1,573 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "courgette/encoded_program.h"
+
+#include <algorithm>
+#include <map>
+#include <string>
+#include <vector>
+
+#include "base/logging.h"
+#include "base/sys_info.h"
+
+#include "courgette/courgette.h"
+#include "courgette/streams.h"
+
+namespace courgette {
+
+// Stream indexes.
+const int kStreamMisc    = 0;
+const int kStreamOps     = 1;
+const int kStreamBytes   = 2;
+const int kStreamAbs32Indexes = 3;
+const int kStreamRel32Indexes = 4;
+const int kStreamAbs32Addresses = 5;
+const int kStreamRel32Addresses = 6;
+const int kStreamCopyCounts = 7;
+const int kStreamOriginAddresses = kStreamMisc;
+
+const int kStreamLimit = 9;
+
+// Binary assembly language operations.
+enum EncodedProgram::OP {
+  ORIGIN,    // ORIGIN <rva> - set address for subsequent assembly.
+  COPY,      // COPY <count> <bytes> - copy bytes to output.
+  COPY1,     // COPY1 <byte> - same as COPY 1 <byte>.
+  REL32,     // REL32 <index> - emit rel32 encoded reference to address at
+             // address table offset <index>
+  ABS32,     // ABS32 <index> - emit abs32 encoded reference to address at
+             // address table offset <index>
+  MAKE_BASE_RELOCATION_TABLE,  // Emit base relocation table blocks.
+  OP_LAST
+};
+
+
+// Constructor is here rather than in the header.  Although the constructor
+// appears to do nothing it is fact quite large because of the implict calls to
+// field constructors.  Ditto for the destructor.
+EncodedProgram::EncodedProgram() {}
+EncodedProgram::~EncodedProgram() {}
+
+// Serializes a vector of integral values using Varint32 coding.
+template<typename T>
+void WriteVector(const std::vector<T>& items, SinkStream* buffer) {
+  size_t count = items.size();
+  buffer->WriteVarint32(count);
+  for (size_t i = 0;  i < count;  ++i) {
+    COMPILE_ASSERT(sizeof(T) <= sizeof(uint32), T_must_fit_in_uint32);
+    buffer->WriteVarint32(static_cast<uint32>(items[i]));
+  }
+}
+
+template<typename T>
+bool ReadVector(std::vector<T>* items, SourceStream* buffer) {
+  uint32 count;
+  if (!buffer->ReadVarint32(&count))
+    return false;
+
+  items->clear();
+  items->reserve(count);
+  for (size_t i = 0;  i < count;  ++i) {
+    uint32 item;
+    if (!buffer->ReadVarint32(&item))
+      return false;
+    items->push_back(static_cast<T>(item));
+  }
+
+  return true;
+}
+
+// Serializes a vector, using delta coding followed by Varint32 coding.
+void WriteU32Delta(const std::vector<uint32>& set, SinkStream* buffer) {
+  size_t count = set.size();
+  buffer->WriteVarint32(count);
+  uint32 prev = 0;
+  for (size_t i = 0;  i < count;  ++i) {
+    uint32 current = set[i];
+    uint32 delta = current - prev;
+    buffer->WriteVarint32(delta);
+    prev = current;
+  }
+}
+
+static bool ReadU32Delta(std::vector<uint32>* set, SourceStream* buffer) {
+  uint32 count;
+
+  if (!buffer->ReadVarint32(&count))
+    return false;
+
+  set->clear();
+  set->reserve(count);
+  uint32 prev = 0;
+
+  for (size_t i = 0;  i < count;  ++i) {
+    uint32 delta;
+    if (!buffer->ReadVarint32(&delta))
+      return false;
+    uint32 current = prev + delta;
+    set->push_back(current);
+    prev = current;
+  }
+
+  return true;
+}
+
+// Write a vector as the byte representation of the contents.
+//
+// (This only really makes sense for a type T that has sizeof(T)==1, otherwise
+// serilized representation is not endian-agnositic.  But it is useful to keep
+// the possibility of a greater size for experiments comparing Varint32 encoding
+// of a vector of larger integrals vs a plain form.)
+//
+template<typename T>
+void WriteVectorU8(const std::vector<T>& items, SinkStream* buffer) {
+  uint32 count = items.size();
+  buffer->WriteVarint32(count);
+  if (count != 0) {
+    size_t byte_count = count * sizeof(T);
+    buffer->Write(static_cast<const void*>(&items[0]), byte_count);
+  }
+}
+
+template<typename T>
+bool ReadVectorU8(std::vector<T>* items, SourceStream* buffer) {
+  uint32 count;
+  if (!buffer->ReadVarint32(&count))
+    return false;
+
+  items->clear();
+  items->resize(count);
+  if (count != 0) {
+    size_t byte_count = count * sizeof(T);
+    return buffer->Read(static_cast<void*>(&((*items)[0])), byte_count);
+  }
+  return true;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+void EncodedProgram::DefineRel32Label(int index, RVA value) {
+  DefineLabelCommon(&rel32_rva_, index, value);
+}
+
+void EncodedProgram::DefineAbs32Label(int index, RVA value) {
+  DefineLabelCommon(&abs32_rva_, index, value);
+}
+
+static const RVA kUnassignedRVA = static_cast<RVA>(-1);
+
+void EncodedProgram::DefineLabelCommon(std::vector<RVA>* rvas,
+                                       int index,
+                                       RVA rva) {
+  if (static_cast<int>(rvas->size()) <= index) {
+    rvas->resize(index + 1, kUnassignedRVA);
+  }
+  if ((*rvas)[index] != kUnassignedRVA) {
+    NOTREACHED() << "DefineLabel double assigned " << index;
+  }
+  (*rvas)[index] = rva;
+}
+
+void EncodedProgram::EndLabels() {
+  FinishLabelsCommon(&abs32_rva_);
+  FinishLabelsCommon(&rel32_rva_);
+}
+
+void EncodedProgram::FinishLabelsCommon(std::vector<RVA>* rvas) {
+  // Replace all unassigned slots with the value at the previous index so they
+  // delta-encode to zero.  (There might be better values than zero.  The way to
+  // get that is have the higher level assembly program assign the unassigned
+  // slots.)
+  RVA previous = 0;
+  size_t size = rvas->size();
+  for (size_t i = 0;  i < size;  ++i) {
+    if ((*rvas)[i] == kUnassignedRVA)
+      (*rvas)[i] = previous;
+    else
+      previous = (*rvas)[i];
+  }
+}
+
+void EncodedProgram::AddOrigin(RVA origin) {
+  ops_.push_back(ORIGIN);
+  origins_.push_back(origin);
+}
+
+void EncodedProgram::AddCopy(int count, const void* bytes) {
+  const uint8* source = static_cast<const uint8*>(bytes);
+
+  // Fold adjacent COPY instructions into one.  This nearly halves the size of
+  // an EncodedProgram with only COPY1 instructions since there are approx plain
+  // 16 bytes per reloc.  This has a working-set benefit during decompression.
+  // For compression of files with large differences this makes a small (4%)
+  // improvement in size.  For files with small differences this degrades the
+  // compressed size by 1.3%
+  if (ops_.size() > 0) {
+    if (ops_.back() == COPY1) {
+      ops_.back() = COPY;
+      copy_counts_.push_back(1);
+    }
+    if (ops_.back() == COPY) {
+      copy_counts_.back() += count;
+      for (int i = 0;  i < count;  ++i) {
+        copy_bytes_.push_back(source[i]);
+      }
+      return;
+    }
+  }
+
+  if (count == 1) {
+    ops_.push_back(COPY1);
+    copy_bytes_.push_back(source[0]);
+  } else {
+    ops_.push_back(COPY);
+    copy_counts_.push_back(count);
+    for (int i = 0;  i < count;  ++i) {
+      copy_bytes_.push_back(source[i]);
+    }
+  }
+}
+
+void EncodedProgram::AddAbs32(int label_index) {
+  ops_.push_back(ABS32);
+  abs32_ix_.push_back(label_index);
+}
+
+void EncodedProgram::AddRel32(int label_index) {
+  ops_.push_back(REL32);
+  rel32_ix_.push_back(label_index);
+}
+
+void EncodedProgram::AddMakeRelocs() {
+  ops_.push_back(MAKE_BASE_RELOCATION_TABLE);
+}
+
+void EncodedProgram::DebuggingSummary() {
+  LOG(INFO) << "EncodedProgram Summary";
+  LOG(INFO) << "  image base  " << image_base_;
+  LOG(INFO) << "  abs32 rvas  " << abs32_rva_.size();
+  LOG(INFO) << "  rel32 rvas  " << rel32_rva_.size();
+  LOG(INFO) << "  ops         " << ops_.size();
+  LOG(INFO) << "  origins     " << origins_.size();
+  LOG(INFO) << "  copy_counts " << copy_counts_.size();
+  LOG(INFO) << "  copy_bytes  " << copy_bytes_.size();
+  LOG(INFO) << "  abs32_ix    " << abs32_ix_.size();
+  LOG(INFO) << "  rel32_ix    " << rel32_ix_.size();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// For algorithm refinement purposes it is useful to write subsets of the file
+// format.  This gives us the ability to estimate the entropy of the
+// differential compression of the individual streams, which can provide
+// invaluable insights.  The default, of course, is to include all the streams.
+//
+enum FieldSelect {
+  INCLUDE_ABS32_ADDRESSES = 0x0001,
+  INCLUDE_REL32_ADDRESSES = 0x0002,
+  INCLUDE_ABS32_INDEXES   = 0x0010,
+  INCLUDE_REL32_INDEXES   = 0x0020,
+  INCLUDE_OPS             = 0x0100,
+  INCLUDE_BYTES           = 0x0200,
+  INCLUDE_COPY_COUNTS     = 0x0400,
+  INCLUDE_MISC            = 0x1000
+};
+
+static FieldSelect GetFieldSelect() {
+#if 1
+  // TODO(sra): Use better configuration.
+  std::wstring s = base::SysInfo::GetEnvVar(L"A_FIELDS");
+  if (!s.empty()) {
+    return static_cast<FieldSelect>(wcstoul(s.c_str(), 0, 0));
+  }
+#endif
+  return  static_cast<FieldSelect>(~0);
+}
+
+void EncodedProgram::WriteTo(SinkStreamSet* streams) {
+  FieldSelect select = GetFieldSelect();
+
+  // The order of fields must be consistent in WriteTo and ReadFrom, regardless
+  // of the streams used.  The code can be configured with all kStreamXXX
+  // constants the same.
+  //
+  // If we change the code to pipeline reading with assembly (to avoid temporary
+  // storage vectors by consuming operands directly from the stream) then we
+  // need to read the base address and the random access address tables first,
+  // the rest can be interleaved.
+
+  if (select & INCLUDE_MISC) {
+    // TODO(sra): write 64 bits.
+    streams->stream(kStreamMisc)->WriteVarint32(
+      static_cast<uint32>(image_base_));
+  }
+
+  if (select & INCLUDE_ABS32_ADDRESSES)
+    WriteU32Delta(abs32_rva_, streams->stream(kStreamAbs32Addresses));
+  if (select & INCLUDE_REL32_ADDRESSES)
+    WriteU32Delta(rel32_rva_, streams->stream(kStreamRel32Addresses));
+  if (select & INCLUDE_MISC)
+    WriteVector(origins_, streams->stream(kStreamOriginAddresses));
+  if (select & INCLUDE_OPS) {
+    streams->stream(kStreamOps)->Reserve(ops_.size() + 5);  // 5 for length.
+    WriteVector(ops_, streams->stream(kStreamOps));
+  }
+  if (select & INCLUDE_COPY_COUNTS)
+    WriteVector(copy_counts_, streams->stream(kStreamCopyCounts));
+  if (select & INCLUDE_BYTES)
+    WriteVectorU8(copy_bytes_, streams->stream(kStreamBytes));
+  if (select & INCLUDE_ABS32_INDEXES)
+    WriteVector(abs32_ix_, streams->stream(kStreamAbs32Indexes));
+  if (select & INCLUDE_REL32_INDEXES)
+    WriteVector(rel32_ix_, streams->stream(kStreamRel32Indexes));
+}
+
+bool EncodedProgram::ReadFrom(SourceStreamSet* streams) {
+  // TODO(sra): read 64 bits.
+  uint32 temp;
+  if (!streams->stream(kStreamMisc)->ReadVarint32(&temp))
+    return false;
+  image_base_ = temp;
+
+  if (!ReadU32Delta(&abs32_rva_, streams->stream(kStreamAbs32Addresses)))
+    return false;
+  if (!ReadU32Delta(&rel32_rva_, streams->stream(kStreamRel32Addresses)))
+    return false;
+  if (!ReadVector(&origins_, streams->stream(kStreamOriginAddresses)))
+    return false;
+  if (!ReadVector(&ops_, streams->stream(kStreamOps)))
+    return false;
+  if (!ReadVector(&copy_counts_, streams->stream(kStreamCopyCounts)))
+    return false;
+  if (!ReadVectorU8(&copy_bytes_, streams->stream(kStreamBytes)))
+    return false;
+  if (!ReadVector(&abs32_ix_, streams->stream(kStreamAbs32Indexes)))
+    return false;
+  if (!ReadVector(&rel32_ix_, streams->stream(kStreamRel32Indexes)))
+    return false;
+
+  // Check that streams have been completely consumed.
+  for (int i = 0;  i < kStreamLimit;  ++i) {
+    if (streams->stream(i)->Remaining() > 0)
+      return false;
+  }
+
+  return true;
+}
+
+// Safe, non-throwing version of std::vector::at().  Returns 'true' for success,
+// 'false' for out-of-bounds index error.
+template<typename T>
+bool VectorAt(const std::vector<T>& v, size_t index, T* output) {
+  if (index >= v.size())
+    return false;
+  *output = v[index];
+  return true;
+}
+
+bool EncodedProgram::AssembleTo(SinkStream* final_buffer) {
+  // For the most part, the assembly process walks the various tables.
+  // ix_mumble is the index into the mumble table.
+  size_t ix_origins = 0;
+  size_t ix_copy_counts = 0;
+  size_t ix_copy_bytes = 0;
+  size_t ix_abs32_ix = 0;
+  size_t ix_rel32_ix = 0;
+
+  RVA current_rva = 0;
+
+  bool pending_base_relocation_table = false;
+  SinkStream bytes_following_base_relocation_table;
+
+  SinkStream* output = final_buffer;
+
+  for (size_t ix_ops = 0;  ix_ops < ops_.size();  ++ix_ops) {
+    OP op = ops_[ix_ops];
+
+    switch (op) {
+      default:
+        return false;
+
+      case ORIGIN: {
+        RVA section_rva;
+        if (!VectorAt(origins_, ix_origins, &section_rva))
+          return false;
+        ++ix_origins;
+        current_rva = section_rva;
+        break;
+      }
+
+      case COPY: {
+        int count;
+        if (!VectorAt(copy_counts_, ix_copy_counts, &count))
+          return false;
+        ++ix_copy_counts;
+        for (int i = 0;  i < count;  ++i) {
+          uint8 b;
+          if (!VectorAt(copy_bytes_, ix_copy_bytes, &b))
+            return false;
+          ++ix_copy_bytes;
+          output->Write(&b, 1);
+        }
+        current_rva += count;
+        break;
+      }
+
+      case COPY1: {
+        uint8 b;
+        if (!VectorAt(copy_bytes_, ix_copy_bytes, &b))
+          return false;
+        ++ix_copy_bytes;
+        output->Write(&b, 1);
+        current_rva += 1;
+        break;
+      }
+
+      case REL32: {
+        uint32 index;
+        if (!VectorAt(rel32_ix_, ix_rel32_ix, &index))
+          return false;
+        ++ix_rel32_ix;
+        RVA rva;
+        if (!VectorAt(rel32_rva_, index, &rva))
+          return false;
+        uint32 offset = (rva - (current_rva + 4));
+        output->Write(&offset, 4);
+        current_rva += 4;
+        break;
+      }
+
+      case ABS32: {
+        uint32 index;
+        if (!VectorAt(abs32_ix_, ix_abs32_ix, &index))
+          return false;
+        ++ix_abs32_ix;
+        RVA rva;
+        if (!VectorAt(abs32_rva_, index, &rva))
+          return false;
+        uint32 abs32 = static_cast<uint32>(rva + image_base_);
+        abs32_relocs_.push_back(current_rva);
+        output->Write(&abs32, 4);
+        current_rva += 4;
+        break;
+      }
+
+      case MAKE_BASE_RELOCATION_TABLE: {
+        // We can see the base relocation anywhere, but we only have the
+        // information to generate it at the very end.  So we divert the bytes
+        // we are generating to a temporary stream.
+        if (pending_base_relocation_table)  // Can't have two base relocation
+                                            // tables.
+          return false;
+
+        pending_base_relocation_table = true;
+        output = &bytes_following_base_relocation_table;
+        break;
+        // There is a potential problem *if* the instruction stream contains
+        // some REL32 relocations following the base relocation and in the same
+        // section.  We don't know the size of the table, so 'current_rva' will
+        // be wrong, causing REL32 offsets to be miscalculated.  This never
+        // happens; the base relocation table is usually in a section of its
+        // own, a data-only section, and following everything else in the
+        // executable except some padding zero bytes.  We could fix this by
+        // emitting an ORIGIN after the MAKE_BASE_RELOCATION_TABLE.
+      }
+    }
+  }
+
+  if (pending_base_relocation_table) {
+    GenerateBaseRelocations(final_buffer);
+    final_buffer->Append(&bytes_following_base_relocation_table);
+  }
+
+  // Final verification check: did we consume all lists?
+  if (ix_copy_counts != copy_counts_.size())
+    return false;
+  if (ix_copy_bytes != copy_bytes_.size())
+    return false;
+  if (ix_abs32_ix != abs32_ix_.size())
+    return false;
+  if (ix_rel32_ix != rel32_ix_.size())
+    return false;
+
+  return true;
+}
+
+
+// RelocBlock has the layout of a block of relocations in the base relocation
+// table file format.
+//
+class RelocBlock {
+ public:
+  uint32 page_rva;
+  uint32 block_size;
+  uint16 relocs[4096];  // Allow up to one relocation per byte of a 4k page.
+
+  RelocBlock() : page_rva(~0), block_size(8) {}
+
+  void Add(uint16 item) {
+    relocs[(block_size-8)/2] = item;
+    block_size += 2;
+  }
+
+  void Flush(SinkStream* buffer) {
+    if (block_size != 8) {
+      if (block_size % 4 != 0) {  // Pad to make size multiple of 4 bytes.
+        Add(0);
+      }
+      buffer->Write(this, block_size);
+      block_size = 8;
+    }
+  }
+};
+
+COMPILE_ASSERT(offsetof(RelocBlock, relocs) == 8, reloc_block_header_size);
+
+void EncodedProgram::GenerateBaseRelocations(SinkStream* buffer) {
+  std::sort(abs32_relocs_.begin(), abs32_relocs_.end());
+
+  RelocBlock block;
+
+  for (size_t i = 0;  i < abs32_relocs_.size();  ++i) {
+    uint32 rva = abs32_relocs_[i];
+    uint32 page_rva = rva & ~0xFFF;
+    if (page_rva != block.page_rva) {
+      block.Flush(buffer);
+      block.page_rva = page_rva;
+    }
+    block.Add(0x3000 | (rva & 0xFFF));
+  }
+  block.Flush(buffer);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+Status WriteEncodedProgram(EncodedProgram* encoded, SinkStreamSet* sink) {
+  encoded->WriteTo(sink);
+  return C_OK;
+}
+
+Status ReadEncodedProgram(SourceStreamSet* streams, EncodedProgram** output) {
+  EncodedProgram* encoded = new EncodedProgram();
+  if (encoded->ReadFrom(streams)) {
+    *output = encoded;
+    return C_OK;
+  }
+  delete encoded;
+  return C_DESERIALIZATION_FAILED;
+}
+
+Status Assemble(EncodedProgram* encoded, SinkStream* buffer) {
+  bool assembled = encoded->AssembleTo(buffer);
+  if (assembled)
+    return C_OK;
+  return C_ASSEMBLY_FAILED;
+}
+
+void DeleteEncodedProgram(EncodedProgram* encoded) {
+  delete encoded;
+}
+
+}  // end namespace
diff --git a/courgette/encoded_program.h b/courgette/encoded_program.h
new file mode 100644
index 0000000..df1119c
--- /dev/null
+++ b/courgette/encoded_program.h
@@ -0,0 +1,83 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COURGETTE_ENCODED_PROGRAM_H_
+#define COURGETTE_ENCODED_PROGRAM_H_
+
+#include <vector>
+
+#include "base/basictypes.h"
+#include "courgette/image_info.h"
+
+namespace courgette {
+
+class SinkStream;
+class SinkStreamSet;
+class SourceStreamSet;
+
+// An EncodedProgram is a set of tables that contain a simple 'binary assembly
+// language' that can be assembled to produce a sequence of bytes, for example,
+// a Windows 32-bit executable.
+//
+class EncodedProgram {
+ public:
+  EncodedProgram();
+  ~EncodedProgram();
+
+  // Generating an EncodedProgram:
+  //
+  // (1) The image base can be specified at any time.
+  void set_image_base(uint64 base) { image_base_ = base; }
+
+  // (2) Address tables and indexes defined first.
+  void DefineRel32Label(int index, RVA address);
+  void DefineAbs32Label(int index, RVA address);
+  void EndLabels();
+
+  // (3) Add instructions in the order needed to generate bytes of file.
+  void AddOrigin(RVA rva);
+  void AddCopy(int count, const void* bytes);
+  void AddRel32(int label_index);
+  void AddAbs32(int label_index);
+  void AddMakeRelocs();
+
+  // (3) Serialize binary assembly language tables to a set of streams.
+  void WriteTo(SinkStreamSet *streams);
+
+  // Using an EncodedProgram to generate a byte stream:
+  //
+  // (4) Deserializes a fresh EncodedProgram from a set of streams.
+  bool ReadFrom(SourceStreamSet *streams);
+
+  // (5) Assembles the 'binary assembly language' into final file.
+  bool AssembleTo(SinkStream *buffer);
+
+ private:
+  enum OP;    // Binary assembly language operations.
+
+  void DebuggingSummary();
+  void GenerateBaseRelocations(SinkStream *buffer);
+  void DefineLabelCommon(std::vector<RVA>*, int, RVA);
+  void FinishLabelsCommon(std::vector<RVA>* addresses);
+
+  // Binary assembly language tables.
+  uint64 image_base_;
+  std::vector<RVA> rel32_rva_;
+  std::vector<RVA> abs32_rva_;
+  std::vector<OP> ops_;
+  std::vector<RVA> origins_;
+  std::vector<int> copy_counts_;
+  std::vector<uint8> copy_bytes_;
+  std::vector<uint32> rel32_ix_;
+  std::vector<uint32> abs32_ix_;
+
+  // Table of the addresses containing abs32 relocations; computed during
+  // assembly, used to generate base relocation table.
+  std::vector<uint32> abs32_relocs_;
+
+  DISALLOW_COPY_AND_ASSIGN(EncodedProgram);
+};
+
+}  // namespace courgette
+#endif  // COURGETTE_ENCODED_FORMAT_H_
diff --git a/courgette/encoded_program_fuzz_unittest.cc b/courgette/encoded_program_fuzz_unittest.cc
new file mode 100644
index 0000000..adc09d9
--- /dev/null
+++ b/courgette/encoded_program_fuzz_unittest.cc
@@ -0,0 +1,235 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Fuzz testing for EncodedProgram serialized format and assembly.
+//
+// We would like some assurance that if an EncodedProgram is malformed we will
+// not crash.  The EncodedProgram could be malformed either due to malicious
+// attack to due to an error in patch generation.
+//
+// We try a lot of arbitrary modifications to the serialized form and make sure
+// that the outcome is not a crash.
+
+#include <string>
+
+#include "base/path_service.h"
+#include "base/file_util.h"
+#include "base/string_util.h"
+#include "base/test_suite.h"
+
+#include "courgette/courgette.h"
+#include "courgette/streams.h"
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+class DecodeFuzzTest : public testing::Test {
+ public:
+  void FuzzExe(const char *) const;
+
+ private:
+  virtual void SetUp() {
+    PathService::Get(base::DIR_SOURCE_ROOT, &testdata_dir_);
+    testdata_dir_ = testdata_dir_.Append(L"courgette");
+    testdata_dir_ = testdata_dir_.Append(L"testdata");
+  }
+
+  virtual void TearDown() { }
+
+  void FuzzByte(const std::string& buffer, const std::string& output,
+                size_t index) const;
+  void FuzzBits(const std::string& buffer, const std::string& output,
+                size_t index, int bits_to_flip) const;
+
+  // Returns true if could assemble, false if rejected.
+  bool TryAssemble(const std::string& buffer, std::string* output) const;
+
+  // Returns contents of |file_name| as uninterprested bytes stored in a string.
+  std::string FileContents(const char* file_name) const;
+
+  // Full path name of testdata directory
+  FilePath testdata_dir_;
+};
+
+//  Reads a test file into a string.
+std::string DecodeFuzzTest::FileContents(const char* file_name) const {
+  FilePath file_path = testdata_dir_.AppendASCII(file_name);
+  std::string file_contents;
+  if (!file_util::ReadFileToString(file_path, &file_contents)) {
+    EXPECT_TRUE(!"Could not read test data");
+  }
+  return file_contents;
+}
+
+// Loads an executable and does fuzz testing in the serialized format.
+void DecodeFuzzTest::FuzzExe(const char* file_name) const {
+  std::string file1 = FileContents(file_name);
+
+  const void* original_buffer = file1.c_str();
+  size_t original_length = file1.size();
+
+  courgette::AssemblyProgram* program = NULL;
+  const courgette::Status parse_status =
+      courgette::ParseWin32X86PE(original_buffer, original_length, &program);
+  EXPECT_EQ(courgette::C_OK, parse_status);
+
+  courgette::EncodedProgram* encoded = NULL;
+
+  const courgette::Status encode_status = Encode(program, &encoded);
+  EXPECT_EQ(courgette::C_OK, encode_status);
+
+  DeleteAssemblyProgram(program);
+
+  courgette::SinkStreamSet sinks;
+  const courgette::Status write_status = WriteEncodedProgram(encoded, &sinks);
+  EXPECT_EQ(courgette::C_OK, write_status);
+
+  DeleteEncodedProgram(encoded);
+
+  courgette::SinkStream sink;
+  bool can_collect = sinks.CopyTo(&sink);
+  EXPECT_TRUE(can_collect);
+
+  size_t length = sink.Length();
+
+  std::string base_buffer(reinterpret_cast<const char*>(sink.Buffer()), length);
+  std::string base_output;
+  bool ok = TryAssemble(base_buffer, &base_output);
+  EXPECT_EQ(true, ok);
+
+  // Now we have a good serialized EncodedProgram in |base_buffer|. Time to
+  // fuzz.
+
+  // More intense fuzzing on the first part because it contains more control
+  // information like substeam lengths.
+  size_t position = 0;
+  for ( ;  position < 100 && position < length;  position += 1) {
+    FuzzByte(base_buffer, base_output, position);
+  }
+  // We would love to fuzz every position, but it takes too long.
+  for ( ;  position < length;  position += 900) {
+    FuzzByte(base_buffer, base_output, position);
+  }
+}
+
+// FuzzByte tries to break the EncodedProgram deserializer and assembler.  It
+// takes a good serialization of and EncodedProgram, flips some bits, and checks
+// that the behaviour is reasonable.  It has testing checks for unreasonable
+// behaviours.
+void DecodeFuzzTest::FuzzByte(const std::string& base_buffer,
+                              const std::string& base_output,
+                              size_t index) const {
+  printf("Fuzzing position %d\n", static_cast<int>(index));
+
+  // The following 10 values are a compromize between run time and coverage of
+  // the 255 'wrong' values at this byte position.
+
+  // 0xFF flips all the bits.
+  FuzzBits(base_buffer, base_output, index, 0xFF);
+  // 0x7F flips the most bits without changing Varint32 framing.
+  FuzzBits(base_buffer, base_output, index, 0x7F);
+  // These all flip one bit.
+  FuzzBits(base_buffer, base_output, index, 0x80);
+  FuzzBits(base_buffer, base_output, index, 0x40);
+  FuzzBits(base_buffer, base_output, index, 0x20);
+  FuzzBits(base_buffer, base_output, index, 0x10);
+  FuzzBits(base_buffer, base_output, index, 0x08);
+  FuzzBits(base_buffer, base_output, index, 0x04);
+  FuzzBits(base_buffer, base_output, index, 0x02);
+  FuzzBits(base_buffer, base_output, index, 0x01);
+}
+
+// FuzzBits tries to break the EncodedProgram deserializer and assembler.  It
+// takes a good serialization of and EncodedProgram, flips some bits, and checks
+// that the behaviour is reasonable.
+//
+// There are EXPECT calls to check for unreasonable behaviour.  These are
+// somewhat arbitrary in that the parameters cannot easily be derived from first
+// principles.  They may need updating as the serialized format evolves.
+void DecodeFuzzTest::FuzzBits(const std::string& base_buffer,
+                              const std::string& base_output,
+                              size_t index, int bits_to_flip) const {
+  std::string modified_buffer = base_buffer;
+  std::string modified_output;
+  modified_buffer[index] ^= bits_to_flip;
+
+  bool ok = TryAssemble(modified_buffer, &modified_output);
+
+  if (ok) {
+    // We normally expect TryAssemble to fail.  But sometimes it succeeds.
+    // What could have happened?  We changed one byte in the serialized form:
+    //
+    //  * If we changed one of the copied bytes, we would see a single byte
+    //    change in the output.
+    //  * If we changed an address table element, all the references to that
+    //    address would be different.
+    //  * If we changed a copy count, we would run out of data in some stream,
+    //    or leave data remaining, so should not be here.
+    //  * If we changed an origin address, it could affect all relocations based
+    //    off that address.  If no relocations were based off the address then
+    //    there will be no changes.
+    //  * If we changed an origin address, it could cause some abs32 relocs to
+    //    shift from one page to the next, changing the number and layout of
+    //    blocks in the base relocation table.
+
+    // Generated length could vary slightly due to base relocation table layout.
+    // In the worst case the number of base relocation blocks doubles, approx
+    // 12/4096 or 0.3% size of file.
+    size_t base_length = base_output.length();
+    size_t modified_length = modified_output.length();
+    ptrdiff_t diff = base_length - modified_length;
+    if (diff < -200 || diff > 200) {
+      EXPECT_EQ(base_length, modified_length);
+    }
+
+    size_t changed_byte_count = 0;
+    for (size_t i = 0;  i < base_length && i < modified_length; ++i) {
+      changed_byte_count += (base_output[i] != modified_output[i]);
+    }
+
+    if (index > 60) {                     // Beyond the origin addresses ...
+      EXPECT_NE(0, changed_byte_count);   //   ... we expect some difference.
+    }
+    // Currently all changes are smaller than this number:
+    EXPECT_GE(45000u, changed_byte_count);
+  }
+}
+
+bool DecodeFuzzTest::TryAssemble(const std::string& buffer,
+                                 std::string* output) const {
+  courgette::EncodedProgram *encoded = NULL;
+  bool result = false;
+
+  courgette::SourceStreamSet sources;
+  bool can_get_source_streams = sources.Init(buffer.c_str(), buffer.length());
+  if (can_get_source_streams) {
+    const courgette::Status read_status =
+        ReadEncodedProgram(&sources, &encoded);
+    if (read_status == courgette::C_OK) {
+      courgette::SinkStream assembled;
+      const courgette::Status assemble_status = Assemble(encoded, &assembled);
+
+      if (assemble_status == courgette::C_OK) {
+        const void* assembled_buffer = assembled.Buffer();
+        size_t assembled_length = assembled.Length();
+
+        output->clear();
+        output->assign(reinterpret_cast<const char*>(assembled_buffer),
+                       assembled_length);
+        result = true;
+      }
+    }
+  }
+
+  DeleteEncodedProgram(encoded);
+
+  return result;
+}
+
+TEST_F(DecodeFuzzTest, All) {
+  FuzzExe("setup1.exe");
+}
+
+int main(int argc, char** argv) {
+  return TestSuite(argc, argv).Run();
+}
diff --git a/courgette/encoded_program_unittest.cc b/courgette/encoded_program_unittest.cc
new file mode 100644
index 0000000..aedb838
--- /dev/null
+++ b/courgette/encoded_program_unittest.cc
@@ -0,0 +1,64 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "courgette/encoded_program.h"
+
+#include "courgette/streams.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+TEST(EncodedProgramTest, Test) {
+  //
+  // Create a simple program with a few addresses and references and
+  // check that the bits produced are as expected.
+  //
+  courgette::EncodedProgram* program = new courgette::EncodedProgram();
+
+  uint32 base = 0x00900000;
+  program->set_image_base(base);
+
+  program->DefineRel32Label(5, 0);  // REL32 index 5 == base + 0
+  program->DefineAbs32Label(7, 4);  // ABS32 index 7 == base + 4
+  program->EndLabels();
+
+  program->AddOrigin(0);  // Start at base.
+  program->AddAbs32(7);
+  program->AddRel32(5);
+
+  // Serialize and deserialize.
+
+  courgette::SinkStreamSet sinks;
+  program->WriteTo(&sinks);
+
+  courgette::SinkStream sink;
+  bool can_collect = sinks.CopyTo(&sink);
+  EXPECT_TRUE(can_collect);
+
+  const void* buffer = sink.Buffer();
+  size_t length = sink.Length();
+
+  courgette::SourceStreamSet sources;
+  bool can_get_source_streams = sources.Init(buffer, length);
+  EXPECT_TRUE(can_get_source_streams);
+
+  courgette::EncodedProgram *encoded2 = new courgette::EncodedProgram();
+  bool can_read = encoded2->ReadFrom(&sources);
+  EXPECT_TRUE(can_read);
+
+  // Finally, try to assemble.
+  courgette::SinkStream assembled;
+  bool can_assemble = encoded2->AssembleTo(&assembled);
+  EXPECT_TRUE(can_assemble);
+
+  const void* assembled_buffer = assembled.Buffer();
+  size_t assembled_length = assembled.Length();
+
+  EXPECT_EQ(8, assembled_length);
+
+  static const uint8 golden[] = {
+    0x04, 0x00, 0x90, 0x00,  // ABS32 to base + 4
+    0xF8, 0xFF, 0xFF, 0xFF   // REL32 from next line to base + 2
+  };
+
+  EXPECT_EQ(0, memcmp(assembled_buffer, golden, 8));
+}
diff --git a/courgette/ensemble.cc b/courgette/ensemble.cc
new file mode 100644
index 0000000..15bfb3e
--- /dev/null
+++ b/courgette/ensemble.cc
@@ -0,0 +1,102 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// *** File comments
+
+#include "courgette/ensemble.h"
+
+#include "base/basictypes.h"
+#include "base/logging.h"
+#include "base/string_util.h"
+
+#include "courgette/image_info.h"
+#include "courgette/region.h"
+#include "courgette/streams.h"
+#include "courgette/simple_delta.h"
+
+namespace courgette {
+
+Element::Element(Kind kind, Ensemble* ensemble, const Region& region)
+    : kind_(kind), ensemble_(ensemble), region_(region) {
+}
+
+std::string Element::Name() const {
+  return ensemble_->name() + "("
+      + IntToString(kind()) + ","
+      + Uint64ToString(offset_in_ensemble()) + ","
+      + Uint64ToString(region().length()) + ")";
+}
+
+// A subclass of Element that has a PEInfo.
+class ElementWinPE : public Element {
+ public:
+  ElementWinPE(Kind kind, Ensemble* ensemble, const Region& region,
+               PEInfo* info)
+      : Element(kind, ensemble, region),
+        pe_info_(info) {
+  }
+
+  virtual PEInfo* GetPEInfo() const { return pe_info_; }
+
+ protected:
+  ~ElementWinPE() { delete pe_info_; }
+
+ private:
+  PEInfo* pe_info_;  // Owned by |this|.
+};
+
+// Scans the Ensemble's region, sniffing out Elements.  We assume that the
+// elements do not overlap.
+Status Ensemble::FindEmbeddedElements() {
+  size_t length = region_.length();
+  const uint8* start = region_.start();
+
+  size_t position = 0;
+  while (position < length) {
+    // Quick test; Windows executables begin with 'MZ'.
+    if (start[position] == 'M' &&
+        position + 1 < length && start[position + 1] == 'Z') {
+      courgette::PEInfo *info = new courgette::PEInfo();
+      info->Init(start + position, length - position);
+      if (info->ParseHeader()) {
+        Region region(start + position, info->length());
+
+        if (info->has_text_section()) {
+          Element* element = new ElementWinPE(Element::WIN32_X86_WITH_CODE,
+                                              this, region, info);
+          owned_elements_.push_back(element);
+          elements_.push_back(element);
+          position += region.length();
+          continue;
+        }
+
+        // If we had a clever transformation for resource-only executables we
+        // should identify the suitable elements here:
+        if (!info->has_text_section() && false) {
+          Element* element = new ElementWinPE(Element::WIN32_NOCODE,
+                                              this, region, info);
+          owned_elements_.push_back(element);
+          elements_.push_back(element);
+          position += region.length();
+          continue;
+        }
+      }
+      delete info;
+    }
+
+    // This is where to add new formats, e.g. Linux executables, Dalvik
+    // executables etc.
+
+    // No Element found at current position.
+    ++position;
+  }
+  return C_OK;
+}
+
+Ensemble::~Ensemble() {
+  for (size_t i = 0;  i < owned_elements_.size();  ++i)
+    delete owned_elements_[i];
+}
+
+}  // namespace
diff --git a/courgette/ensemble.h b/courgette/ensemble.h
new file mode 100644
index 0000000..6a58cb0
--- /dev/null
+++ b/courgette/ensemble.h
@@ -0,0 +1,257 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// The main idea in Courgette is to do patching *under a tranformation*.  The
+// input is transformed into a new representation, patching occurs in the new
+// repesentation, and then the tranform is reversed to get the patched data.
+//
+// The idea is applied to pieces (or 'Elements') of the whole (or 'Ensemble').
+// Each of the elements has to go through the same set of steps in lock-step,
+// but there may be many different kinds of elements, which have different
+// transformation.
+//
+// This file declares all the main types involved in creating and applying a
+// patch with this structure.
+
+#ifndef COURGETTE_ENSEMBLE_H_
+#define COURGETTE_ENSEMBLE_H_
+
+#include <vector>
+#include <string>
+
+#include "base/basictypes.h"
+
+#include "courgette/courgette.h"
+#include "courgette/region.h"
+#include "courgette/streams.h"
+
+namespace courgette {
+
+// Forward declarations:
+class Ensemble;
+class PEInfo;
+
+// An Element is a region of an Ensemble with an identifyable kind.
+//
+class Element {
+ public:
+  enum Kind { WIN32_X86_WITH_CODE, WIN32_NOCODE };
+
+  virtual ~Element() {}
+
+  Kind kind() const { return kind_; }
+  const Region& region() const { return region_; }
+
+  // The name is used only for debugging and logging.
+  virtual std::string Name() const;
+
+  // Returns the byte position of this Element relative to the start of
+  // containing Ensemble.
+  size_t offset_in_ensemble() const;
+
+  // Some subclasses of Element might have a PEInfo.
+  virtual PEInfo* GetPEInfo() const { return NULL; }
+
+ protected:
+  Element(Kind kind, Ensemble* ensemble, const Region& region);
+
+ private:
+  Kind kind_;
+  Ensemble* ensemble_;
+  Region region_;
+
+  DISALLOW_COPY_AND_ASSIGN(Element);
+};
+
+
+class Ensemble {
+ public:
+  Ensemble(const Region& region, const char* name)
+      : region_(region), name_(name) {}
+  ~Ensemble();
+
+  const Region& region() const { return region_; }
+  const std::string& name() const { return name_; }
+
+  // Scans the region to find Elements within the region().
+  Status FindEmbeddedElements();
+
+  // Returns the elements found by 'FindEmbeddedElements'.
+  const std::vector<Element*>& elements() const { return elements_; }
+
+
+ private:
+  Region region_;       // The memory, owned by caller, containing the
+                        // Ensemble's data.
+  std::string name_;    // A debugging/logging name for the Ensemble.
+
+  std::vector<Element*> elements_;        // Embedded elements discovered.
+  std::vector<Element*> owned_elements_;  // For deallocation.
+
+  DISALLOW_COPY_AND_ASSIGN(Ensemble);
+};
+
+inline size_t Element::offset_in_ensemble() const {
+  return region().start() - ensemble_->region().start();
+}
+
+// The 'CourgettePatchFile' is class is a 'namespace' for the constants that
+// appear in a Courgette patch file.
+struct CourgettePatchFile {
+  //
+  // The Courgette patch format interleaves the data for N embedded Elements.
+  //
+  // Format of a patch file:
+  //  header:
+  //    magic
+  //    version
+  //    source-checksum
+  //    target-checksum
+  //  multiple-streams:
+  //    stream 0:
+  //      number-of-transformed-elements (N) - varint32
+  //      transformation-1-method-id
+  //      transformation-2-method-id
+  //      ...
+  //      transformation-1-initial-parameters
+  //      transformation-2-initial-parameters
+  //      ...
+  //    stream 1:
+  //      correction:
+  //        transformation-1-parameters
+  //        transformation-2-parameters
+  //        ...
+  //    stream 2:
+  //      correction:
+  //        transformed-element-1
+  //        transformed-element-2
+  //        ...
+  //    stream 3:
+  //      correction:
+  //        base-file
+  //        element-1
+  //        element-2
+  //        ...
+
+  static const uint32 kMagic = 'C' | ('o' << 8) | ('u' << 16);
+
+  static const uint32 kVersion = 20090320;
+
+  // Transformation method IDs.
+  enum TransformationMethodId {
+    T_COURGETTE_WIN32_X86 = 1,  // Windows 32 bit 'Portable Executable' x86.
+  };
+};
+
+// For any transform you would implement both a TransformationPatcher and a
+// TransformationPatchGenerator.
+//
+// TransformationPatcher is the interface which abstracts out the actual
+// transformation used on an Element.  The patching itself happens outside the
+// actions of a TransformationPatcher.  There are four steps.
+//
+// The first step is an Init step.  The parameters to the Init step identify the
+// element, for example, range of locations within the original ensemble that
+// correspond to the element.
+//
+// PredictTransformParameters, explained below.
+//
+// The two final steps are 'Transform' - to transform the element into a new
+// representation, and to 'Reform' - to transform from the new representation
+// back to the original form.
+//
+// The Transform step takes some parameters.  This allows the transform to be
+// customized to the particular element, or to receive some assistance in the
+// analysis required to perform the transform.  The transform parameters might
+// be extensive but mostly predicable, so preceeding Transform is a
+// PredictTransformParameters step.
+//
+class TransformationPatcher {
+ public:
+  virtual ~TransformationPatcher() {}
+
+  // First step: provides parameters for the patching.  This would at a minimum
+  // identify the element within the ensemble being patched.
+  virtual Status Init(SourceStream* parameter_stream) = 0;
+
+  // Second step: predicts transform parameters.
+  virtual Status PredictTransformParameters(
+      SinkStreamSet* predicted_parameters) = 0;
+
+  // Third step: transforms element from original representation into alternate
+  // representation.
+  virtual Status Transform(SourceStreamSet* corrected_parameters,
+                           SinkStreamSet* transformed_element) = 0;
+
+  // Final step: transforms element back from alternate representation into
+  // original representation.
+  virtual Status Reform(SourceStreamSet* transformed_element,
+                        SinkStream* reformed_element) = 0;
+};
+
+// TransformationPatchGenerator is the interface which abstracts out the actual
+// transformation used (and adjustment used) when differentially compressing one
+// Element from the |new_ensemble| against a corresponding element in the
+// |old_ensemble|.
+//
+// This is not a pure interface.  There is a small amount of inheritance
+// implementation for the fields and actions common to all
+// TransformationPatchGenerators.
+//
+// When TransformationPatchGenerator is subclassed, there will be a
+// corresponding subclass of TransformationPatcher.
+//
+class TransformationPatchGenerator {
+ public:
+  TransformationPatchGenerator(Element* old_element,
+                               Element* new_element,
+                               TransformationPatcher* patcher);
+
+  virtual ~TransformationPatchGenerator();
+
+  // Returns the TransformationMethodId that identies this transformation.
+  virtual CourgettePatchFile::TransformationMethodId Kind() = 0;
+
+  // Writes the parameters that will be passed to TransformationPatcher::Init.
+  virtual Status WriteInitialParameters(SinkStream* parameter_stream) = 0;
+
+  // Predicts the transform parameters for the |old_element|.  This must match
+  // exactly the output that will be produced by the PredictTransformParameters
+  // method of the corresponding subclass of TransformationPatcher.  This method
+  // is not pure. The default implementation delegates to the patcher to
+  // guarantee matching output.
+  virtual Status PredictTransformParameters(SinkStreamSet* prediction);
+
+  // Writes the desired parameters for the transform of the old element from the
+  // file representation to the alternate representation.
+  virtual Status CorrectedTransformParameters(SinkStreamSet* parameters) = 0;
+
+  // Writes both |old_element| and |new_element| in the new representation.
+  // |old_corrected_parameters| will match the |corrected_parameters| passed to
+  // the Transform method of the corresponding sublcass of
+  // TransformationPatcher.
+  //
+  // The output written to |old_transformed_element| must match exactly the
+  // output written by the Transform method of the corresponding subclass of
+  // TransformationPatcher.
+  virtual Status Transform(SourceStreamSet* old_corrected_parameters,
+                           SinkStreamSet* old_transformed_element,
+                           SinkStreamSet* new_transformed_element) = 0;
+
+  // Transforms the new transformed_element back from the alternate
+  // representation into the original file format.  This must match exactly the
+  // output that will be produced by the corresponding subclass of
+  // TransformationPatcher::Reform.  This method is not pure. The default
+  // implementation delegates to the patcher.
+  virtual Status Reform(SourceStreamSet* transformed_element,
+                        SinkStream* reformed_element);
+
+ protected:
+  Element* old_element_;
+  Element* new_element_;
+  TransformationPatcher* patcher_;
+};
+
+}  // namespace
+#endif  // COURGETTE_ENSEMBLE_H_
diff --git a/courgette/ensemble_apply.cc b/courgette/ensemble_apply.cc
new file mode 100644
index 0000000..5a9edfd4
--- /dev/null
+++ b/courgette/ensemble_apply.cc
@@ -0,0 +1,412 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// This file contains the code to apply a Courgette patch.
+
+#include "courgette/ensemble.h"
+
+#include "base/basictypes.h"
+#include "base/file_util.h"
+#include "base/logging.h"
+
+#include "courgette/crc.h"
+#include "courgette/image_info.h"
+#include "courgette/region.h"
+#include "courgette/streams.h"
+#include "courgette/simple_delta.h"
+#include "courgette/win32_x86_patcher.h"
+
+namespace courgette {
+
+// EnsemblePatchApplication is all the logic and data required to apply the
+// multi-stage patch.
+class EnsemblePatchApplication {
+ public:
+  EnsemblePatchApplication();
+  ~EnsemblePatchApplication();
+
+  Status ReadHeader(SourceStream* header_stream);
+
+  Status InitBase(const Region& region);
+
+  Status ValidateBase();
+
+  Status ReadInitialParameters(SourceStream* initial_parameters);
+
+  Status PredictTransformParameters(SinkStreamSet* predicted_parameters);
+
+  Status SubpatchTransformParameters(SinkStreamSet* prediction,
+                                     SourceStream* correction,
+                                     SourceStreamSet* corrected_parameters);
+
+  Status TransformUp(SourceStreamSet* parameters,
+                     SinkStreamSet* transformed_elements);
+
+  Status SubpatchTransformedElements(SinkStreamSet* elements,
+                                     SourceStream* correction,
+                                     SourceStreamSet* corrected_elements);
+
+  Status TransformDown(SourceStreamSet* transformed_elements,
+                       SinkStream* basic_elements);
+
+  Status SubpatchFinalOutput(SourceStream* original,
+                             SourceStream* correction,
+                             SinkStream* corrected_ensemble);
+
+ private:
+  Status SubpatchStreamSets(SinkStreamSet* predicted_items,
+                            SourceStream* correction,
+                            SourceStreamSet* corrected_items,
+                            SinkStream* corrected_items_storage);
+
+  Region base_region_;       // Location of in-memory copy of 'old' version.
+
+  uint32 source_checksum_;
+  uint32 target_checksum_;
+
+  std::vector<TransformationPatcher*> patchers_;
+
+  SinkStream corrected_parameters_storage_;
+  SinkStream corrected_elements_storage_;
+
+  DISALLOW_COPY_AND_ASSIGN(EnsemblePatchApplication);
+};
+
+EnsemblePatchApplication::EnsemblePatchApplication()
+    : source_checksum_(0), target_checksum_(0) {
+}
+
+EnsemblePatchApplication::~EnsemblePatchApplication() {
+  for (size_t i = 0;  i < patchers_.size();  ++i) {
+    delete patchers_[i];
+  }
+}
+
+Status EnsemblePatchApplication::ReadHeader(SourceStream* header_stream) {
+  uint32 magic;
+  if (!header_stream->ReadVarint32(&magic))
+    return C_BAD_ENSEMBLE_MAGIC;
+
+  if (magic != CourgettePatchFile::kMagic)
+    return C_BAD_ENSEMBLE_MAGIC;
+
+  uint32 version;
+  if (!header_stream->ReadVarint32(&version))
+    return C_BAD_ENSEMBLE_VERSION;
+
+  if (version != CourgettePatchFile::kVersion)
+    return C_BAD_ENSEMBLE_VERSION;
+
+  if (!header_stream->ReadVarint32(&source_checksum_))
+    return C_BAD_ENSEMBLE_HEADER;
+
+  if (!header_stream->ReadVarint32(&target_checksum_))
+    return C_BAD_ENSEMBLE_HEADER;
+
+  return C_OK;
+}
+
+Status EnsemblePatchApplication::InitBase(const Region& region) {
+  base_region_.assign(region);
+  return C_OK;
+}
+
+Status EnsemblePatchApplication::ValidateBase() {
+  uint32 checksum = CalculateCrc(base_region_.start(), base_region_.length());
+  if (source_checksum_ != checksum)
+    return C_BAD_ENSEMBLE_CRC;
+
+  return C_OK;
+}
+
+Status EnsemblePatchApplication::ReadInitialParameters(
+    SourceStream* transformation_parameters) {
+  uint32 number_of_transformations = 0;
+  if (!transformation_parameters->ReadVarint32(&number_of_transformations))
+    return C_BAD_ENSEMBLE_HEADER;
+
+  for (size_t i = 0;  i < number_of_transformations;  ++i) {
+    uint32 kind;
+    if (!transformation_parameters->ReadVarint32(&kind))
+      return C_BAD_ENSEMBLE_HEADER;
+
+    if (kind == CourgettePatchFile::T_COURGETTE_WIN32_X86) {
+      TransformationPatcher* patcher =
+          new CourgetteWin32X86Patcher(base_region_);
+      patchers_.push_back(patcher);
+    } else {
+      return C_BAD_ENSEMBLE_HEADER;
+    }
+  }
+
+  for (size_t i = 0;  i < patchers_.size();  ++i) {
+    Status status = patchers_[i]->Init(transformation_parameters);
+    if (status != C_OK)
+      return status;
+  }
+
+  // All transformation_parameters should have been consumed by the above loop.
+  if (!transformation_parameters->Empty())
+    return C_BAD_ENSEMBLE_HEADER;
+
+  return C_OK;
+}
+
+Status EnsemblePatchApplication::PredictTransformParameters(
+    SinkStreamSet* all_predicted_parameters) {
+  for (size_t i = 0;  i < patchers_.size();  ++i) {
+    SinkStreamSet single_predicted_parameters;
+    Status status =
+        patchers_[i]->PredictTransformParameters(&single_predicted_parameters);
+    if (status != C_OK)
+      return status;
+    if (!all_predicted_parameters->WriteSet(&single_predicted_parameters))
+      return C_STREAM_ERROR;
+  }
+  return C_OK;
+}
+
+Status EnsemblePatchApplication::SubpatchTransformParameters(
+    SinkStreamSet* predicted_parameters,
+    SourceStream* correction,
+    SourceStreamSet* corrected_parameters) {
+  return SubpatchStreamSets(predicted_parameters,
+                            correction,
+                            corrected_parameters,
+                            &corrected_parameters_storage_);
+}
+
+Status EnsemblePatchApplication::TransformUp(
+    SourceStreamSet* parameters,
+    SinkStreamSet* transformed_elements) {
+  for (size_t i = 0;  i < patchers_.size();  ++i) {
+    SourceStreamSet single_parameters;
+    if (!parameters->ReadSet(&single_parameters))
+      return C_STREAM_ERROR;
+    SinkStreamSet single_transformed_element;
+    Status status = patchers_[i]->Transform(&single_parameters,
+                                            &single_transformed_element);
+    if (status != C_OK)
+      return status;
+    if (!single_parameters.Empty())
+      return C_STREAM_NOT_CONSUMED;
+    if (!transformed_elements->WriteSet(&single_transformed_element))
+      return C_STREAM_ERROR;
+  }
+
+  if (!parameters->Empty())
+    return C_STREAM_NOT_CONSUMED;
+  return C_OK;
+}
+
+Status EnsemblePatchApplication::SubpatchTransformedElements(
+    SinkStreamSet* predicted_elements,
+    SourceStream* correction,
+    SourceStreamSet* corrected_elements) {
+  return SubpatchStreamSets(predicted_elements,
+                            correction,
+                            corrected_elements,
+                            &corrected_elements_storage_);
+}
+
+Status EnsemblePatchApplication::TransformDown(
+    SourceStreamSet* transformed_elements,
+    SinkStream* basic_elements) {
+  // Construct blob of original input followed by reformed elements.
+
+  // The original input:
+  basic_elements->Write(base_region_.start(), base_region_.length());
+
+  for (size_t i = 0;  i < patchers_.size();  ++i) {
+    SourceStreamSet single_corrected_element;
+    if (!transformed_elements->ReadSet(&single_corrected_element))
+      return C_STREAM_ERROR;
+    Status status = patchers_[i]->Reform(&single_corrected_element,
+                                         basic_elements);
+    if (status != C_OK)
+      return status;
+    if (!single_corrected_element.Empty())
+      return C_STREAM_NOT_CONSUMED;
+  }
+
+  if (!transformed_elements->Empty())
+    return C_STREAM_NOT_CONSUMED;
+
+  return C_OK;
+}
+
+Status EnsemblePatchApplication::SubpatchFinalOutput(
+    SourceStream* original,
+    SourceStream* correction,
+    SinkStream* corrected_ensemble) {
+  Status delta_status = ApplySimpleDelta(original, correction,
+                                         corrected_ensemble);
+  if (delta_status != C_OK)
+    return delta_status;
+
+  if (CalculateCrc(corrected_ensemble->Buffer(),
+                   corrected_ensemble->Length()) != target_checksum_)
+    return C_BAD_ENSEMBLE_CRC;
+
+  return C_OK;
+}
+
+Status EnsemblePatchApplication::SubpatchStreamSets(
+    SinkStreamSet* predicted_items,
+    SourceStream* correction,
+    SourceStreamSet* corrected_items,
+    SinkStream* corrected_items_storage) {
+  SinkStream linearized_predicted_items;
+  if (!predicted_items->CopyTo(&linearized_predicted_items))
+    return C_STREAM_ERROR;
+
+  SourceStream prediction;
+  prediction.Init(linearized_predicted_items);
+
+  Status status = ApplySimpleDelta(&prediction,
+                                   correction,
+                                   corrected_items_storage);
+  if (status != C_OK)
+    return status;
+
+  if (!corrected_items->Init(corrected_items_storage->Buffer(),
+                             corrected_items_storage->Length()))
+    return C_STREAM_ERROR;
+
+  return C_OK;
+}
+
+Status ApplyEnsemblePatch(SourceStream* base,
+                          SourceStream* patch,
+                          SinkStream* output) {
+  Status status;
+  EnsemblePatchApplication patch_process;
+
+  status = patch_process.ReadHeader(patch);
+  if (status != C_OK)
+    return status;
+
+  status = patch_process.InitBase(Region(base->Buffer(), base->Remaining()));
+  if (status != C_OK)
+    return status;
+
+  status = patch_process.ValidateBase();
+  if (status != C_OK)
+    return status;
+
+  // The rest of the patch stream is a StreamSet.
+  SourceStreamSet patch_streams;
+  patch_streams.Init(patch);
+
+  SourceStream* transformation_descriptions     = patch_streams.stream(0);
+  SourceStream* parameter_correction            = patch_streams.stream(1);
+  SourceStream* transformed_elements_correction = patch_streams.stream(2);
+  SourceStream* ensemble_correction             = patch_streams.stream(3);
+
+  status = patch_process.ReadInitialParameters(transformation_descriptions);
+  if (status != C_OK)
+    return status;
+
+  SinkStreamSet predicted_parameters;
+  status = patch_process.PredictTransformParameters(&predicted_parameters);
+  if (status != C_OK)
+    return status;
+
+  SourceStreamSet corrected_parameters;
+  status = patch_process.SubpatchTransformParameters(&predicted_parameters,
+                                                     parameter_correction,
+                                                     &corrected_parameters);
+  if (status != C_OK)
+    return status;
+
+  SinkStreamSet transformed_elements;
+  status = patch_process.TransformUp(&corrected_parameters,
+                                     &transformed_elements);
+  if (status != C_OK)
+    return status;
+
+  SourceStreamSet corrected_transformed_elements;
+  status = patch_process.SubpatchTransformedElements(
+          &transformed_elements,
+          transformed_elements_correction,
+          &corrected_transformed_elements);
+  if (status != C_OK)
+    return status;
+
+  SinkStream original_ensemble_and_corrected_base_elements;
+  status = patch_process.TransformDown(
+      &corrected_transformed_elements,
+      &original_ensemble_and_corrected_base_elements);
+  if (status != C_OK)
+    return status;
+
+  SourceStream final_patch_prediction;
+  final_patch_prediction.Init(original_ensemble_and_corrected_base_elements);
+  status = patch_process.SubpatchFinalOutput(&final_patch_prediction,
+                                             ensemble_correction, output);
+  if (status != C_OK)
+    return status;
+
+  return C_OK;
+}
+
+Status ApplyEnsemblePatch(const wchar_t* old_file_name,
+                          const wchar_t* patch_file_name,
+                          const wchar_t* new_file_name) {
+  Status status;
+
+  // First read enough of the patch file to validate the header is well-formed.
+  // A few varint32 numbers should fit in 100.
+  FilePath patch_file_path(patch_file_name);
+  const int BIG_ENOUGH_FOR_HEADER = 100;
+  char buffer[BIG_ENOUGH_FOR_HEADER];
+  int read_count =
+      file_util::ReadFile(patch_file_path, buffer, sizeof(buffer));
+  if (read_count < 0)
+    return C_READ_OPEN_ERROR;
+
+  // 'Dry-run' the first step of the patch process to validate format of header.
+  SourceStream patch_header_stream;
+  patch_header_stream.Init(buffer, read_count);
+  EnsemblePatchApplication patch_process;
+  status = patch_process.ReadHeader(&patch_header_stream);
+  if (status != C_OK)
+    return status;
+
+  // Header smells good so read the whole patch file for real.
+  std::string patch_file_buffer;
+  if (!file_util::ReadFileToString(patch_file_path, &patch_file_buffer))
+    return C_READ_ERROR;
+
+  // Read the old_file.
+  FilePath old_file_path(old_file_name);
+  std::string old_file_buffer;
+  if (!file_util::ReadFileToString(old_file_path, &old_file_buffer))
+    return C_READ_ERROR;
+
+  // Apply patch on streams.
+  SourceStream old_source_stream;
+  SourceStream patch_source_stream;
+  old_source_stream.Init(old_file_buffer);
+  patch_source_stream.Init(patch_file_buffer);
+  SinkStream new_sink_stream;
+  status = ApplyEnsemblePatch(&old_source_stream, &patch_source_stream,
+                              &new_sink_stream);
+
+  // Write the patched data to |new_file_name|.
+  FilePath new_file_path(new_file_name);
+  int written =
+      file_util::WriteFile(
+          new_file_path,
+          reinterpret_cast<const char*>(new_sink_stream.Buffer()),
+          new_sink_stream.Length());
+  if (written == -1)
+    return C_WRITE_OPEN_ERROR;
+  if (written != new_sink_stream.Length())
+    return C_WRITE_ERROR;
+
+  return C_OK;
+}
+
+}  // namespace
diff --git a/courgette/ensemble_create.cc b/courgette/ensemble_create.cc
new file mode 100644
index 0000000..6e64474
--- /dev/null
+++ b/courgette/ensemble_create.cc
@@ -0,0 +1,382 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// The main idea in Courgette is to do patching *under a tranformation*.  The
+// input is transformed into a new representation, patching occurs in the new
+// repesentation, and then the tranform is reversed to get the patched data.
+//
+// The idea is applied to pieces (or 'elements') of the whole (or 'ensemble').
+// Each of the elements has to go through the same set of steps in lock-step.
+
+// This file contains the code to create the patch.
+
+
+#include "courgette/ensemble.h"
+
+#include <vector>
+#include <limits>
+
+#include "base/basictypes.h"
+#include "base/logging.h"
+#include "base/time.h"
+
+#include "courgette/third_party/bsdiff.h"
+#include "courgette/crc.h"
+#include "courgette/difference_estimator.h"
+#include "courgette/image_info.h"
+#include "courgette/streams.h"
+#include "courgette/region.h"
+#include "courgette/simple_delta.h"
+
+#include "courgette/win32_x86_patcher.h"
+#include "courgette/win32_x86_generator.h"
+
+namespace courgette {
+
+TransformationPatchGenerator::TransformationPatchGenerator(
+    Element* old_element,
+    Element* new_element,
+    TransformationPatcher* patcher)
+    : old_element_(old_element),
+      new_element_(new_element),
+      patcher_(patcher) {
+}
+
+TransformationPatchGenerator::~TransformationPatchGenerator() {
+  delete patcher_;
+}
+
+// The default implementation of PredictTransformParameters delegates to the
+// patcher.
+Status TransformationPatchGenerator::PredictTransformParameters(
+    SinkStreamSet* prediction) {
+  return patcher_->PredictTransformParameters(prediction);
+}
+
+// The default implementation of Reform delegates to the patcher.
+Status TransformationPatchGenerator::Reform(
+    SourceStreamSet* transformed_element,
+    SinkStream* reformed_element) {
+  return patcher_->Reform(transformed_element, reformed_element);
+}
+
+// Makes a TransformationPatchGenerator of the appropriate variety for the
+// Element kind.
+TransformationPatchGenerator* MakeGenerator(Element* old_element,
+                                            Element* new_element) {
+  if (new_element->kind() == Element::WIN32_X86_WITH_CODE) {
+    CourgetteWin32X86PatchGenerator* generator =
+        new CourgetteWin32X86PatchGenerator(
+            old_element,
+            new_element,
+            new CourgetteWin32X86Patcher(old_element->region()));
+    return generator;
+  } else {
+    LOG(WARNING) << "Unexpected Element::Kind " << old_element->kind();
+    return NULL;
+  }
+}
+
+// FindGenerators finds TransformationPatchGenerators for the elements of
+// |new_ensemble|.  For each element of |new_ensemble| we find the closest
+// matching element from |old_ensemble| and use that as the basis for
+// differential compression.  The elements have to be the same kind so as to
+// support transformation into the same kind of 'new representation'.
+//
+Status FindGenerators(Ensemble* old_ensemble, Ensemble* new_ensemble,
+                      std::vector<TransformationPatchGenerator*>* generators) {
+  base::Time start_find_time = base::Time::Now();
+  old_ensemble->FindEmbeddedElements();
+  new_ensemble->FindEmbeddedElements();
+  LOG(INFO) << "done FindEmbeddedElements "
+            << (base::Time::Now() - start_find_time).InSecondsF();
+
+  std::vector<Element*> old_elements(old_ensemble->elements());
+  std::vector<Element*> new_elements(new_ensemble->elements());
+
+  LOG(INFO) << "old has " << old_elements.size() << " elements";
+  LOG(INFO) << "new has " << new_elements.size() << " elements";
+
+  DifferenceEstimator difference_estimator;
+  std::vector<DifferenceEstimator::Base*> bases;
+
+  base::Time start_bases_time = base::Time::Now();
+  for (size_t i = 0;  i < old_elements.size();  ++i) {
+    bases.push_back(
+        difference_estimator.MakeBase(old_elements[i]->region()));
+  }
+  LOG(INFO) << "done make bases "
+            << (base::Time::Now() - start_bases_time).InSecondsF()
+            << "s";
+
+  for (size_t new_index = 0;  new_index < new_elements.size();  ++new_index) {
+    Element* new_element = new_elements[new_index];
+    DifferenceEstimator::Subject* new_subject =
+        difference_estimator.MakeSubject(new_element->region());
+
+    // Search through old elements to find the best match.
+    //
+    // TODO(sra): This is O(N x M), i.e. O(N^2) since old_ensemble and
+    // new_ensemble probably have a very similar structure.  We can make the
+    // search faster by making the comparison provided by DifferenceEstimator
+    // more nuanced, returning early if the measured difference is greater than
+    // the current best.  This will be most effective if we can arrange that the
+    // first elements we try to match are likely the 'right' ones.  We could
+    // prioritize elements that are of a similar size or similar position in the
+    // sequence of elements.
+    //
+    Element* best_old_element = NULL;
+    size_t best_difference = std::numeric_limits<size_t>::max();
+    for (size_t old_index = 0;  old_index < old_elements.size();  ++old_index) {
+      Element* old_element = old_elements[old_index];
+      // Elements of different kinds are incompatible.
+      if (old_element->kind() != new_element->kind())
+        continue;
+
+      base::Time start_compare = base::Time::Now();
+      DifferenceEstimator::Base* old_base = bases[old_index];
+      size_t difference = difference_estimator.Measure(old_base, new_subject);
+
+      LOG(INFO) << "Compare " << old_element->Name()
+                << " to " << new_element->Name()
+                << " --> " << difference
+                << " in " << (base::Time::Now() - start_compare).InSecondsF()
+                << "s";
+      if (difference == 0) {
+        LOG(INFO) << "Skip " << new_element->Name()
+                  << " - identical to " << old_element->Name();
+        best_difference = 0;
+        best_old_element = NULL;
+        break;
+      }
+      if (difference < best_difference) {
+        best_difference = difference;
+        best_old_element = old_element;
+      }
+    }
+
+    if (best_old_element) {
+      LOG(INFO) << "Matched " << best_old_element->Name()
+                << " to " << new_element->Name()
+                << " --> " << best_difference;
+      TransformationPatchGenerator* generator =
+          MakeGenerator(best_old_element, new_element);
+      if (generator)
+        generators->push_back(generator);
+    }
+  }
+
+  LOG(INFO) << "done FindGenerators "
+            << "found " << generators->size() << " in "
+            << (base::Time::Now() - start_find_time).InSecondsF() << "s";
+
+  return C_OK;
+}
+
+void FreeGenerators(std::vector<TransformationPatchGenerator*>* generators) {
+  for (size_t i = 0;  i < generators->size();  ++i) {
+    delete (*generators)[i];
+  }
+  generators->clear();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+Status GenerateEnsemblePatch(SourceStream* base,
+                             SourceStream* update,
+                             SinkStream* final_patch) {
+  Region old_region(base->Buffer(), base->Remaining());
+  Region new_region(update->Buffer(), update->Remaining());
+  Ensemble old_ensemble(old_region, "old");
+  Ensemble new_ensemble(new_region, "new");
+  std::vector<TransformationPatchGenerator*> generators;
+  Status generators_status = FindGenerators(&old_ensemble, &new_ensemble,
+                                            &generators);
+  if (generators_status != C_OK)
+    return generators_status;
+
+  SinkStreamSet patch_streams;
+
+  SinkStream* tranformation_descriptions      = patch_streams.stream(0);
+  SinkStream* parameter_correction            = patch_streams.stream(1);
+  SinkStream* transformed_elements_correction = patch_streams.stream(2);
+  SinkStream* ensemble_correction             = patch_streams.stream(3);
+
+  uint32 number_of_transformations = generators.size();
+  tranformation_descriptions->WriteVarint32(number_of_transformations);
+
+  for (size_t i = 0;  i < number_of_transformations;  ++i) {
+    CourgettePatchFile::TransformationMethodId kind = generators[i]->Kind();
+    tranformation_descriptions->WriteVarint32(kind);
+  }
+
+  for (size_t i = 0;  i < number_of_transformations;  ++i) {
+    Status status =
+        generators[i]->WriteInitialParameters(tranformation_descriptions);
+    if (status != C_OK)
+      return status;
+  }
+
+  //
+  // Generate sub-patch for parameters.
+  //
+  SinkStreamSet predicted_parameters_sink;
+  SinkStreamSet corrected_parameters_sink;
+
+  for (size_t i = 0;  i < number_of_transformations;  ++i) {
+    SinkStreamSet single_predicted_parameters;
+    Status status;
+    status = generators[i]->PredictTransformParameters(
+        &single_predicted_parameters);
+    if (status != C_OK)
+      return status;
+    if (!predicted_parameters_sink.WriteSet(&single_predicted_parameters))
+      return C_STREAM_ERROR;
+
+    SinkStreamSet single_corrected_parameters;
+    status = generators[i]->CorrectedTransformParameters(
+        &single_corrected_parameters);
+    if (status != C_OK)
+      return status;
+    if (!corrected_parameters_sink.WriteSet(&single_corrected_parameters))
+      return C_STREAM_ERROR;
+  }
+
+  SinkStream linearized_predicted_parameters;
+  SinkStream linearized_corrected_parameters;
+
+  if (!predicted_parameters_sink.CopyTo(&linearized_predicted_parameters))
+    return C_STREAM_ERROR;
+  if (!corrected_parameters_sink.CopyTo(&linearized_corrected_parameters))
+    return C_STREAM_ERROR;
+
+  SourceStream predicted_parameters_source;
+  SourceStream corrected_parameters_source;
+  predicted_parameters_source.Init(linearized_predicted_parameters);
+  corrected_parameters_source.Init(linearized_corrected_parameters);
+
+  Status delta1_status = GenerateSimpleDelta(&predicted_parameters_source,
+                                             &corrected_parameters_source,
+                                             parameter_correction);
+  if (delta1_status != C_OK)
+    return delta1_status;
+
+  //
+  // Generate sub-patch for elements.
+  //
+  corrected_parameters_source.Init(linearized_corrected_parameters);
+  SourceStreamSet corrected_parameters_source_set;
+  if (!corrected_parameters_source_set.Init(&corrected_parameters_source))
+    return C_STREAM_ERROR;
+
+  SinkStreamSet predicted_transformed_elements;
+  SinkStreamSet corrected_transformed_elements;
+
+  for (size_t i = 0;  i < number_of_transformations;  ++i) {
+    SourceStreamSet single_parameters;
+    if (!corrected_parameters_source_set.ReadSet(&single_parameters))
+      return C_STREAM_ERROR;
+    SinkStreamSet single_predicted_transformed_element;
+    SinkStreamSet single_corrected_transformed_element;
+    Status status = generators[i]->Transform(
+        &single_parameters,
+        &single_predicted_transformed_element,
+        &single_corrected_transformed_element);
+    if (status != C_OK)
+      return status;
+    if (!single_parameters.Empty())
+      return C_STREAM_NOT_CONSUMED;
+    if (!predicted_transformed_elements.WriteSet(
+            &single_predicted_transformed_element))
+      return C_STREAM_ERROR;
+    if (!corrected_transformed_elements.WriteSet(
+            &single_corrected_transformed_element))
+      return C_STREAM_ERROR;
+  }
+
+  if (!corrected_parameters_source_set.Empty())
+    return C_STREAM_NOT_CONSUMED;
+
+  SinkStream linearized_predicted_transformed_elements;
+  SinkStream linearized_corrected_transformed_elements;
+
+  if (!predicted_transformed_elements.CopyTo(
+          &linearized_predicted_transformed_elements))
+    return C_STREAM_ERROR;
+  if (!corrected_transformed_elements.CopyTo(
+          &linearized_corrected_transformed_elements))
+    return C_STREAM_ERROR;
+
+  SourceStream predicted_transformed_elements_source;
+  SourceStream corrected_transformed_elements_source;
+  predicted_transformed_elements_source
+      .Init(linearized_predicted_transformed_elements);
+  corrected_transformed_elements_source
+      .Init(linearized_corrected_transformed_elements);
+
+  Status delta2_status =
+      GenerateSimpleDelta(&predicted_transformed_elements_source,
+                          &corrected_transformed_elements_source,
+                          transformed_elements_correction);
+  if (delta2_status != C_OK)
+    return delta2_status;
+
+  //
+  // Generate sub-patch for whole enchilada.
+  //
+  SinkStream predicted_ensemble;
+
+  predicted_ensemble.Write(base->Buffer(), base->Remaining());
+
+  SourceStreamSet corrected_transformed_elements_source_set;
+  corrected_transformed_elements_source
+      .Init(linearized_corrected_transformed_elements);
+  if (!corrected_transformed_elements_source_set
+      .Init(&corrected_transformed_elements_source))
+    return C_STREAM_ERROR;
+
+  for (size_t i = 0;  i < number_of_transformations;  ++i) {
+    SourceStreamSet single_corrected_transformed_element;
+    if (!corrected_transformed_elements_source_set.ReadSet(
+            &single_corrected_transformed_element))
+      return C_STREAM_ERROR;
+    Status status = generators[i]->Reform(&single_corrected_transformed_element,
+                                          &predicted_ensemble);
+    if (status != C_OK)
+      return status;
+    if (!single_corrected_transformed_element.Empty())
+      return C_STREAM_NOT_CONSUMED;
+  }
+
+  if (!corrected_transformed_elements_source_set.Empty())
+    return C_STREAM_NOT_CONSUMED;
+
+  FreeGenerators(&generators);
+
+  SourceStream predicted_ensemble_source;
+  predicted_ensemble_source.Init(predicted_ensemble);
+  Status delta3_status = GenerateSimpleDelta(&predicted_ensemble_source,
+                                             update,
+                                             ensemble_correction);
+  if (delta3_status != C_OK)
+    return delta3_status;
+
+  //
+  // Final output stream has a header followed by a StreamSet.
+  //
+  final_patch->WriteVarint32(CourgettePatchFile::kMagic);
+  final_patch->WriteVarint32(CourgettePatchFile::kVersion);
+
+  final_patch->WriteVarint32(
+      CalculateCrc(old_region.start(), old_region.length()));
+  final_patch->WriteVarint32(
+      CalculateCrc(new_region.start(), new_region.length()));
+
+  if (!patch_streams.CopyTo(final_patch))
+    return C_STREAM_ERROR;
+
+  return C_OK;
+}
+
+}  // namespace
diff --git a/courgette/image_info.cc b/courgette/image_info.cc
new file mode 100644
index 0000000..5651f36
--- /dev/null
+++ b/courgette/image_info.cc
@@ -0,0 +1,391 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "courgette/image_info.h"
+
+#include <memory.h>
+#include <algorithm>
+#include <map>
+#include <set>
+#include <sstream>
+#include <vector>
+
+#include "base/logging.h"
+
+namespace courgette {
+
+std::string SectionName(const Section* section) {
+  if (section == NULL)
+    return "<none>";
+  char name[9];
+  memcpy(name, section->name, 8);
+  name[8] = '\0';  // Ensure termination.
+  return name;
+}
+
+PEInfo::PEInfo()
+  : failure_reason_("uninitialized"),
+    start_(0), end_(0), length_(0),
+    is_PE32_plus_(0), file_length_(0), has_text_section_(false) {
+}
+
+void PEInfo::Init(const void* start, size_t length) {
+  start_ = reinterpret_cast<const uint8*>(start);
+  length_ = length;
+  end_ = start_ + length_;
+  failure_reason_ = "unparsed";
+}
+
+// DescribeRVA is for debugging only.  I would put it under #ifdef DEBUG except
+// that during development I'm finding I need to call it when compiled in
+// Release mode.  Hence:
+// TODO(sra): make this compile only for debug mode.
+std::string PEInfo::DescribeRVA(RVA rva) const {
+  const Section* section = RVAToSection(rva);
+  std::ostringstream s;
+  s << std::hex << rva;
+  if (section) {
+    s << " (";
+    s << SectionName(section) << "+"
+      << std::hex << (rva - section->virtual_address)
+      << ")";
+  }
+  return s.str();
+}
+
+const Section* PEInfo::FindNextSection(uint32 fileOffset) const {
+  const Section* best = 0;
+  for (int i = 0; i < number_of_sections_; i++) {
+    const Section* section = &sections_[i];
+    if (fileOffset <= section->file_offset_of_raw_data) {
+      if (best == 0 ||
+          section->file_offset_of_raw_data < best->file_offset_of_raw_data) {
+        best = section;
+      }
+    }
+  }
+  return best;
+}
+
+const Section* PEInfo::RVAToSection(RVA rva) const {
+  for (int i = 0; i < number_of_sections_; i++) {
+    const Section* section = &sections_[i];
+    uint32 offset = rva - section->virtual_address;
+    if (offset < section->virtual_size) {
+      return section;
+    }
+  }
+  return NULL;
+}
+
+int PEInfo::RVAToFileOffset(RVA rva) const {
+  const Section* section = RVAToSection(rva);
+  if (section) {
+    uint32 offset = rva - section->virtual_address;
+    if (offset < section->size_of_raw_data) {
+      return section->file_offset_of_raw_data + offset;
+    } else {
+      return kNoOffset;  // In section but not in file (e.g. uninit data).
+    }
+  }
+
+  // Small RVA values point into the file header in the loaded image.
+  // RVA 0 is the module load address which Windows uses as the module handle.
+  // RVA 2 sometimes occurs, I'm not sure what it is, but it would map into the
+  // DOS header.
+  if (rva == 0 || rva == 2)
+    return rva;
+
+  NOTREACHED();
+  return kNoOffset;
+}
+
+const uint8* PEInfo::RVAToPointer(RVA rva) const {
+  int file_offset = RVAToFileOffset(rva);
+  if (file_offset == kNoOffset)
+    return NULL;
+  else
+    return start_ + file_offset;
+}
+
+RVA PEInfo::FileOffsetToRVA(uint32 file_offset) const {
+  for (int i = 0; i < number_of_sections_; i++) {
+    const Section* section = &sections_[i];
+    uint32 offset = file_offset - section->file_offset_of_raw_data;
+    if (offset < section->size_of_raw_data) {
+      return section->virtual_address + offset;
+    }
+  }
+  return 0;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace {
+
+// Constants and offsets gleaned from WINNT.H and various articles on the
+// format of Windows PE executables.
+
+// This is FIELD_OFFSET(IMAGE_DOS_HEADER, e_lfanew):
+const size_t kOffsetOfFileAddressOfNewExeHeader = 0x3c;
+
+const uint16 kImageNtOptionalHdr32Magic = 0x10b;
+const uint16 kImageNtOptionalHdr64Magic = 0x20b;
+
+const size_t kSizeOfCoffHeader = 20;
+const size_t kOffsetOfDataDirectoryFromImageOptionalHeader32 = 96;
+const size_t kOffsetOfDataDirectoryFromImageOptionalHeader64 = 112;
+
+// These helper functions avoid the need for casts in the main code.
+inline uint16 ReadU16(const uint8* address, size_t offset) {
+  return *reinterpret_cast<const uint16*>(address + offset);
+}
+
+inline uint32 ReadU32(const uint8* address, size_t offset) {
+  return *reinterpret_cast<const uint32*>(address + offset);
+}
+
+inline uint64 ReadU64(const uint8* address, size_t offset) {
+  return *reinterpret_cast<const uint64*>(address + offset);
+}
+
+}  // namespace
+
+// ParseHeader attempts to match up the buffer with the Windows data
+// structures that exist within a Windows 'Portable Executable' format file.
+// Returns 'true' if the buffer matches, and 'false' if the data looks
+// suspicious.  Rather than try to 'map' the buffer to the numerous windows
+// structures, we extract the information we need into the courgette::PEInfo
+// structure.
+//
+bool PEInfo::ParseHeader() {
+  if (length_ < kOffsetOfFileAddressOfNewExeHeader + 4 /*size*/)
+    return Bad("Too small");
+
+  // Have 'MZ' magic for a DOS header?
+  if (start_[0] != 'M' || start_[1] != 'Z')
+    return Bad("Not MZ");
+
+  // offset from DOS header to PE header is stored in DOS header.
+  uint32 offset = ReadU32(start_, kOffsetOfFileAddressOfNewExeHeader);
+
+  const uint8* const pe_header = start_ + offset;
+  const size_t kMinPEHeaderSize = 4 /*signature*/ + kSizeOfCoffHeader;
+  if (pe_header <= start_  ||  pe_header >= end_ - kMinPEHeaderSize)
+    return Bad("Bad offset to PE header");
+
+  if (offset % 8 != 0)
+    return Bad("Misaligned PE header");
+
+  // The 'PE' header is an IMAGE_NT_HEADERS structure as defined in WINNT.H.
+  // See http://msdn.microsoft.com/en-us/library/ms680336(VS.85).aspx
+  //
+  // The first field of the IMAGE_NT_HEADERS is the signature.
+  if (!(pe_header[0] == 'P' &&
+        pe_header[1] == 'E' &&
+        pe_header[2] == 0 &&
+        pe_header[3] == 0))
+    return Bad("no PE signature");
+
+  // The second field of the IMAGE_NT_HEADERS is the COFF header.
+  // The COFF header is also called an IMAGE_FILE_HEADER
+  //   http://msdn.microsoft.com/en-us/library/ms680313(VS.85).aspx
+  const uint8* const coff_header = pe_header + 4;
+  machine_type_       = ReadU16(coff_header, 0);
+  number_of_sections_ = ReadU16(coff_header, 2);
+  size_of_optional_header_ = ReadU16(coff_header, 16);
+
+  // The rest of the IMAGE_NT_HEADERS is the IMAGE_OPTIONAL_HEADER(32|64)
+  const uint8* const optional_header = coff_header + kSizeOfCoffHeader;
+  optional_header_ = optional_header;
+
+  if (optional_header + size_of_optional_header_ >= end_)
+    return Bad("optional header past end of file");
+
+  // Check we can read the magic.
+  if (size_of_optional_header_ < 2)
+    return Bad("optional header no magic");
+
+  uint16 magic = ReadU16(optional_header, 0);
+
+  if (magic == kImageNtOptionalHdr32Magic) {
+    is_PE32_plus_ = false;
+    offset_of_data_directories_ =
+      kOffsetOfDataDirectoryFromImageOptionalHeader32;
+  } else if (magic == kImageNtOptionalHdr64Magic) {
+    is_PE32_plus_ = true;
+    offset_of_data_directories_ =
+      kOffsetOfDataDirectoryFromImageOptionalHeader64;
+  } else {
+    return Bad("unrecognized magic");
+  }
+
+  // Check that we can read the rest of the the fixed fields.  Data directories
+  // directly follow the fixed fields of the IMAGE_OPTIONAL_HEADER.
+  if (size_of_optional_header_ < offset_of_data_directories_)
+    return Bad("optional header too short");
+
+  // The optional header is either an IMAGE_OPTIONAL_HEADER32 or
+  // IMAGE_OPTIONAL_HEADER64
+  // http://msdn.microsoft.com/en-us/library/ms680339(VS.85).aspx
+  //
+  // Copy the fields we care about.
+  size_of_code_               = ReadU32(optional_header, 4);
+  size_of_initialized_data_   = ReadU32(optional_header, 8);
+  size_of_uninitialized_data_ = ReadU32(optional_header, 12);
+  base_of_code_               = ReadU32(optional_header, 20);
+  if (is_PE32_plus_) {
+    base_of_data_ = 0;
+    image_base_  = ReadU64(optional_header, 24);
+  } else {
+    base_of_data_ = ReadU32(optional_header, 24);
+    image_base_   = ReadU32(optional_header, 28);
+  }
+  size_of_image_ = ReadU32(optional_header, 56);
+  number_of_data_directories_ =
+    ReadU32(optional_header, (is_PE32_plus_ ? 108 : 92));
+
+  if (size_of_code_ >= length_ ||
+      size_of_initialized_data_ >= length_ ||
+      size_of_code_ + size_of_initialized_data_ >= length_) {
+    // This validation fires on some perfectly fine executables.
+    //  return Bad("code or initialized data too big");
+  }
+
+  // TODO(sra): we can probably get rid of most of the data directories.
+  bool b = true;
+  // 'b &= ...' could be short circuit 'b = b && ...' but it is not necessary
+  // for correctness and it compiles smaller this way.
+  b &= ReadDataDirectory(0, &export_table_);
+  b &= ReadDataDirectory(1, &import_table_);
+  b &= ReadDataDirectory(2, &resource_table_);
+  b &= ReadDataDirectory(3, &exception_table_);
+  b &= ReadDataDirectory(5, &base_relocation_table_);
+  b &= ReadDataDirectory(11, &bound_import_table_);
+  b &= ReadDataDirectory(12, &import_address_table_);
+  b &= ReadDataDirectory(13, &delay_import_descriptor_);
+  b &= ReadDataDirectory(14, &clr_runtime_header_);
+  if (!b) {
+    return Bad("malformed data directory");
+  }
+
+  // Sections follow the optional header.
+  sections_ =
+      reinterpret_cast<const Section*>(optional_header +
+                                       size_of_optional_header_);
+  file_length_ = 0;
+
+  for (int i = 0;  i < number_of_sections_;  ++i) {
+    const Section* section = &sections_[i];
+
+    // TODO(sra): consider using the 'characteristics' field of the section
+    // header to see if the section contains instructions.
+    if (memcmp(section->name, ".text", 6) == 0)
+      has_text_section_ = true;
+
+    uint32 section_end =
+        section->file_offset_of_raw_data + section->size_of_raw_data;
+    if (section_end > file_length_)
+      file_length_ = section_end;
+  }
+
+  failure_reason_ = NULL;
+  return true;
+}
+
+bool PEInfo::ReadDataDirectory(int index, ImageDataDirectory* directory) {
+  if (index < number_of_data_directories_) {
+    size_t offset = index * 8 + offset_of_data_directories_;
+    if (offset >= size_of_optional_header_)
+      return Bad("number of data directories inconsistent");
+    const uint8* data_directory = optional_header_ + offset;
+    if (data_directory < start_ || data_directory + 8 >= end_)
+      return Bad("data directory outside image");
+    RVA rva = ReadU32(data_directory, 0);
+    size_t size  = ReadU32(data_directory, 4);
+    if (size > size_of_image_)
+      return Bad("data directory size too big");
+
+    // TODO(sra): validate RVA.
+    directory->address_ = rva;
+    directory->size_ = size;
+    return true;
+  } else {
+    directory->address_ = 0;
+    directory->size_ = 0;
+    return true;
+  }
+}
+
+bool PEInfo::Bad(const char* reason) {
+  failure_reason_ = reason;
+  return false;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+bool PEInfo::ParseRelocs(std::vector<RVA> *relocs) {
+  relocs->clear();
+
+  size_t relocs_size = base_relocation_table_.size_;
+  if (relocs_size == 0)
+    return true;
+
+  // The format of the base relocation table is a sequence of variable sized
+  // IMAGE_BASE_RELOCATION blocks.  Search for
+  //   "The format of the base relocation data is somewhat quirky"
+  // at http://msdn.microsoft.com/en-us/library/ms809762.aspx
+
+  const uint8* start = RVAToPointer(base_relocation_table_.address_);
+  const uint8* end = start + relocs_size;
+
+  // Make sure entire base relocation table is within the buffer.
+  if (start < start_ ||
+      start >= end_ ||
+      end <= start_ ||
+      end > end_) {
+    return Bad(".relocs outside image");
+  }
+
+  const uint8* block = start;
+
+  // Walk the variable sized blocks.
+  while (block + 8 < end) {
+    RVA page_rva = ReadU32(block, 0);
+    uint32 size = ReadU32(block, 4);
+    if (size < 8 ||        // Size includes header ...
+        size % 4  !=  0)   // ... and is word aligned.
+      return Bad("unreasonable relocs block");
+
+    const uint8* end_entries = block + size;
+
+    if (end_entries <= block || end_entries <= start_ || end_entries > end_)
+      return Bad(".relocs block outside image");
+
+    // Walk through the two-byte entries.
+    for (const uint8* p = block + 8;  p < end_entries;  p += 2) {
+      uint16 entry = ReadU16(p, 0);
+      int type = entry >> 12;
+      int offset = entry & 0xFFF;
+
+      RVA rva = page_rva + offset;
+      if (type == 3) {         // IMAGE_REL_BASED_HIGHLOW
+        relocs->push_back(rva);
+      } else if (type == 0) {  // IMAGE_REL_BASED_ABSOLUTE
+        // Ignore, used as padding.
+      } else {
+        // Does not occur in Windows x86 executables.
+        return Bad("unknown type of reloc");
+      }
+    }
+
+    block += size;
+  }
+
+  std::sort(relocs->begin(), relocs->end());
+
+  return true;
+}
+
+}  // namespace courgette
diff --git a/courgette/image_info.h b/courgette/image_info.h
new file mode 100644
index 0000000..53a0be7
--- /dev/null
+++ b/courgette/image_info.h
@@ -0,0 +1,199 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COURGETTE_IMAGE_INFO_H_
+#define COURGETTE_IMAGE_INFO_H_
+
+#include <string>
+#include <vector>
+
+#include "base/basictypes.h"
+
+namespace courgette {
+
+// A Relative Virtual Address is the address in the image file after it is
+// loaded into memory relative to the image load address.
+typedef uint32 RVA;
+
+// PE file section header.  This struct has the same layout as the
+// IMAGE_SECTION_HEADER structure from WINNT.H
+// http://msdn.microsoft.com/en-us/library/ms680341(VS.85).aspx
+//
+#pragma pack(push, 1)  // Supported by MSVC and GCC. Ensures no gaps in packing.
+struct Section {
+  char name[8];
+  uint32 virtual_size;
+  uint32 virtual_address;
+  uint32 size_of_raw_data;
+  uint32 file_offset_of_raw_data;
+  uint32 pointer_to_relocations;   // Always zero in an image.
+  uint32 pointer_to_line_numbers;  // Always zero in an image.
+  uint16 number_of_relocations;    // Always zero in an image.
+  uint16 number_of_line_numbers;   // Always zero in an image.
+  uint32 characteristics;
+};
+#pragma pack(pop)
+
+COMPILE_ASSERT(sizeof(Section) == 40, section_is_40_bytes);
+
+// Returns the name of a section, solving the problem that the name is not
+// always properly NUL-terminated.  Used only for debugging.
+std::string SectionName(const Section* section);
+
+// ImageDataDirectory has same layout as IMAGE_DATA_DIRECTORY structure from
+// WINNT.H
+// http://msdn.microsoft.com/en-us/library/ms680305(VS.85).aspx
+//
+class ImageDataDirectory {
+ public:
+  ImageDataDirectory() : address_(0), size_(0) {}
+  RVA address_;
+  uint32 size_;
+};
+
+COMPILE_ASSERT(sizeof(ImageDataDirectory) == 8,
+               image_data_directory_is_8_bytes);
+
+//
+//  PEInfo holds information about a single Windows 'Portable Executable' format
+//  file in the on-disk format.
+//
+//  Imagine you had concatenated a bunch of 'original' files into one 'big'
+//  file and read the big file into memory.  You could find the executables
+//  from the original files by calling PEInfo::Init with different addresses.
+//  If PEInfo::TryParseHeader returns true, then Init was passed the address
+//  of the first byte of one of the original executables, and PEIinfo::length
+//  will tell how long the file was.
+//
+class PEInfo {
+ public:
+  PEInfo();
+
+  // ok() may always be called but returns 'true' only after ParseHeader
+  // succeeds.
+  bool ok() const { return failure_reason_ == NULL; }
+
+  // Initialize with buffer.  This just sets up the region of memory that
+  // potentially contains the bytes from an executable file.  The caller
+  // continues to own 'start'.
+  void Init(const void* start, size_t length);
+
+  // Returns 'true' if the buffer appears to point to a Windows 32 bit
+  // executable, 'false' otherwise.  If ParseHeader() succeeds, other member
+  // functions may be called.
+  bool ParseHeader();
+
+  // Returns 'true' if the base relocation table can be parsed.
+  // Output is a vector of the RVAs corresponding to locations within executable
+  // that are listed in the base relocation table.
+  bool ParseRelocs(std::vector<RVA> *addresses);
+
+  // Returns the length of the image.  Valid only if ParseHeader succeeded.
+  uint32 length() const { return file_length_; }
+
+  bool has_text_section() const { return has_text_section_; }
+
+  uint32 size_of_code() const { return size_of_code_; }
+
+  bool is_32bit() const { return !is_PE32_plus_; }
+
+  // Most addresses are represented as 32-bit RVAs.  The one address we can't
+  // do this with is the image base address.  'image_base' is valid only for
+  // 32-bit executables. 'image_base_64' is valid for 32- and 64-bit executable.
+  uint32 image_base() const { return static_cast<uint32>(image_base_); }
+  uint64 image_base_64() const { return image_base_; }
+
+  const ImageDataDirectory& base_relocation_table() const {
+    return base_relocation_table_;
+  }
+
+  bool IsValidRVA(RVA rva) const { return rva < size_of_image_; }
+
+  // Returns description of the RVA, e.g. ".text+0x1243".  For debugging only.
+  std::string DescribeRVA(RVA rva) const;
+
+  // Returns a pointer into the memory copy of the file format.
+  // FileOffsetToPointer(0) returns a pointer to the start of the file format.
+  const uint8* FileOffsetToPointer(uint32 offset) const {
+    return start_ + offset;
+  }
+
+  // Finds the first section at file_offset or above.
+  const Section* FindNextSection(uint32 file_offset) const;
+  // Returns Section containing the relative virtual address, or NULL if none.
+  const Section* RVAToSection(RVA rva) const;
+
+  // There are 2 'coordinate systems' for reasoning about executables.
+  //   FileOffset - the the offset within a single .EXE or .DLL *file*.
+  //   RVA - relative virtual address (offset within *loaded image*)
+  // FileOffsetToRVA and RVAToFileOffset convert between these representations.
+
+  RVA FileOffsetToRVA(uint32 offset) const;
+
+  static const int kNoOffset = -1;
+  // Returns kNoOffset if there is no file offset corresponding to 'rva'.
+  int RVAToFileOffset(RVA rva) const;
+
+  // Returns same as FileOffsetToPointer(RVAToFileOffset(rva)) except that NULL
+  // is returned if there is no file offset corresponding to 'rva'.
+  const uint8* RVAToPointer(RVA rva) const;
+
+ protected:
+  //
+  // Fields that are always valid.
+  //
+  const char* failure_reason_;
+
+  //
+  // Basic information that is always valid after Init.
+  //
+  const uint8* start_;    // In current memory, base for 'file offsets'.
+  const uint8* end_;      // In current memory.
+  unsigned int length_;   // In current memory.
+
+  //
+  // Information that is valid after successful ParseHeader.
+  //
+  bool is_PE32_plus_;   // PE32_plus is for 64 bit executables.
+  uint32 file_length_;
+
+  // Location and size of IMAGE_OPTIONAL_HEADER in the buffer.
+  const uint8 *optional_header_;
+  uint16 size_of_optional_header_;
+  uint16 offset_of_data_directories_;
+
+  uint16 machine_type_;
+  uint16 number_of_sections_;
+  const Section *sections_;
+  bool has_text_section_;
+
+  uint32 size_of_code_;
+  uint32 size_of_initialized_data_;
+  uint32 size_of_uninitialized_data_;
+  RVA base_of_code_;
+  RVA base_of_data_;
+
+  uint64 image_base_;  // range limited to 32 bits for 32 bit executable
+  uint32 size_of_image_;
+  int number_of_data_directories_;
+
+  ImageDataDirectory export_table_;
+  ImageDataDirectory import_table_;
+  ImageDataDirectory resource_table_;
+  ImageDataDirectory exception_table_;
+  ImageDataDirectory base_relocation_table_;
+  ImageDataDirectory bound_import_table_;
+  ImageDataDirectory import_address_table_;
+  ImageDataDirectory delay_import_descriptor_;
+  ImageDataDirectory clr_runtime_header_;
+
+ private:
+  bool ReadDataDirectory(int index, ImageDataDirectory* dir);
+  bool Bad(const char *reason);
+
+  DISALLOW_COPY_AND_ASSIGN(PEInfo);
+};
+
+}  // namespace
+#endif  // COURGETTE_IMAGE_INFO_H_
diff --git a/courgette/image_info_unittest.cc b/courgette/image_info_unittest.cc
new file mode 100644
index 0000000..6d6deb5
--- /dev/null
+++ b/courgette/image_info_unittest.cc
@@ -0,0 +1,105 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "courgette/image_info.h"
+
+#include <string>
+
+#include "base/path_service.h"
+#include "base/file_util.h"
+#include "base/string_util.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+class ImageInfoTest : public testing::Test {
+ public:
+
+  void TestExe() const;
+  void TestResourceDll() const;
+
+ private:
+  void SetUp() {
+    PathService::Get(base::DIR_SOURCE_ROOT, &test_dir_);
+    file_util::AppendToPath(&test_dir_, L"courgette");
+    file_util::AppendToPath(&test_dir_, L"testdata");
+  }
+
+  void TearDown() {
+  }
+
+  void ExpectExecutable(courgette::PEInfo *info) const;
+
+  std::string FileContents(const char *file_name) const;
+
+  std::wstring test_dir_;
+};
+
+//  Reads a test file into a string.
+std::string ImageInfoTest::FileContents(const char *file_name) const {
+  std::wstring file_path = test_dir_;
+  file_util::AppendToPath(&file_path, UTF8ToWide(file_name));
+  std::string file_bytes;
+  if (!file_util::ReadFileToString(file_path, &file_bytes)) {
+    EXPECT_TRUE(!"Could not read test data");
+  }
+  return file_bytes;
+}
+
+void ImageInfoTest::ExpectExecutable(courgette::PEInfo *info) const {
+  EXPECT_TRUE(info->ok());
+  EXPECT_TRUE(info->has_text_section());
+}
+
+void ImageInfoTest::TestExe() const {
+  std::string file1 = FileContents("setup1.exe");
+
+  courgette::PEInfo *info = new courgette::PEInfo();
+  info->Init(reinterpret_cast<const uint8*>(file1.c_str()), file1.length());
+
+  bool can_parse_header = info->ParseHeader();
+  EXPECT_TRUE(can_parse_header);
+
+  // The executable is the whole file, not 'embedded' with the file
+  EXPECT_EQ(file1.length(), info->length());
+
+  ExpectExecutable(info);
+  EXPECT_EQ(449536, info->size_of_code());
+  EXPECT_EQ(SectionName(info->RVAToSection(0x00401234 - 0x00400000)),
+            std::string(".text"));
+
+  EXPECT_EQ(0, info->RVAToFileOffset(0));
+  EXPECT_EQ(1024, info->RVAToFileOffset(4096));
+  EXPECT_EQ(46928, info->RVAToFileOffset(50000));
+
+  std::vector<courgette::RVA> relocs;
+  bool can_parse_relocs = info->ParseRelocs(&relocs);
+  EXPECT_TRUE(can_parse_relocs);
+
+  const uint8* p = info->RVAToPointer(0);
+  EXPECT_EQ(reinterpret_cast<const void*>(file1.c_str()),
+            reinterpret_cast<const void*>(p));
+  EXPECT_EQ('M', p[0]);
+  EXPECT_EQ('Z', p[1]);
+}
+
+void ImageInfoTest::TestResourceDll() const {
+  std::string file1 = FileContents("en-US.dll");
+
+  courgette::PEInfo *info = new courgette::PEInfo();
+  info->Init(reinterpret_cast<const uint8*>(file1.c_str()), file1.length());
+
+  bool can_parse_header = info->ParseHeader();
+  EXPECT_TRUE(can_parse_header);
+
+  // The executable is the whole file, not 'embedded' with the file
+  EXPECT_EQ(file1.length(), info->length());
+
+  EXPECT_TRUE(info->ok());
+  EXPECT_FALSE(info->has_text_section());
+  EXPECT_EQ(0u, info->size_of_code());
+}
+
+TEST_F(ImageInfoTest, All) {
+  TestExe();
+  TestResourceDll();
+}
diff --git a/courgette/memory_monitor.cc b/courgette/memory_monitor.cc
new file mode 100644
index 0000000..51cb0cc
--- /dev/null
+++ b/courgette/memory_monitor.cc
@@ -0,0 +1,122 @@
+#include <stdio.h>
+#include <map>
+
+#include "base/logging.h"
+#include "base/string_util.h"
+
+bool inH = true;
+struct H {
+  H() { inH = false;  tick_ = 0; bw_ = 0; d_bw_ = d_tick_ = 0; m_bw_ = 0; mem_ = high_ = 0;}
+  ~H() {
+    inH = true;
+    int i = 0;
+    for (M::iterator p = m_.begin(); p != m_.end(); ++p, ++i) {
+      size_t s = p->first;
+      LOG(INFO) << StringPrintf("%3d %8u: %8u %8u %8u %8u", i, s,
+             m_[s], c_[s], h_[s], h_[s] * s);
+    }
+    LOG(INFO) << "Peak " << fmt(high_);
+  }
+
+  std::string fmt(size_t s) {
+    if (s > 1000000000) return StringPrintf("%.3gG", s/(1000000000.0));
+    if (s > 1000000) return StringPrintf("%.3gM", s/(1000000.));
+    if (s > 9999) return StringPrintf("%.3gk", s/(1000.));
+    return StringPrintf("%d", (int)s);
+  }
+
+  void tick(size_t w, char sign) {
+    d_tick_ += 1;
+    d_bw_ += w;
+    const size_t T = 4*4*1024;
+    const size_t M = 4*1024*1024;
+    bool print = false;
+    if (d_tick_ >= T) {
+      tick_ += (d_tick_/T)*T;
+      d_tick_ %= T;
+      print = true;
+    }
+    if (d_bw_ >= M) {
+      bw_ += (d_bw_/M) * M;
+      d_bw_ %= M;
+      print = true;
+    }
+    if (!print) return;
+    std::string o;
+    StringAppendF(&o, "%u:", tick_ + d_tick_);
+    StringAppendF(&o, " (%c%s)", sign, fmt(w).c_str());
+    size_t sum = 0;
+    for (M::iterator p = c_.begin(); p != c_.end(); ++p) {
+      size_t s = p->first;
+      size_t n = p->second;
+      if (n) {
+        if (s*n >= 64*1024)
+          if (n == 1)
+            StringAppendF(&o, "  %s", fmt(s).c_str());
+          else
+            StringAppendF(&o, "  %s*%u", fmt(s).c_str(), n);
+        sum += s*n;
+        }
+    }
+    StringAppendF(&o, "  = %s", fmt(sum).c_str());
+    LOG(INFO) << o;
+    //printf("%s\n", o.c_str());
+    if (sum > 200*1024*1024) {
+      // __asm int 3;
+      m_bw_ = sum;
+    }
+  }
+  void add(size_t s, void *p) {
+    if (!inH) {
+      inH = true;
+      mem_ += s; if (mem_ > high_) high_ = mem_;
+      c_[s] += 1;
+      m_[s] += 1;
+      if (c_[s] > h_[s]) h_[s] = c_[s];
+      allocs_[p] = s;
+      inH = false;
+      tick(s, '+');
+    }
+  }
+
+  void sub(void *p) {
+    if (!inH) {
+      inH = true;
+      size_t s = allocs_[p];
+      if (s) {
+        mem_ -= s;
+        c_[s] -= 1;
+        allocs_[p] = 0;
+        tick(s, '-');
+      }
+      inH = false;
+    }
+  }
+
+  typedef std::map<size_t, size_t> M;
+  M m_;
+  M c_;
+  M h_;
+
+  size_t bw_;
+  size_t d_bw_;
+  size_t tick_;
+  size_t d_tick_;
+  size_t m_bw_;
+  size_t mem_;
+  size_t high_;
+
+  std::map<void*, size_t> allocs_;
+} _H;
+
+void* operator new(size_t s) {
+  //printf("%u\n", s);
+  void *p = malloc(s);
+  _H.add(s, p);
+  return p;
+}
+
+void operator delete(void *p) {
+  _H.sub(p);
+  free(p);
+}
diff --git a/courgette/region.h b/courgette/region.h
new file mode 100644
index 0000000..4aafbca
--- /dev/null
+++ b/courgette/region.h
@@ -0,0 +1,62 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COURGETTE_REGION_H_
+#define COURGETTE_REGION_H_
+
+#include <string>
+
+#include "base/basictypes.h"
+
+namespace courgette {
+
+// A Region is a descriptor for a region of memory.  It has a start address and
+// a length measured in bytes.  The Region object does not own the memory.
+//
+class Region {
+ public:
+  // Default constructor: and empty region.
+  Region() : start_(NULL), length_(0) {}
+
+  // Usual constructor for regions given a |start| address and |length|.
+  Region(const void* start, size_t length)
+      : start_(static_cast<const uint8*>(start)),
+        length_(length) {
+  }
+
+  // String constructor.  Region is owned by the string, so the string should
+  // have a lifetime greater than the region.
+  explicit Region(const std::string& string)
+      : start_(reinterpret_cast<const uint8*>(string.c_str())),
+        length_(string.length()) {
+  }
+
+  // Copy constructor.
+  Region(const Region& other) : start_(other.start_), length_(other.length_) {}
+
+  // Assignment 'operator' makes |this| region the same as |other|.
+  Region& assign(const Region& other) {
+    this->start_ = other.start_;
+    this->length_ = other.length_;
+    return *this;
+  }
+
+  // Returns the starting address of the region.
+  const uint8* start() const { return start_; }
+
+  // Returns the length of the region.
+  size_t length() const { return length_; }
+
+  // Returns the address after the last byte of the region.
+  const uint8* end() const { return start_ + length_; }
+
+ private:
+  const uint8* start_;
+  size_t length_;
+
+  void operator=(const Region&);  // Disallow assignment operator.
+};
+
+}  // namespace
+#endif  // COURGETTE_REGION_H_
diff --git a/courgette/run_all_unittests.cc b/courgette/run_all_unittests.cc
new file mode 100644
index 0000000..b5fc4a6
--- /dev/null
+++ b/courgette/run_all_unittests.cc
@@ -0,0 +1,9 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/test_suite.h"
+
+int main(int argc, char** argv) {
+  return TestSuite(argc, argv).Run();
+}
diff --git a/courgette/simple_delta.cc b/courgette/simple_delta.cc
new file mode 100644
index 0000000..71038fd
--- /dev/null
+++ b/courgette/simple_delta.cc
@@ -0,0 +1,41 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Implementation of the byte-level differential compression used internally by
+// Courgette.
+
+#include "courgette/simple_delta.h"
+
+#include "base/basictypes.h"
+#include "base/logging.h"
+
+#include "courgette/third_party/bsdiff.h"
+
+namespace courgette {
+
+namespace {
+
+Status BSDiffStatusToStatus(BSDiffStatus status) {
+  switch (status) {
+    case OK: return C_OK;
+    case CRC_ERROR: return C_BINARY_DIFF_CRC_ERROR;
+    default: return C_GENERAL_ERROR;
+  }
+}
+
+}
+
+Status ApplySimpleDelta(SourceStream* old, SourceStream* delta,
+                        SinkStream* target) {
+  return BSDiffStatusToStatus(ApplyBinaryPatch(old, delta, target));
+}
+
+Status GenerateSimpleDelta(SourceStream* old, SourceStream* target,
+                           SinkStream* delta) {
+  LOG(INFO) << "GenerateSimpleDelta "
+            << old->Remaining() << " " << target->Remaining();
+  return BSDiffStatusToStatus(CreateBinaryPatch(old, target, delta));
+}
+
+}  // namespace courgette
diff --git a/courgette/simple_delta.h b/courgette/simple_delta.h
new file mode 100644
index 0000000..d35333a
--- /dev/null
+++ b/courgette/simple_delta.h
@@ -0,0 +1,23 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Byte level differential compression algorithm used by Courgette.
+
+#ifndef COURGETTE_SIMPLE_DELTA_H_
+#define COURGETTE_SIMPLE_DELTA_H_
+
+#include "courgette/courgette.h"
+#include "courgette/streams.h"
+
+namespace courgette {
+
+Status ApplySimpleDelta(SourceStream* old, SourceStream* delta,
+                        SinkStream* target);
+
+Status GenerateSimpleDelta(SourceStream* old, SourceStream* target,
+                           SinkStream* delta);
+
+}  // namespace courgette
+
+#endif  // COURGETTE_SIMPLE_DELTA_H_
diff --git a/courgette/streams.cc b/courgette/streams.cc
new file mode 100644
index 0000000..365a416
--- /dev/null
+++ b/courgette/streams.cc
@@ -0,0 +1,383 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Streams classes.
+//
+// These memory-resident streams are used for serializing data into a sequential
+// region of memory.
+//
+// Streams are divided into SourceStreams for reading and SinkStreams for
+// writing.  Streams are aggregated into Sets which allows several streams to be
+// used at once.  Example: we can write A1, B1, A2, B2 but achieve the memory
+// layout A1 A2 B1 B2 by writing 'A's to one stream and 'B's to another.
+//
+// The aggregated streams are important to Courgette's compression efficiency,
+// we use it to cluster similar kinds of data which helps to generate longer
+// common subsequences and repeated sequences.
+
+#include "courgette/streams.h"
+
+#include <io.h>
+#include <memory.h>
+
+#include "base/basictypes.h"
+
+namespace courgette {
+
+// Update this version number if the serialization format of a StreamSet
+// changes.
+static const unsigned int kStreamsSerializationFormatVersion = 20090218;
+
+//
+// This is a cut down Varint implementation, implementing only what we use for
+// streams.
+//
+class Varint {
+ public:
+  // Maximum lengths of varint encoding of uint32
+  static const int kMax32 = 5;
+
+  // Parses a Varint32 encoded value from |source| and stores it in |output|,
+  // and returns a pointer to the following byte.  Returns NULL if a valid
+  // varint value was not found before |limit|.
+  static const uint8* Parse32WithLimit(const uint8* source, const uint8* limit,
+                                       uint32* output);
+
+  // Writes the Varint32 encoded representation of |value| to buffer
+  // |destination|.  |destination| must have sufficient length to hold kMax32
+  // bytes.  Returns a pointer to the byte just past the last encoded byte.
+  static uint8* Encode32(uint8* destination, uint32 value);
+};
+
+// Parses a Varint32 encoded unsigned number from |source|.  The Varint32
+// encoding is a little-endian sequence of bytes containing base-128 digits,
+// with the high order bit set to indicate if there are more digits.
+//
+// For each byte, we mask out the digit and 'or' it into the right place in the
+// result.
+//
+// The digit loop is unrolled for performance.  It usually exits after the first
+// one or two digits.
+const uint8* Varint::Parse32WithLimit(const uint8* source,
+                                      const uint8* limit,
+                                      uint32* output) {
+  uint32 digit, result;
+  if (source >= limit)
+    return NULL;
+  digit = *(source++);
+  result = digit & 127;
+  if (digit < 128) {
+    *output = result;
+    return source;
+  }
+
+  if (source >= limit)
+    return NULL;
+  digit = *(source++);
+  result |= (digit & 127) <<  7;
+  if (digit < 128) {
+    *output = result;
+    return source;
+  }
+
+  if (source >= limit)
+    return NULL;
+  digit = *(source++);
+  result |= (digit & 127) << 14;
+  if (digit < 128) {
+    *output = result;
+    return source;
+  }
+
+  if (source >= limit)
+    return NULL;
+  digit = *(source++);
+  result |= (digit & 127) << 21;
+  if (digit < 128) {
+    *output = result;
+    return source;
+  }
+
+  if (source >= limit)
+    return NULL;
+  digit = *(source++);
+  result |= (digit & 127) << 28;
+  if (digit < 128) {
+    *output = result;
+    return source;
+  }
+
+  return NULL;  // Value is too long to be a Varint32.
+}
+
+// Write the base-128 digits in little-endian order.  All except the last digit
+// have the high bit set to indicate more digits.
+inline uint8* Varint::Encode32(uint8* destination, uint32 value) {
+  while (value >= 128) {
+    *(destination++) = value | 128;
+    value = value >> 7;
+  }
+  *(destination++) = value;
+  return destination;
+}
+
+void SourceStream::Init(const SinkStream& sink) {
+  Init(sink.Buffer(), sink.Length());
+}
+
+bool SourceStream::Read(void* destination, size_t count) {
+  if (current_ + count > end_)
+    return false;
+  memcpy(destination, current_, count);
+  current_ += count;
+  return true;
+}
+
+bool SourceStream::ReadVarint32(uint32* output_value) {
+  const uint8* after = Varint::Parse32WithLimit(current_, end_, output_value);
+  if (!after)
+    return false;
+  current_ = after;
+  return true;
+}
+
+bool SourceStream::ReadVarint32Signed(int32* output_value) {
+  // Signed numbers are encoded as unsigned numbers so that numbers nearer zero
+  // have shorter varint encoding.
+  //  0000xxxx encoded as 000xxxx0.
+  //  1111xxxx encoded as 000yyyy1 where yyyy is complement of xxxx.
+  uint32 unsigned_value;
+  if (!ReadVarint32(&unsigned_value))
+    return false;
+  if (unsigned_value & 1)
+    *output_value = ~static_cast<int32>(unsigned_value >> 1);
+  else
+    *output_value = (unsigned_value >> 1);
+  return true;
+}
+
+bool SourceStream::ShareSubstream(size_t offset, size_t length,
+                                  SourceStream* substream) {
+  if (offset > Remaining())
+    return false;
+  if (length > Remaining() - offset)
+    return false;
+  substream->Init(current_ + offset, length);
+  return true;
+}
+
+bool SourceStream::ReadSubstream(size_t length, SourceStream* substream) {
+  if (!ShareSubstream(0, length, substream))
+    return false;
+  current_ += length;
+  return true;
+}
+
+bool SourceStream::Skip(size_t byte_count) {
+  if (current_ + byte_count > end_)
+    return false;
+  current_ += byte_count;
+  return true;
+}
+
+void SinkStream::Write(const void* data, size_t byte_count) {
+  buffer_.append(static_cast<const char*>(data), byte_count);
+}
+
+void SinkStream::WriteVarint32(uint32 value) {
+  uint8 buffer[Varint::kMax32];
+  uint8* end = Varint::Encode32(buffer, value);
+  Write(buffer, end - buffer);
+}
+
+void SinkStream::WriteVarint32Signed(int32 value) {
+  // Encode signed numbers so that numbers nearer zero have shorter
+  // varint encoding.
+  //  0000xxxx encoded as 000xxxx0.
+  //  1111xxxx encoded as 000yyyy1 where yyyy is complement of xxxx.
+  if (value < 0)
+    WriteVarint32(~value * 2 + 1);
+  else
+    WriteVarint32(value * 2);
+}
+
+void SinkStream::Append(SinkStream* other) {
+  Write(other->buffer_.c_str(), other->buffer_.size());
+  other->buffer_.clear();
+  other->buffer_.reserve(0);  // Non-binding request to reduce storage.
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+SourceStreamSet::SourceStreamSet()
+    : count_(kMaxStreams) {
+}
+
+SourceStreamSet::~SourceStreamSet() {
+}
+
+
+// Initializes from |source|.
+// The stream set for N streams is serialized as a header
+//   <version><N><length1><length2>...<lengthN>
+// followed by the stream contents
+//   <bytes1><bytes2>...<bytesN>
+//
+bool SourceStreamSet::Init(const void* source, size_t byte_count) {
+  const uint8* start = static_cast<const uint8*>(source);
+  const uint8* end = start + byte_count;
+
+  unsigned int version;
+  const uint8* finger = Varint::Parse32WithLimit(start, end, &version);
+  if (finger == NULL)
+    return false;
+  if (version != kStreamsSerializationFormatVersion)
+    return false;
+
+  unsigned int count;
+  finger = Varint::Parse32WithLimit(finger, end, &count);
+  if (finger == NULL)
+    return false;
+  if (count > kMaxStreams)
+    return false;
+
+  count_ = count;
+
+  unsigned int lengths[kMaxStreams];
+  size_t accumulated_length = 0;
+
+  for (size_t i = 0; i < count_; ++i) {
+    finger = Varint::Parse32WithLimit(finger, end, &lengths[i]);
+    if (finger == NULL)
+      return false;
+    accumulated_length += lengths[i];
+  }
+
+  // Remaining bytes should add up to sum of lengths.
+  if (end - finger != accumulated_length)
+    return false;
+
+  accumulated_length = finger - start;
+  for (size_t i = 0; i < count_; ++i) {
+    stream(i)->Init(start + accumulated_length, lengths[i]);
+    accumulated_length += lengths[i];
+  }
+
+  return true;
+}
+
+bool SourceStreamSet::Init(SourceStream* source) {
+  // TODO(sra): consume the rest of |source|.
+  return Init(source->Buffer(), source->Remaining());
+}
+
+bool SourceStreamSet::ReadSet(SourceStreamSet* set) {
+  uint32 stream_count = 0;
+  SourceStream* control_stream = this->stream(0);
+  if (!control_stream->ReadVarint32(&stream_count))
+    return false;
+
+  uint32 lengths[kMaxStreams] = {};  // i.e. all zero.
+
+  for (size_t i = 0; i < stream_count; ++i) {
+    if (!control_stream->ReadVarint32(&lengths[i]))
+      return false;
+  }
+
+  for (size_t i = 0; i < stream_count; ++i) {
+    if (!this->stream(i)->ReadSubstream(lengths[i], set->stream(i)))
+      return false;
+  }
+  return true;
+}
+
+bool SourceStreamSet::Empty() const {
+  for (size_t i = 0; i < count_; ++i) {
+    if (streams_[i].Remaining() != 0)
+      return false;
+  }
+  return true;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+SinkStreamSet::SinkStreamSet()
+  : count_(kMaxStreams) {
+}
+
+SinkStreamSet::~SinkStreamSet() {
+}
+
+void SinkStreamSet::Init(size_t stream_index_limit) {
+  count_ = stream_index_limit;
+}
+
+// The header for a stream set for N streams is serialized as
+//   <version><N><length1><length2>...<lengthN>
+void SinkStreamSet::CopyHeaderTo(SinkStream* header) {
+  header->WriteVarint32(kStreamsSerializationFormatVersion);
+  header->WriteVarint32(count_);
+  for (size_t i = 0; i < count_; ++i) {
+    header->WriteVarint32(stream(i)->Length());
+  }
+}
+
+// Writes |this| to |combined_stream|.  See SourceStreamSet::Init for the layout
+// of the stream metadata and contents.
+bool SinkStreamSet::CopyTo(SinkStream *combined_stream) {
+  SinkStream header;
+  CopyHeaderTo(&header);
+  combined_stream->Append(&header);
+  for (size_t i = 0; i < count_; ++i) {
+    combined_stream->Append(stream(i));
+  }
+  return true;
+}
+
+namespace {
+bool Write(int file_descriptor, SinkStream* sink) {
+  size_t length = sink->Length();
+  const void *buffer = sink->Buffer();
+  int bytes_written = _write(file_descriptor, buffer, length);
+  return bytes_written == length;
+}
+}
+
+bool SinkStreamSet::CopyToFileDescriptor(int file_descriptor) {
+  SinkStream header;
+  CopyHeaderTo(&header);
+  if (!Write(file_descriptor, &header))
+    return false;
+  for (size_t i = 0; i < count_; ++i) {
+    if (!Write(file_descriptor, stream(i)))
+      return false;
+  }
+  return true;
+}
+
+bool SinkStreamSet::WriteSet(SinkStreamSet* set) {
+  uint32 lengths[kMaxStreams];
+  // 'stream_count' includes all non-empty streams and all empty stream numbered
+  // lower than a non-empty stream.
+  size_t stream_count = 0;
+  for (size_t i = 0; i < kMaxStreams; ++i) {
+    SinkStream* stream = set->stream(i);
+    lengths[i] = stream->Length();
+    if (lengths[i] > 0)
+      stream_count = i + 1;
+  }
+
+  SinkStream* control_stream = this->stream(0);
+  control_stream->WriteVarint32(stream_count);
+  for (size_t i = 0; i < stream_count; ++i) {
+    control_stream->WriteVarint32(lengths[i]);
+  }
+
+  for (size_t i = 0; i < stream_count; ++i) {
+    this->stream(i)->Append(set->stream(i));
+  }
+  return true;
+}
+
+}  // namespace
diff --git a/courgette/streams.h b/courgette/streams.h
new file mode 100644
index 0000000..2fb824f
--- /dev/null
+++ b/courgette/streams.h
@@ -0,0 +1,220 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Streams classes.
+//
+// These memory-resident streams are used for serialzing data into a sequential
+// region of memory.
+// Streams are divided into SourceStreams for reading and SinkStreams for
+// writing.  Streams are aggregated into Sets which allows several streams to be
+// used at once.  Example: we can write A1, B1, A2, B2 but achive the memory
+// layout A1 A2 B1 B2 by writing 'A's to one stream and 'B's to another.
+#ifndef COURGETTE_STREAMS_H_
+#define COURGETTE_STREAMS_H_
+
+#include <stdio.h>  // for FILE*
+#include <string>
+
+#include "base/basictypes.h"
+
+#include "courgette/region.h"
+
+namespace courgette {
+
+class SourceStream;
+class SinkStream;
+
+// Maximum number of streams in a stream set.
+static const unsigned int kMaxStreams = 10;
+
+// A SourceStream allows a region of memory to be scanned by a sequence of Read
+// operations.  The stream does not own the memory.
+class SourceStream {
+ public:
+  SourceStream() : start_(NULL), end_(NULL), current_(NULL) {}
+
+  // Initializes the SourceStream to yield the bytes at |pointer|.  The caller
+  // still owns the memory at |pointer| and should free the memory only after
+  // the last use of the stream.
+  void Init(const void* pointer, size_t length) {
+    start_ = static_cast<const uint8*>(pointer);
+    end_ = start_ + length;
+    current_ = start_;
+  }
+
+  // Initializes the SourceStream to yield the bytes in |region|.  The caller
+  // still owns the memory at |region| and should free the memory only after
+  // the last use of the stream.
+  void Init(const Region& region) { Init(region.start(), region.length()); }
+
+  // Initializes the SourceStream to yield the bytes in |string|.  The caller
+  // still owns the memory at |string| and should free the memory only after
+  // the last use of the stream.
+  void Init(const std::string& string) { Init(string.c_str(), string.size()); }
+
+  // Initializes the SourceStream to yield the bytes written to |sink|. |sink|
+  // still owns the memory, so needs to outlive |this|.  |sink| should not be
+  // written to after |this| is initialized.
+  void Init(const SinkStream& sink);
+
+  // Returns number of bytes remaining to be read from stream.
+  size_t Remaining() const { return end_ - current_; }
+
+  // Returns initial length of stream before any data consumed by reading.
+  size_t OriginalLength() const { return end_ - start_; }
+
+  const uint8* Buffer() const { return current_; }
+  bool Empty() const { return current_ == end_; }
+
+  // Copies bytes from stream to memory at |destination|.  Returns 'false' if
+  // insufficient data to satisfy request.
+  bool Read(void* destination, size_t byte_count);
+
+  // Reads a varint formatted unsigned integer from stream.  Returns 'false' if
+  // the read failed due to insufficient data or malformed Varint32.
+  bool ReadVarint32(uint32* output_value);
+
+  // Reads a varint formatted signed integer from stream.  Returns 'false' if
+  // the read failed due to insufficient data or malformed Varint32.
+  bool ReadVarint32Signed(int32* output_value);
+
+  // Initializes |substream| to yield |length| bytes from |this| stream,
+  // starting at |offset| bytes from the current position.  Returns 'false' if
+  // there are insufficient bytes in |this| stream.
+  bool ShareSubstream(size_t offset, size_t length, SourceStream* substream);
+
+  // Initializes |substream| to yield |length| bytes from |this| stream,
+  // starting at the current position.  Returns 'false' if there are
+  // insufficient bytes in |this| stream.
+  bool ShareSubstream(size_t length, SourceStream* substream) {
+    return ShareSubstream(0, length, substream);
+  }
+
+  // Reads |length| bytes from |this| stream.  Initializes |substream| to yield
+  // the bytes.  Returns 'false' if there are insufficient bytes in |this|
+  // stream.
+  bool ReadSubstream(size_t length, SourceStream* substream);
+
+  // Skips over bytes.  Returns 'false' if insufficient data to satisfy request.
+  bool Skip(size_t byte_count);
+
+ private:
+  const uint8* start_;     // Points to start of buffer.
+  const uint8* end_;       // Points to first location after buffer.
+  const uint8* current_;   // Points into buffer at current read location.
+
+  DISALLOW_COPY_AND_ASSIGN(SourceStream);
+};
+
+// A SinkStream accumulates writes into a buffer that it owns.  The stream is
+// initialy in an 'accumulating' state where writes are permitted.  Accessing
+// the buffer moves the stream into a 'locked' state where no more writes are
+// permitted.  The stream may also be in a 'retired' state where the buffer
+// contents are no longer available.
+class SinkStream {
+ public:
+  SinkStream() {}
+  ~SinkStream() {}
+
+  // Appends |byte_count| bytes from |data| to the stream.
+  void Write(const void* data, size_t byte_count);
+
+  // Appends the 'varint32' encoding of |value| to the stream.
+  void WriteVarint32(uint32 value);
+
+  // Appends the 'varint32' encoding of |value| to the stream.
+  void WriteVarint32Signed(int32 value);
+
+  // Contents of |other| are appended to |this| stream.  The |other| stream
+  // becomes retired.
+  void Append(SinkStream* other);
+
+  // Returns the number of bytes in this SinkStream
+  size_t Length() const { return buffer_.size(); }
+
+  // Returns a pointer to contiguously allocated Length() bytes in the stream.
+  // Writing to the stream invalidates the pointer.  The SinkStream continues to
+  // own the memory.
+  const uint8* Buffer() const {
+    return reinterpret_cast<const uint8*>(buffer_.c_str());
+  }
+
+  // Hints that the stream will grow by an additional |length| bytes.
+  void Reserve(size_t length) { buffer_.reserve(length + buffer_.length()); }
+
+ private:
+  std::string buffer_;  // Use a string to manage the stream's memory.
+
+  DISALLOW_COPY_AND_ASSIGN(SinkStream);
+};
+
+// A SourceStreamSet is a set of SourceStreams.
+class SourceStreamSet {
+ public:
+  SourceStreamSet();
+  ~SourceStreamSet();
+
+  // Initializes the SourceStreamSet with the stream data in memory at |source|.
+  // The caller continues to own the memory and should not modify or free the
+  // memory until the SourceStreamSet destructor has been called.
+  //
+  // The layout of the streams are as written by SinkStreamSet::CopyTo.
+  // Init returns 'false' if the layout is inconsistent with |byte_count|.
+  bool Init(const void* source, size_t byte_count);
+
+  // Initializes |this| from |source|.  The caller continues to own the memory
+  // because it continues to be owned by |source|.
+  bool Init(SourceStream* source);
+
+  // Returns a pointer to one of the sub-streams.
+  SourceStream* stream(size_t id) { return id < count_ ? &streams_[id] : NULL; }
+
+  // Initialize |set| from |this|.
+  bool ReadSet(SourceStreamSet* set);
+
+  // Returns 'true' if all streams are completely consumed.
+  bool Empty() const;
+
+ private:
+  size_t count_;
+  SourceStream streams_[kMaxStreams];
+
+  DISALLOW_COPY_AND_ASSIGN(SourceStreamSet);
+};
+
+class SinkStreamSet {
+ public:
+  SinkStreamSet();
+  ~SinkStreamSet();
+
+  // Initializes the SinkStreamSet to have |stream_index_limit| streams.  Must
+  // be <= kMaxStreams.  If Init is not called the default is has kMaxStream.
+  void Init(size_t stream_index_limit);
+
+  // Returns a pointer to a substream.
+  SinkStream* stream(size_t id) { return id < count_ ? &streams_[id] : NULL; }
+
+  // CopyTo serializes the streams in the SinkStreamSet into a single target
+  // stream or file.  The serialized format may be re-read by initializing a
+  // SourceStreamSet with a buffer containing the data.
+  bool CopyTo(SinkStream* combined_stream);
+  bool CopyToFile(FILE* file);
+  bool CopyToFileDescriptor(int file_descriptor);
+
+  // Writes the streams of |set| into the corresponding streams of |this|.
+  // Stream zero first has some metadata written to it.  |set| becomes retired.
+  // Partner to SourceStreamSet::ReadSet.
+  bool WriteSet(SinkStreamSet* set);
+
+ private:
+  void CopyHeaderTo(SinkStream* stream);
+
+  size_t count_;
+  SinkStream streams_[kMaxStreams];
+
+  DISALLOW_COPY_AND_ASSIGN(SinkStreamSet);
+};
+
+}  // namespace
+#endif  // COURGETTE_STREAMS_H_
diff --git a/courgette/streams_unittest.cc b/courgette/streams_unittest.cc
new file mode 100644
index 0000000..dd330ce
--- /dev/null
+++ b/courgette/streams_unittest.cc
@@ -0,0 +1,200 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "courgette/streams.h"
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+TEST(StreamsTest, SimpleWriteRead) {
+  const unsigned int kValue1 = 12345;
+  courgette::SinkStream sink;
+
+  sink.WriteVarint32(kValue1);
+
+  const uint8* sink_buffer = sink.Buffer();
+  size_t length = sink.Length();
+
+  courgette::SourceStream source;
+  source.Init(sink_buffer, length);
+
+  unsigned int value;
+  bool can_read = source.ReadVarint32(&value);
+  EXPECT_EQ(true, can_read);
+  EXPECT_EQ(kValue1, value);
+  EXPECT_EQ(0, source.Remaining());
+}
+
+TEST(StreamsTest, SimpleWriteRead2) {
+  courgette::SinkStream sink;
+
+  sink.Write("Hello", 5);
+
+  const uint8* sink_buffer = sink.Buffer();
+  size_t sink_length = sink.Length();
+
+  courgette::SourceStream source;
+  source.Init(sink_buffer, sink_length);
+
+  char text[10] = {0};
+  bool can_read = source.Read(text, 5);
+  EXPECT_EQ(true, can_read);
+  EXPECT_EQ(0, memcmp("Hello", text, 5));
+  EXPECT_EQ(0, source.Remaining());
+}
+
+TEST(StreamsTest, StreamSetWriteRead) {
+  courgette::SinkStreamSet out;
+  out.Init(4);
+
+  const unsigned int kValue1 = 12345;
+
+  out.stream(3)->WriteVarint32(kValue1);
+
+  courgette::SinkStream collected;
+
+  out.CopyTo(&collected);
+
+  const uint8* collected_buffer = collected.Buffer();
+  size_t collected_length = collected.Length();
+
+  courgette::SourceStreamSet in;
+  bool can_init = in.Init(collected_buffer, collected_length);
+  EXPECT_EQ(true, can_init);
+
+  uint32 value;
+  bool can_read = in.stream(3)->ReadVarint32(&value);
+  EXPECT_EQ(true, can_read);
+  EXPECT_EQ(kValue1, value);
+  EXPECT_EQ(0, in.stream(3)->Remaining());
+  EXPECT_EQ(0, in.stream(2)->Remaining());
+}
+
+TEST(StreamsTest, StreamSetWriteRead2) {
+  const size_t kNumberOfStreams = 4;
+  const unsigned int kEnd = ~0U;
+
+  courgette::SinkStreamSet out;
+  out.Init(kNumberOfStreams);
+
+  static const unsigned int data[] = {
+    3, 123,  3, 1000,  0, 100, 2, 100,  0, 999999,
+    0, 0,  0, 0,  1, 2,  1, 3,  1, 5,  0, 66,
+    // varint32 edge case values:
+    1, 127,  1, 128,  1, 129,  1, 16383,  1, 16384,
+    kEnd
+  };
+
+  for (size_t i = 0;  data[i] != kEnd;  i += 2) {
+    size_t id = data[i];
+    size_t datum = data[i + 1];
+    out.stream(id)->WriteVarint32(datum);
+  }
+
+  courgette::SinkStream collected;
+
+  out.CopyTo(&collected);
+
+  courgette::SourceStreamSet in;
+  bool can_init = in.Init(collected.Buffer(), collected.Length());
+  EXPECT_EQ(true, can_init);
+
+  for (size_t i = 0;  data[i] != kEnd;  i += 2) {
+    size_t id = data[i];
+    size_t datum = data[i + 1];
+    uint32 value = 77;
+    bool can_read = in.stream(id)->ReadVarint32(&value);
+    EXPECT_EQ(true, can_read);
+    EXPECT_EQ(datum, value);
+  }
+
+  for (size_t i = 0;  i < kNumberOfStreams;  ++i) {
+    EXPECT_EQ(0, in.stream(i)->Remaining());
+  }
+}
+
+TEST(StreamsTest, SignedVarint32) {
+  courgette::SinkStream out;
+
+  static const int32 data[] = {
+    0, 64, 128, 8192, 16384,
+    1 << 20, 1 << 21, 1 << 22,
+    1 << 27, 1 << 28,
+    0x7fffffff, -0x7fffffff
+  };
+
+  std::vector<int32> values;
+  for (size_t i = 0;  i < sizeof(data)/sizeof(data[0]);  ++i) {
+    int32 basis = data[i];
+    for (int delta = -4; delta <= 4; ++delta) {
+      out.WriteVarint32Signed(basis + delta);
+      values.push_back(basis + delta);
+      out.WriteVarint32Signed(-basis + delta);
+      values.push_back(-basis + delta);
+    }
+  }
+
+  courgette::SourceStream in;
+  in.Init(out);
+
+  for (size_t i = 0;  i < values.size();  ++i) {
+    int written_value = values[i];
+    int32 datum;
+    bool can_read = in.ReadVarint32Signed(&datum);
+    EXPECT_EQ(written_value, datum);
+  }
+
+  EXPECT_EQ(true, in.Empty());
+}
+
+TEST(StreamsTest, StreamSetReadWrite) {
+  courgette::SinkStreamSet out;
+
+  { // Local scope for temporary stream sets.
+    courgette::SinkStreamSet subset1;
+    subset1.stream(3)->WriteVarint32(30000);
+    subset1.stream(5)->WriteVarint32(50000);
+    out.WriteSet(&subset1);
+
+    courgette::SinkStreamSet subset2;
+    subset2.stream(2)->WriteVarint32(20000);
+    subset2.stream(6)->WriteVarint32(60000);
+    out.WriteSet(&subset2);
+  }
+
+  courgette::SinkStream collected;
+  out.CopyTo(&collected);
+  courgette::SourceStreamSet in;
+  bool can_init_in = in.Init(collected.Buffer(), collected.Length());
+  EXPECT_EQ(true, can_init_in);
+
+  courgette::SourceStreamSet subset1;
+  bool can_read_1 = in.ReadSet(&subset1);
+  EXPECT_EQ(true, can_read_1);
+  EXPECT_EQ(false, in.Empty());
+
+  courgette::SourceStreamSet subset2;
+  bool can_read_2 = in.ReadSet(&subset2);
+  EXPECT_EQ(true, can_read_2);
+  EXPECT_EQ(true, in.Empty());
+
+  courgette::SourceStreamSet subset3;
+  bool can_read_3 = in.ReadSet(&subset3);
+  EXPECT_EQ(false, can_read_3);
+
+  EXPECT_EQ(false, subset1.Empty());
+  EXPECT_EQ(false, subset1.Empty());
+
+  uint32 datum;
+  EXPECT_EQ(true, subset1.stream(3)->ReadVarint32(&datum));
+  EXPECT_EQ(30000, datum);
+  EXPECT_EQ(true, subset1.stream(5)->ReadVarint32(&datum));
+  EXPECT_EQ(50000, datum);
+  EXPECT_EQ(true, subset1.Empty());
+
+  EXPECT_EQ(true, subset2.stream(2)->ReadVarint32(&datum));
+  EXPECT_EQ(20000, datum);
+  EXPECT_EQ(true, subset2.stream(6)->ReadVarint32(&datum));
+  EXPECT_EQ(60000, datum);
+  EXPECT_EQ(true, subset2.Empty());
+}
diff --git a/courgette/testdata/en-US.dll b/courgette/testdata/en-US.dll
new file mode 100644
index 0000000..f1ad46c
--- /dev/null
+++ b/courgette/testdata/en-US.dll
diff --git a/courgette/testdata/setup1.exe b/courgette/testdata/setup1.exe
new file mode 100644
index 0000000..da072f5
--- /dev/null
+++ b/courgette/testdata/setup1.exe
diff --git a/courgette/testdata/setup2.exe b/courgette/testdata/setup2.exe
new file mode 100644
index 0000000..577a47f
--- /dev/null
+++ b/courgette/testdata/setup2.exe
diff --git a/courgette/third_party/LICENCE b/courgette/third_party/LICENCE
new file mode 100644
index 0000000..c146b5b
--- /dev/null
+++ b/courgette/third_party/LICENCE
@@ -0,0 +1,121 @@
+BSD Protection License
+February 2002
+
+Preamble
+--------
+
+The Berkeley Software Distribution ("BSD") license has proven very effective
+over the years at allowing for a wide spread of work throughout both
+commercial and non-commercial products.  For programmers whose primary
+intention is to improve the general quality of available software, it is
+arguable that there is no better license than the BSD license, as it
+permits improvements to be used wherever they will help, without idealogical
+or metallic constraint.
+
+This is of particular value to those who produce reference implementations
+of proposed standards: The case of TCP/IP clearly illustrates that freely
+and universally available implementations leads the rapid acceptance of
+standards -- often even being used instead of a de jure standard (eg, OSI
+network models).
+
+With the rapid proliferation of software licensed under the GNU General
+Public License, however, the continued success of this role is called into
+question.  Given that the inclusion of a few lines of "GPL-tainted" work
+into a larger body of work will result in restricted distribution -- and
+given that further work will likely build upon the "tainted" portions,
+making them difficult to remove at a future date -- there are inevitable
+circumstances where authors would, in order to protect their goal of
+providing for the widespread usage of their work, wish to guard against
+such "GPL-taint".
+
+In addition, one can imagine that companies which operate by producing and
+selling (possibly closed-source) code would wish to protect themselves
+against the rise of a GPL-licensed competitor.  While under existing
+licenses this would mean not releasing their code under any form of open
+license, if a license existed under which they could incorporate any
+improvements back into their own (commercial) products then they might be
+far more willing to provide for non-closed distribution.
+
+For the above reasons, we put forth this "BSD Protection License": A
+license designed to retain the freedom granted by the BSD license to use
+licensed works in a wide variety of settings, both non-commercial and
+commercial, while protecting the work from having future contributors
+restrict that freedom.
+
+The precise terms and conditions for copying, distribution, and
+modification follow.
+
+BSD PROTECTION LICENSE
+TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION, AND MODIFICATION
+----------------------------------------------------------------
+
+0. Definitions.
+   a) "Program", below, refers to any program or work distributed under
+      the terms of this license.
+   b) A "work based on the Program", below, refers to either the Program
+      or any derivative work under copyright law.
+   c) "Modification", below, refers to the act of creating derivative works.
+   d) "You", below, refers to each licensee.
+
+1. Scope.
+   This license governs the copying, distribution, and modification of the
+   Program.  Other activities are outside the scope of this license; The
+   act of running the Program is not restricted, and the output from the
+   Program is covered only if its contents constitute a work based on the
+   Program.
+
+2. Verbatim copies.
+   You may copy and distribute verbatim copies of the Program as you
+   receive it, in any medium, provided that you conspicuously and
+   appropriately publish on each copy an appropriate copyright notice; keep
+   intact all the notices that refer to this License and to the absence of
+   any warranty; and give any other recipients of the Program a copy of this
+   License along with the Program.
+
+3. Modification and redistribution under closed license.
+   You may modify your copy or copies of the Program, and distribute
+   the resulting derivative works, provided that you meet the
+   following conditions:
+   a) The copyright notice and disclaimer on the Program must be reproduced
+      and included in the source code, documentation, and/or other materials
+      provided in a manner in which such notices are normally distributed.
+   b) The derivative work must be clearly identified as such, in order that
+      it may not be confused with the original work.
+   c) The license under which the derivative work is distributed must
+      expressly prohibit the distribution of further derivative works.
+
+4. Modification and redistribution under open license.
+   You may modify your copy or copies of the Program, and distribute
+   the resulting derivative works, provided that you meet the
+   following conditions:
+   a) The copyright notice and disclaimer on the Program must be reproduced
+      and included in the source code, documentation, and/or other materials
+      provided in a manner in which such notices are normally distributed.
+   b) You must clearly indicate the nature and date of any changes made
+      to the Program.  The full details need not necessarily be included in
+      the individual modified files, provided that each modified file is
+      clearly marked as such and instructions are included on where the
+      full details of the modifications may be found.
+   c) You must cause any work that you distribute or publish, that in whole
+      or in part contains or is derived from the Program or any part
+      thereof, to be licensed as a whole at no charge to all third
+      parties under the terms of this License.
+
+5. Implied acceptance.
+   You may not copy or distribute the Program or any derivative works except
+   as expressly provided under this license.  Consequently, any such action
+   will be taken as implied acceptance of the terms of this license.
+
+6. NO WARRANTY.
+   THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+   INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+   AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
+   THE COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+   REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE FOR ANY DIRECT,
+   INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+   ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING, BUT
+   NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+   USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+   TORT, EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGES.
diff --git a/courgette/third_party/README.chromium b/courgette/third_party/README.chromium
new file mode 100644
index 0000000..d3037a1
--- /dev/null
+++ b/courgette/third_party/README.chromium
@@ -0,0 +1,23 @@
+This directory contains an extensively modified version of Colin Percival's
+bsdiff, available in its original form from:
+
+   http://www.daemonology.net/bsdiff/
+
+The basic principles of operation are best understood by reading Colin's
+unpublised paper:
+
+Colin Percival, Naive differences of executable code, http://www.daemonology.net/bsdiff/, 200
+
+The copy on this directory so extensively modified that the binary format is
+incompatible with the original and it cannot be compiled outside the Chromium
+source tree or the Courgette project.
+
+List of changes made to original code:
+  - wrapped functions in 'courgette' namespace
+  - renamed .c files to .cc
+  - added bsdiff.h header file
+  - changed the code to use streams.h from courgette
+  - changed the encoding of numbers to use the 'varint' encoding
+  - reformatted code to be closer to Google coding standards
+  - renamed variables
+  - added comments
diff --git a/courgette/third_party/bsdiff.h b/courgette/third_party/bsdiff.h
new file mode 100644
index 0000000..bf7fdec
--- /dev/null
+++ b/courgette/third_party/bsdiff.h
@@ -0,0 +1,87 @@
+/*-
+ * Copyright 2003,2004 Colin Percival
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Changelog:
+ * 2005-04-26 - Define the header as a C structure, add a CRC32 checksum to
+ *              the header, and make all the types 32-bit.
+ *                --Benjamin Smedberg <benjamin@smedbergs.us>
+ * 2009-03-31 - Change to use Streams.  Move CRC code to crc.{h,cc}
+ *              Changed status to an enum, removed unused status codes.
+ *                --Stephen Adams <sra@chromium.org>
+ */
+
+#ifndef COURGETTE_BSDIFF_H_
+#define COURGETTE_BSDIFF_H_
+
+#include "base/basictypes.h"
+
+namespace courgette {
+
+enum BSDiffStatus {
+  OK = 0,
+  MEM_ERROR = 1,
+  CRC_ERROR = 2,
+  READ_ERROR = 3,
+  UNEXPECTED_ERROR = 4
+};
+
+class SourceStream;
+class SinkStream;
+
+// Creates a binary patch.
+//
+BSDiffStatus CreateBinaryPatch(SourceStream* old_stream,
+                               SourceStream* new_stream,
+                               SinkStream* patch_stream);
+
+// Applies the given patch file to a given source file. This method validates
+// the CRC of the original file stored in the patch file, before applying the
+// patch to it.
+//
+BSDiffStatus ApplyBinaryPatch(SourceStream* old_stream,
+                              SourceStream* patch_stream,
+                              SinkStream* new_stream);
+
+
+// The following declarations are common to the patch-creation and
+// patch-application code.
+
+// The patch stream starts with a MBSPatchHeader.
+typedef struct MBSPatchHeader_ {
+  char tag[8];       // Contains MBS_PATCH_HEADER_TAG
+  uint32 slen;       // Length of the file to be patched.
+  uint32 scrc32;     // CRC32 of the file to be patched.
+  uint32 dlen;       // Length of the result file.
+  uint32 cblen;      // Length of the control block in bytes.
+  uint32 difflen;    // Length of the diff block in bytes.
+  uint32 extralen;   // Length of the extra block in bytes.
+} MBSPatchHeader;
+
+// This is the value for the tag field.  Must match length exactly, not counting
+// null at end of string.
+#define MBS_PATCH_HEADER_TAG "GBSDIF42"
+
+}  // namespace
+#endif  // COURGETTE_BSDIFF_H_
diff --git a/courgette/third_party/bsdiff_apply.cc b/courgette/third_party/bsdiff_apply.cc
new file mode 100644
index 0000000..4b6a011
--- /dev/null
+++ b/courgette/third_party/bsdiff_apply.cc
@@ -0,0 +1,165 @@
+/*-
+ * Copyright 2003,2004 Colin Percival
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted providing that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Changelog:
+ * 2009-03-31 - Change to use Streams.  Move CRC code to crc.{h,cc}
+ *                --Stephen Adams <sra@chromium.org>
+ */
+
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "courgette/third_party/bsdiff.h"
+
+#include "courgette/crc.h"
+#include "courgette/streams.h"
+
+namespace courgette {
+
+BSDiffStatus MBS_ReadHeader(SourceStream* stream, MBSPatchHeader* header) {
+  if (!stream->Read(header->tag, sizeof(header->tag))) return READ_ERROR;
+  if (!stream->ReadVarint32(&header->slen)) return READ_ERROR;
+  if (!stream->ReadVarint32(&header->scrc32)) return READ_ERROR;
+  if (!stream->ReadVarint32(&header->dlen)) return READ_ERROR;
+  if (!stream->ReadVarint32(&header->cblen)) return READ_ERROR;
+  if (!stream->ReadVarint32(&header->difflen)) return READ_ERROR;
+  if (!stream->ReadVarint32(&header->extralen)) return READ_ERROR;
+
+  // The string will have a NUL terminator that we don't use, hence '-1'.
+  COMPILE_ASSERT(sizeof(MBS_PATCH_HEADER_TAG) - 1 == sizeof(header->tag),
+                 MBS_PATCH_HEADER_TAG_must_match_header_field_size);
+  if (memcmp(header->tag, MBS_PATCH_HEADER_TAG, 8) != 0)
+    return UNEXPECTED_ERROR;
+
+  size_t bytes_remaining = stream->Remaining();
+  if (header->cblen +
+      header->difflen +
+      header->extralen != bytes_remaining)
+    return UNEXPECTED_ERROR;
+
+  return OK;
+}
+
+BSDiffStatus MBS_ApplyPatch(const MBSPatchHeader *header,
+                            SourceStream* patch_stream,
+                            const uint8* old_start, size_t old_size,
+                            SinkStream* new_stream) {
+  const uint8* old_end = old_start + old_size;
+
+  SourceStream control_stream;
+
+  const uint8* control_start = patch_stream->Buffer();
+  if (!patch_stream->ReadSubstream(header->cblen, &control_stream))
+    return READ_ERROR;
+  if (!patch_stream->Skip(header->difflen + header->extralen))
+    return READ_ERROR;
+  if (!patch_stream->Empty())
+    return READ_ERROR;
+
+  const uint8* diff_start = control_start + header->cblen;
+  const uint8* diff_end = diff_start + header->difflen;
+  const uint8* extra_start = diff_end;
+  const uint8* extra_end = extra_start + header->extralen;
+
+  const uint8* old_position = old_start;
+  const uint8* diff_position = diff_start;
+  const uint8* extra_position = extra_start;
+
+  new_stream->Reserve(header->dlen);
+
+  while (!control_stream.Empty()) {
+    uint32 copy_count, extra_count;
+    int32 seek_adjustment;
+    if (!control_stream.ReadVarint32(&copy_count))
+      return UNEXPECTED_ERROR;
+    if (!control_stream.ReadVarint32(&extra_count))
+      return UNEXPECTED_ERROR;
+    if (!control_stream.ReadVarint32Signed(&seek_adjustment))
+      return UNEXPECTED_ERROR;
+
+#ifdef DEBUG_bsmedberg
+    printf("Applying block:  copy: %-8u extra: %-8u seek: %+i\n",
+           copy_count, extra_count, seek_adjustment);
+#endif
+    // Byte-wise arithmetically add bytes from old file to bytes from the diff
+    // block.
+    if (copy_count > static_cast<size_t>(old_end - old_position))
+      return UNEXPECTED_ERROR;
+    if (copy_count > static_cast<size_t>(diff_end - diff_position))
+      return UNEXPECTED_ERROR;
+
+    // Add together bytes from the 'old' file and the 'diff' stream.
+    for (size_t i = 0;  i < copy_count;  ++i) {
+      uint8 byte = old_position[i] + diff_position[i];
+      new_stream->Write(&byte, 1);
+    }
+    old_position += copy_count;
+    diff_position += copy_count;
+
+    // Copy bytes from the extra block.
+    if (extra_count > static_cast<size_t>(extra_end - extra_position))
+      return UNEXPECTED_ERROR;
+
+    new_stream->Write(extra_position, extra_count);
+    extra_position += extra_count;
+
+    // "seek" forwards (or backwards) in oldfile.
+    if (old_position + seek_adjustment < old_start ||
+        old_position + seek_adjustment > old_end)
+      return UNEXPECTED_ERROR;
+
+    old_position += seek_adjustment;
+  }
+
+  if (diff_position != diff_end)
+    return UNEXPECTED_ERROR;
+  if (extra_position != extra_end)
+    return UNEXPECTED_ERROR;
+
+  return OK;
+}
+
+BSDiffStatus ApplyBinaryPatch(SourceStream* old_stream,
+                              SourceStream* patch_stream,
+                              SinkStream* new_stream) {
+  MBSPatchHeader header;
+  BSDiffStatus ret = MBS_ReadHeader(patch_stream, &header);
+  if (ret != OK) return ret;
+
+  const uint8* old_start = old_stream->Buffer();
+  size_t old_size = old_stream->Remaining();
+
+  if (old_size != header.slen) return UNEXPECTED_ERROR;
+
+  if (CalculateCrc(old_start, old_size) != header.scrc32)
+    return CRC_ERROR;
+
+  MBS_ApplyPatch(&header, patch_stream, old_start, old_size, new_stream);
+
+  return OK;
+}
+
+}  // namespace
diff --git a/courgette/third_party/bsdiff_create.cc b/courgette/third_party/bsdiff_create.cc
new file mode 100644
index 0000000..d59dc21
--- /dev/null
+++ b/courgette/third_party/bsdiff_create.cc
@@ -0,0 +1,432 @@
+/*
+  bsdiff.c -- Binary patch generator.
+
+  Copyright 2003 Colin Percival
+
+  For the terms under which this work may be distributed, please see
+  the adjoining file "LICENSE".
+
+  ChangeLog:
+  2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit
+               values throughout.
+                 --Benjamin Smedberg <benjamin@smedbergs.us>
+  2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table
+               provided by libbz2.
+                 --Darin Fisher <darin@meer.net>
+  2007-11-14 - Changed to use Crc from Lzma library instead of Bzip library
+                 --Rahul Kuchhal
+  2009-03-31 - Change to use Streams.  Added lots of comments.
+                 --Stephen Adams <sra@chromium.org>
+*/
+
+#include "courgette/third_party/bsdiff.h"
+
+#include <stdlib.h>
+#include <algorithm>
+
+#include "base/logging.h"
+#include "base/scoped_ptr.h"
+#include "base/string_util.h"
+#include "base/time.h"
+
+#include "courgette/crc.h"
+#include "courgette/streams.h"
+
+namespace courgette {
+
+// ------------------------------------------------------------------------
+//
+// The following code is taken verbatim from 'bsdiff.c'. Please keep all the
+// code formatting and variable names.  The only changes from the original are
+// replacing tabs with spaces, indentation, and using 'const'.
+//
+// The code appears to be a rewritten version of the suffix array algorithm
+// presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko
+// Sadakane, special-cased for bytes.
+
+static void
+split(int *I,int *V,int start,int len,int h)
+{
+  int i,j,k,x,tmp,jj,kk;
+
+  if(len<16) {
+    for(k=start;k<start+len;k+=j) {
+      j=1;x=V[I[k]+h];
+      for(i=1;k+i<start+len;i++) {
+        if(V[I[k+i]+h]<x) {
+          x=V[I[k+i]+h];
+          j=0;
+        };
+        if(V[I[k+i]+h]==x) {
+          tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp;
+          j++;
+        };
+      };
+      for(i=0;i<j;i++) V[I[k+i]]=k+j-1;
+      if(j==1) I[k]=-1;
+    };
+    return;
+  };
+
+  x=V[I[start+len/2]+h];
+  jj=0;kk=0;
+  for(i=start;i<start+len;i++) {
+    if(V[I[i]+h]<x) jj++;
+    if(V[I[i]+h]==x) kk++;
+  };
+  jj+=start;kk+=jj;
+
+  i=start;j=0;k=0;
+  while(i<jj) {
+    if(V[I[i]+h]<x) {
+      i++;
+    } else if(V[I[i]+h]==x) {
+      tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp;
+      j++;
+    } else {
+      tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp;
+      k++;
+    };
+  };
+
+  while(jj+j<kk) {
+    if(V[I[jj+j]+h]==x) {
+      j++;
+    } else {
+      tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp;
+      k++;
+    };
+  };
+
+  if(jj>start) split(I,V,start,jj-start,h);
+
+  for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1;
+  if(jj==kk-1) I[jj]=-1;
+
+  if(start+len>kk) split(I,V,kk,start+len-kk,h);
+}
+
+static void
+qsufsort(int *I,int *V,const unsigned char *old,int oldsize)
+{
+  int buckets[256];
+  int i,h,len;
+
+  for(i=0;i<256;i++) buckets[i]=0;
+  for(i=0;i<oldsize;i++) buckets[old[i]]++;
+  for(i=1;i<256;i++) buckets[i]+=buckets[i-1];
+  for(i=255;i>0;i--) buckets[i]=buckets[i-1];
+  buckets[0]=0;
+
+  for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i;
+  I[0]=oldsize;
+  for(i=0;i<oldsize;i++) V[i]=buckets[old[i]];
+  V[oldsize]=0;
+  for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1;
+  I[0]=-1;
+
+  for(h=1;I[0]!=-(oldsize+1);h+=h) {
+    len=0;
+    for(i=0;i<oldsize+1;) {
+      if(I[i]<0) {
+        len-=I[i];
+        i-=I[i];
+      } else {
+        if(len) I[i-len]=-len;
+        len=V[I[i]]+1-i;
+        split(I,V,i,len,h);
+        i+=len;
+        len=0;
+      };
+    };
+    if(len) I[i-len]=-len;
+  };
+
+  for(i=0;i<oldsize+1;i++) I[V[i]]=i;
+}
+
+static int
+matchlen(const unsigned char *old,int oldsize,const unsigned char *newbuf,int newsize)
+{
+  int i;
+
+  for(i=0;(i<oldsize)&&(i<newsize);i++)
+    if(old[i]!=newbuf[i]) break;
+
+  return i;
+}
+
+static int
+search(int *I,const unsigned char *old,int oldsize,
+       const unsigned char *newbuf,int newsize,int st,int en,int *pos)
+{
+  int x,y;
+
+  if(en-st<2) {
+    x=matchlen(old+I[st],oldsize-I[st],newbuf,newsize);
+    y=matchlen(old+I[en],oldsize-I[en],newbuf,newsize);
+
+    if(x>y) {
+      *pos=I[st];
+      return x;
+    } else {
+      *pos=I[en];
+      return y;
+    }
+  }
+
+  x=st+(en-st)/2;
+  if(memcmp(old+I[x],newbuf,std::min(oldsize-I[x],newsize))<0) {
+    return search(I,old,oldsize,newbuf,newsize,x,en,pos);
+  } else {
+    return search(I,old,oldsize,newbuf,newsize,st,x,pos);
+  }
+}
+
+//  End of 'verbatim' code.
+// ------------------------------------------------------------------------
+
+static void WriteHeader(SinkStream* stream, MBSPatchHeader* header) {
+  stream->Write(header->tag, sizeof(header->tag));
+  stream->WriteVarint32(header->slen);
+  stream->WriteVarint32(header->scrc32);
+  stream->WriteVarint32(header->dlen);
+  stream->WriteVarint32(header->cblen);
+  stream->WriteVarint32(header->difflen);
+  stream->WriteVarint32(header->extralen);
+}
+
+BSDiffStatus CreateBinaryPatch(SourceStream* old_stream,
+                               SourceStream* new_stream,
+                               SinkStream* patch_stream)
+{
+  base::Time start_bsdiff_time = base::Time::Now();
+  LOG(INFO) << "Start bsdiff";
+  size_t initial_patch_stream_length = patch_stream->Length();
+
+  const uint8* old = old_stream->Buffer();
+  const int oldsize = old_stream->Remaining();
+
+  scoped_array<int> I(new(std::nothrow) int[oldsize + 1]);
+  scoped_array<int> V(new(std::nothrow) int[oldsize + 1]);
+  if (I == NULL || V == NULL)
+    return MEM_ERROR;
+
+  base::Time q_start_time = base::Time::Now();
+  qsufsort(I.get(), V.get(), old, oldsize);
+  LOG(INFO) << " done qsufsort "
+            << (base::Time::Now() - q_start_time).InSecondsF();
+  V.reset();
+
+  const uint8* newbuf = new_stream->Buffer();
+  const int newsize = new_stream->Remaining();
+
+  // Allocate newsize+1 bytes instead of newsize bytes to ensure that we never
+  // try to malloc(0) and get a NULL pointer.
+
+  scoped_array<uint8> diff_bytes_array(new(std::nothrow) uint8[newsize + 1]);
+  scoped_array<uint8> extra_bytes_array(new(std::nothrow) uint8[newsize + 1]);
+  if (diff_bytes_array == NULL || extra_bytes_array == NULL)
+    return MEM_ERROR;
+
+  uint8* diff_bytes = diff_bytes_array.get();
+  uint8* extra_bytes = extra_bytes_array.get();
+  int control_length = 0;
+  int diff_bytes_length = 0;
+  int diff_bytes_nonzero = 0;
+  int extra_bytes_length = 0;
+  int eblen = 0;
+
+  SinkStream control_stream;
+
+  // The patch format is a sequence of triples <copy,extra,seek> where 'copy' is
+  // the number of bytes to copy from the old file (possibly with mistakes),
+  // 'extra' is the number of bytes to copy from a stream of fresh bytes, and
+  // 'seek' is an offset to move to the position to copy for the next triple.
+  //
+  // The invariant at the top of this loop is that we are committed to emitting
+  // a triple for the part of |newbuf| surrounding a 'seed' match near
+  // |lastscan|.  We are searching for a second match that will be the 'seed' of
+  // the next triple.  As we scan through |newbuf|, one of four things can
+  // happen at the current position |scan|:
+  //
+  //  1. We find a nice match that appears to be consistent with the current
+  //     seed.  Continue scanning.  It is likely that this match will become
+  //     part of the 'copy'.
+  //
+  //  2. We find match which does much better than extending the current seed
+  //     old match.  Emit a triple for the current seed and take this match as
+  //     the new seed for a new triple.  By 'much better' we remove 8 mismatched
+  //     bytes by taking the new seed.
+  //
+  //  3. There is not a good match.  Continue scanning.  These bytes will likely
+  //     become part of the 'extra'.
+  //
+  //  4. There is no match because we reached the end of the input, |newbuf|.
+
+  // This is how the loop advances through the bytes of |newbuf|:
+  //
+  // ...012345678901234567890123456789...
+  //    ssssssssss                      Seed at |lastscan|
+  //              xxyyyxxyyxy           |scan| forward, cases (3)(x) & (1)(y)
+  //                         mmmmmmmm   New match will start new seed case (2).
+  //    fffffffffffffff                 |lenf| = scan forward from |lastscan|
+  //                     bbbb           |lenb| = scan back from new seed |scan|.
+  //    ddddddddddddddd                 Emit diff bytes for the 'copy'.
+  //                   xx               Emit extra bytes.
+  //                     ssssssssssss   |lastscan = scan - lenb| is new seed.
+  //                                 x  Cases (1) and (3) ....
+
+
+  int lastscan = 0, lastpos = 0, lastoffset = 0;
+
+  int scan = 0;
+  int match_length = 0;
+
+  while (scan < newsize) {
+    int pos = 0;
+    int oldscore = 0;  // Count of how many bytes of the current match at |scan|
+                       // extend the match at |lastscan|.
+
+    scan += match_length;
+    for (int scsc = scan;  scan < newsize;  ++scan) {
+      match_length = search(I.get(), old, oldsize,
+                            newbuf + scan, newsize - scan,
+                            0, oldsize, &pos);
+
+      for ( ; scsc < scan + match_length ; scsc++)
+        if ((scsc + lastoffset < oldsize) &&
+            (old[scsc + lastoffset] == newbuf[scsc]))
+          oldscore++;
+
+      if ((match_length == oldscore) && (match_length != 0))
+        break;  // Good continuing match, case (1)
+      if (match_length > oldscore + 8)
+        break;  // New seed match, case (2)
+
+      if ((scan + lastoffset < oldsize) &&
+          (old[scan + lastoffset] == newbuf[scan]))
+        oldscore--;
+      // Case (3) continues in this loop until we fall out of the loop (4).
+    }
+
+    if ((match_length != oldscore) || (scan == newsize)) {  // Cases (2) and (4)
+      // This next chunk of code finds the boundary between the bytes to be
+      // copied as part of the current triple, and the bytes to be copied as
+      // part of the next triple.  The |lastscan| match is extended forwards as
+      // far as possible provided doing to does not add too many mistakes.  The
+      // |scan| match is extended backwards in a similar way.
+
+      // Extend the current match (if any) backwards.  |lenb| is the maximal
+      // extension for which less than half the byte positions in the extension
+      // are wrong.
+      int lenb = 0;
+      if (scan < newsize) {  // i.e. not case (4); there is a match to extend.
+        int score = 0, Sb = 0;
+        for (int i = 1;  (scan >= lastscan + i) && (pos >= i);  i++) {
+          if (old[pos - i] == newbuf[scan - i]) score++;
+          if (score*2 - i > Sb*2 - lenb) { Sb = score; lenb = i; }
+        }
+      }
+
+      // Extend the lastscan match forward; |lenf| is the maximal extension for
+      // which less than half of the byte positions in entire lastscan match are
+      // wrong.  There is a subtle point here: |lastscan| points to before the
+      // seed match by |lenb| bytes from the previous iteration.  This is why
+      // the loop measures the total number of mistakes in the the match, not
+      // just the from the match.
+      int lenf = 0;
+      {
+        int score = 0, Sf = 0;
+        for (int i = 0;  (lastscan + i < scan) && (lastpos + i < oldsize);  ) {
+          if (old[lastpos + i] == newbuf[lastscan + i]) score++;
+          i++;
+          if (score*2 - i > Sf*2 - lenf) { Sf = score; lenf = i; }
+        }
+      }
+
+      // If the extended scans overlap, pick a position in the overlap region
+      // that maximizes the exact matching bytes.
+      if (lastscan + lenf > scan - lenb) {
+        int overlap = (lastscan + lenf) - (scan - lenb);
+        int score = 0;
+        int Ss = 0, lens = 0;
+        for (int i = 0;  i < overlap;  i++) {
+          if (newbuf[lastscan + lenf - overlap + i] ==
+              old[lastpos + lenf - overlap + i]) score++;
+          if (newbuf[scan - lenb + i] ==  old[pos - lenb + i]) score--;
+          if (score > Ss) { Ss = score; lens = i + 1; }
+        }
+
+        lenf += lens - overlap;
+        lenb -= lens;
+      };
+
+      for (int i = 0;  i < lenf;  i++) {
+        uint8 diff_byte = newbuf[lastscan + i] - old[lastpos + i];
+        diff_bytes[diff_bytes_length + i] = diff_byte;
+        if (diff_byte)
+          ++diff_bytes_nonzero;
+      }
+      int gap = (scan - lenb) - (lastscan + lenf);
+      for (int i = 0;  i < gap;  i++)
+        extra_bytes[extra_bytes_length + i] = newbuf[lastscan + lenf + i];
+
+      diff_bytes_length += lenf;
+      extra_bytes_length += gap;
+
+      uint32 copy_count = lenf;
+      uint32 extra_count = gap;
+      int32 seek_adjustment = ((pos - lenb) - (lastpos + lenf));
+
+      control_stream.WriteVarint32(copy_count);
+      control_stream.WriteVarint32(extra_count);
+      control_stream.WriteVarint32Signed(seek_adjustment);
+      ++control_length;
+#ifdef DEBUG_bsmedberg
+      LOG(INFO) << StringPrintf(
+          "Writing a block:  copy: %-8u extra: %-8u seek: %+-9d",
+          copy_count, extra_count, seek_adjustment);
+#endif
+
+      lastscan = scan - lenb;   // Include the backward extension in seed.
+      lastpos = pos - lenb;     //  ditto.
+      lastoffset = lastpos - lastscan;
+    }
+  }
+
+  I.reset();
+
+  MBSPatchHeader header;
+  // The string will have a null terminator that we don't use, hence '-1'.
+  COMPILE_ASSERT(sizeof(MBS_PATCH_HEADER_TAG) - 1 == sizeof(header.tag),
+                 MBS_PATCH_HEADER_TAG_must_match_header_field_size);
+  memcpy(header.tag, MBS_PATCH_HEADER_TAG, sizeof(header.tag));
+  header.slen     = oldsize;
+  header.scrc32   = CalculateCrc(old, oldsize);
+  header.dlen     = newsize;
+  header.cblen    = control_stream.Length();
+  header.difflen  = diff_bytes_length;
+  header.extralen = extra_bytes_length;
+
+  WriteHeader(patch_stream, &header);
+
+  patch_stream->Append(&control_stream);
+  patch_stream->Write(diff_bytes, diff_bytes_length);
+  patch_stream->Write(extra_bytes, extra_bytes_length);
+
+  LOG(INFO) << "Control tuples: " << control_length
+            << "  copy bytes: " << diff_bytes_length
+            << "  mistakes: " << diff_bytes_nonzero
+            << "  extra bytes: " << extra_bytes_length;
+
+  LOG(INFO) << "Uncompressed bsdiff patch size "
+            << patch_stream->Length() - initial_patch_stream_length;
+
+  LOG(INFO) << "End bsdiff "
+            << (base::Time::Now() - start_bsdiff_time).InSecondsF();
+
+  return OK;
+}
+
+}  // namespace
diff --git a/courgette/win32_x86_generator.h b/courgette/win32_x86_generator.h
new file mode 100644
index 0000000..3aabe9f
--- /dev/null
+++ b/courgette/win32_x86_generator.h
@@ -0,0 +1,126 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// This is the transformation and adjustment for Windows X86 executables.
+
+#ifndef COURGETTE_WIN32_X86_GENERATOR_H_
+#define COURGETTE_WIN32_X86_GENERATOR_H_
+
+#include "base/scoped_ptr.h"
+
+#include "courgette/ensemble.h"
+
+namespace courgette {
+
+class CourgetteWin32X86PatchGenerator : public TransformationPatchGenerator {
+ public:
+  CourgetteWin32X86PatchGenerator(Element* old_element,
+                                  Element* new_element,
+                                  CourgetteWin32X86Patcher* patcher)
+      : TransformationPatchGenerator(old_element, new_element, patcher) {
+  }
+
+  CourgettePatchFile::TransformationMethodId Kind() {
+    return CourgettePatchFile::T_COURGETTE_WIN32_X86;
+  }
+
+  Status WriteInitialParameters(SinkStream* parameter_stream) {
+    parameter_stream->WriteVarint32(old_element_->offset_in_ensemble());
+    parameter_stream->WriteVarint32(old_element_->region().length());
+    return C_OK;
+    // TODO(sra): Initialize |patcher_| with these parameters.
+  }
+
+  Status PredictTransformParameters(SinkStreamSet* prediction) {
+    return TransformationPatchGenerator::PredictTransformParameters(prediction);
+  }
+
+  Status CorrectedTransformParameters(SinkStreamSet* parameters) {
+    // No code needed to write an 'empty' parameter set.
+    return C_OK;
+  }
+
+  // The format of a transformed_element is a serialized EncodedProgram.  We
+  // first disassemble the original old and new Elements into AssemblyPrograms.
+  // Then we adjust the new AssemblyProgram to make it as much like the old one
+  // as possible, before converting the AssemblyPrograms to EncodedPrograms and
+  // serializing them.
+  Status Transform(SourceStreamSet* corrected_parameters,
+                   SinkStreamSet* old_transformed_element,
+                   SinkStreamSet* new_transformed_element) {
+    // Don't expect any corrected parameters.
+    if (!corrected_parameters->Empty())
+      return C_GENERAL_ERROR;
+
+    // Generate old version of program using |corrected_parameters|.
+    // TODO(sra): refactor to use same code from patcher_.
+    AssemblyProgram* old_program = NULL;
+    Status old_parse_status =
+        ParseWin32X86PE(old_element_->region().start(),
+                        old_element_->region().length(),
+                        &old_program);
+    if (old_parse_status != C_OK)
+      return old_parse_status;
+
+    AssemblyProgram* new_program = NULL;
+    Status new_parse_status =
+        ParseWin32X86PE(new_element_->region().start(),
+                        new_element_->region().length(),
+                        &new_program);
+    if (new_parse_status != C_OK) {
+      DeleteAssemblyProgram(old_program);
+      return new_parse_status;
+    }
+
+    EncodedProgram* old_encoded = NULL;
+    Status old_encode_status = Encode(old_program, &old_encoded);
+    if (old_encode_status != C_OK) {
+      DeleteAssemblyProgram(old_program);
+      return old_encode_status;
+    }
+
+    Status old_write_status =
+        WriteEncodedProgram(old_encoded, old_transformed_element);
+    DeleteEncodedProgram(old_encoded);
+    if (old_write_status != C_OK) {
+      DeleteAssemblyProgram(old_program);
+      return old_write_status;
+    }
+
+    Status adjust_status = Adjust(*old_program, new_program);
+    DeleteAssemblyProgram(old_program);
+    if (adjust_status != C_OK) {
+      DeleteAssemblyProgram(new_program);
+      return adjust_status;
+    }
+
+    EncodedProgram* new_encoded = NULL;
+    Status new_encode_status = Encode(new_program, &new_encoded);
+    DeleteAssemblyProgram(new_program);
+    if (new_encode_status != C_OK)
+      return new_encode_status;
+
+    Status new_write_status =
+        WriteEncodedProgram(new_encoded, new_transformed_element);
+    DeleteEncodedProgram(new_encoded);
+    if (new_write_status != C_OK)
+      return new_write_status;
+
+    return C_OK;
+  }
+
+  Status Reform(SourceStreamSet* transformed_element,
+                SinkStream* reformed_element) {
+    return TransformationPatchGenerator::Reform(transformed_element,
+                                                reformed_element);
+  }
+
+ private:
+  ~CourgetteWin32X86PatchGenerator() { }
+
+  DISALLOW_COPY_AND_ASSIGN(CourgetteWin32X86PatchGenerator);
+};
+
+}  // namespace courgette
+#endif  // COURGETTE_WIN32_X86_GENERATOR_H_
diff --git a/courgette/win32_x86_patcher.h b/courgette/win32_x86_patcher.h
new file mode 100644
index 0000000..9ab53d9
--- /dev/null
+++ b/courgette/win32_x86_patcher.h
@@ -0,0 +1,95 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// This is the transformation for Windows X86 executables.
+
+#ifndef COURGETTE_WIN32_X86_PATCHER_H_
+#define COURGETTE_WIN32_X86_PATCHER_H_
+
+#include "courgette/ensemble.h"
+
+namespace courgette {
+
+// CourgetteWin32X86Patcher is a TransformationPatcher for Windows 32-bit
+// executables.
+//
+class CourgetteWin32X86Patcher : public TransformationPatcher {
+ public:
+  explicit CourgetteWin32X86Patcher(const Region& region)
+      : ensemble_region_(region) {
+  }
+
+  Status Init(SourceStream* parameter_stream) {
+    if (!parameter_stream->ReadVarint32(&base_offset_))
+      return C_BAD_TRANSFORM;
+    if (!parameter_stream->ReadVarint32(&base_length_))
+      return C_BAD_TRANSFORM;
+
+    if (base_offset_ > ensemble_region_.length())
+      return C_BAD_TRANSFORM;
+    if (base_length_ > ensemble_region_.length() - base_offset_)
+      return C_BAD_TRANSFORM;
+
+    return C_OK;
+  }
+
+  Status PredictTransformParameters(SinkStreamSet* predicted_parameters) {
+    // No code needed to write an 'empty' predicted parameter set.
+    return C_OK;
+  }
+
+  Status Transform(SourceStreamSet* corrected_parameters,
+                   SinkStreamSet* transformed_element) {
+    Status status;
+    if (!corrected_parameters->Empty())
+      return C_GENERAL_ERROR;   // Don't expect any corrected parameters.
+
+    AssemblyProgram* program = NULL;
+    status = ParseWin32X86PE(ensemble_region_.start() + base_offset_,
+                             base_length_,
+                             &program);
+    if (status != C_OK)
+      return status;
+
+    EncodedProgram* encoded = NULL;
+    status = Encode(program, &encoded);
+    DeleteAssemblyProgram(program);
+    if (status != C_OK)
+      return status;
+
+    status = WriteEncodedProgram(encoded, transformed_element);
+    DeleteEncodedProgram(encoded);
+    if (status != C_OK)
+      return status;
+
+    return status;
+  }
+
+  Status Reform(SourceStreamSet* transformed_element,
+                SinkStream* reformed_element) {
+    Status status;
+    EncodedProgram* encoded_program = NULL;
+    status = ReadEncodedProgram(transformed_element, &encoded_program);
+    if (status != C_OK)
+      return status;
+
+    status = Assemble(encoded_program, reformed_element);
+    DeleteEncodedProgram(encoded_program);
+    if (status != C_OK)
+      return status;
+
+    return C_OK;
+  }
+
+ private:
+  Region ensemble_region_;
+
+  uint32 base_offset_;
+  uint32 base_length_;
+
+  DISALLOW_COPY_AND_ASSIGN(CourgetteWin32X86Patcher);
+};
+
+}  // namespace
+#endif  // COURGETTE_WIN32_X86_PATCHER_H_
author	sra@chromium.org <sra@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2009-05-08 23:00:29 +0000
committer	sra@chromium.org <sra@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2009-05-08 23:00:29 +0000
commit	04ca1bc65afb76ea30698c25f24599d20119e3d2 (patch)
tree	2bb65e974d478f4d607b83f6a84a56c01f501ac0 /courgette
parent	9adf1dcf3281408560267787eddcc767566b425f (diff)
download	chromium_src-04ca1bc65afb76ea30698c25f24599d20119e3d2.zip chromium_src-04ca1bc65afb76ea30698c25f24599d20119e3d2.tar.gz chromium_src-04ca1bc65afb76ea30698c25f24599d20119e3d2.tar.bz2