| | | |
|---|---|---|
| author | sra@chromium.org <sra@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-05-08 23:00:29 +0000 |
| committer | sra@chromium.org <sra@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-05-08 23:00:29 +0000 |
| commit | 04ca1bc65afb76ea30698c25f24599d20119e3d2 (patch) | |
| tree | 2bb65e974d478f4d607b83f6a84a56c01f501ac0 /courgette/adjustment_method.cc | |
| parent | 9adf1dcf3281408560267787eddcc767566b425f (diff) | |
Move Courgette from src\third_party\courgette to src\courgette and src\courgette\third_party.

Fixed #includes.
Added properties to ignore generated files:
C:\c5\src>svn pg svn:ignore courgette
courgette.xcodeproj
courgette.sln
courgette_fuzz.vcproj
courgette_lib.vcproj
courgette_minimal_tool.vcproj
courgette_tool.vcproj
courgette.vcproj
courgette_unittests.vcproj
SConstruct
courgette_fuzz.scons
courgette_lib.scons
courgette_main.scons
courgette_minimal_tool.scons
courgette.scons
courgette_tool.scons
courgette_unittests.scons
Review URL: http://codereview.chromium.org/115062
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@15692 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'courgette/adjustment_method.cc')
-rw-r--r-- | courgette/adjustment_method.cc | 703 |
1 file changed, 703 insertions, 0 deletions
diff --git a/courgette/adjustment_method.cc b/courgette/adjustment_method.cc new file mode 100644 index 0000000..48e7cc0 --- /dev/null +++ b/courgette/adjustment_method.cc @@ -0,0 +1,703 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "courgette/adjustment_method.h" + +#include <algorithm> +#include <list> +#include <map> +#include <set> +#include <string> +#include <vector> + +#include "base/basictypes.h" +#include "base/logging.h" +#include "base/string_util.h" + +#include "courgette/assembly_program.h" +#include "courgette/courgette.h" +#include "courgette/encoded_program.h" +#include "courgette/image_info.h" + +namespace courgette { + +// We have three discretionary information logging levels for algorithm +// development. For now just configure with #defines. +// TODO(sra): make dependent of some configurable setting. +#define NO_LOG DLOG_IF(INFO, false) +// #define ALOG1 LOG(INFO) +// #define ALOG2 LOG(INFO) +// #define ALOG3 LOG(INFO) +#define ALOG1 NO_LOG +#define ALOG2 NO_LOG +#define ALOG3 NO_LOG + +//////////////////////////////////////////////////////////////////////////////// + +class NullAdjustmentMethod : public AdjustmentMethod { + bool Adjust(const AssemblyProgram& model, AssemblyProgram* program) { + return true; + } +}; + +//////////////////////////////////////////////////////////////////////////////// + +// The purpose of adjustment is to assign indexes to Labels of a program 'p' to +// make the sequence of indexes similar to a 'model' program 'm'. Labels +// themselves don't have enough information to do this job, so we work with a +// LabelInfo surrogate for each label. +// +class LabelInfo { + public: + Label* label_; // The label that this info a surrogate for. + + // Information used only in debugging messages. + uint32 is_model_ : 1; // Is the label in the model? + uint32 debug_index_ : 31; // An unique small number for naming the label. + + uint32 refs_; // Number of times this Label is referenced. + + LabelInfo* assignment_; // Label from other program corresponding to this. + + // LabelInfos are in a doubly linked list ordered by address (label_->rva_) so + // we can quickly find Labels adjacent in address order. + LabelInfo* next_addr_; // Label(Info) at next highest address. + LabelInfo* prev_addr_; // Label(Info) at next lowest address. + + std::vector<uint32> positions_; // Offsets into the trace of references. + + // Just a no-argument constructor and copy constructor. Actual LabelInfo + // objects are allocated in std::pair structs in a std::map. + LabelInfo() + : label_(NULL), is_model_(false), debug_index_(0), refs_(0), + assignment_(NULL), + next_addr_(NULL), + prev_addr_(NULL) {} + + private: + void operator=(const LabelInfo*); // Disallow assignment only. + + // Public compiler generated copy constructor is needed to constuct + // std::pair<Label*, LabelInfo> so that fresh LabelInfos can be allocated + // inside a std::map. 
+}; + +struct OrderLabelInfoByAddressAscending { + bool operator()(const LabelInfo* a, const LabelInfo* b) const { + return a->label_->rva_ < b->label_->rva_; + } +}; + +static std::string ToString(LabelInfo* info) { + std::string s; + StringAppendF(&s, "%c%d", "pm"[info->is_model_], info->debug_index_); + if (info->label_->index_ != Label::kNoIndex) + StringAppendF(&s, " (%d)", info->label_->index_); + + StringAppendF(&s, " #%u", info->refs_); + return s; +} + +// General graph matching is exponential, essentially trying all permutations. +// The exponential algorithm can be made faster by avoiding consideration of +// impossible or unlikely matches. We can make the matching practical by eager +// matching - by looking for likely matches and commiting to them, and using the +// committed assignment as the basis for further matching. +// +// The basic eager graph-matching assignment is based on several ideas: +// +// * The strongest match will be for parts of the program that have not +// changed. If part of a program has not changed, then the number of +// references to a label will be the same, and corresponding pairs of +// adjacent labels will have the same RVA difference. +// +// * Some assignments are 'obvious' if you look at the distribution. Example: +// if both the program and the model have a label that is referred to much +// more often than the next most refered-to label, it is likely the two +// labels correspond. +// +// * If a label from the program corresponds to a label in the model, it is +// likely that the labels near the corresponding labels also match. A +// conservative way of extending the match is to assign only those labels +// which have exactly the same address offset and reference count. +// +// * If two labels correspond, then we can try to match up the references +// before and after the labels in the reference stream. For this to be +// practical, the number of references has to be small, e.g. each label has +// exactly one reference. +// + +// Note: we also tried a completely different approach: random assignment +// followed by simulated annealing. This produced similar results. The results +// were not as good for very small differences because the simulated annealing +// never quite hit the groove. And simulated annealing was several orders of +// magnitude slower. + + +// TRIE node for suffix strings in the label reference sequence. +// +// We dynamically build a trie for both the program and model, growing the trie +// as necessary. The trie node for a (possibly) empty string of label +// references contains the distribution of labels following the string. The +// roots node (for the empty string) thus contains the simple distribution of +// labels within the label reference stream. + +struct Node { + Node(LabelInfo* in_edge, Node* prev) + : in_edge_(in_edge), prev_(prev), count_(0), + in_queue_(false) { + length_ = 1 + (prev_ ? prev_->length_ : 0); + } + LabelInfo* in_edge_; // + Node* prev_; // Node at shorter length. + int count_; // Frequency of this path in Trie. + int length_; + typedef std::map<LabelInfo*, Node*> Edges; + Edges edges_; + std::vector<int> places_; // Indexes into sequence of this item. 
+ std::list<Node*> edges_in_frequency_order; + + bool in_queue_; + bool Extended() const { return edges_.size() > 0; } + + uint32 Weight() const { + return edges_in_frequency_order.front()->count_; + } +}; + +static std::string ToString(Node* node) { + std::vector<std::string> prefix; + for (Node* n = node; n->prev_; n = n->prev_) + prefix.push_back(ToString(n->in_edge_)); + + std::string s; + s += "{"; + const char* sep = ""; + while (!prefix.empty()) { + s += sep; + sep = ","; + s += prefix.back(); + prefix.pop_back(); + } + + s += StringPrintf("%u", node->count_); + s += " @"; + s += Uint64ToString(node->edges_in_frequency_order.size()); + s += "}"; + return s; +} + +typedef std::vector<LabelInfo*> Trace; + +struct OrderNodeByCountDecreasing { + bool operator()(Node* a, Node* b) const { + if (a->count_ != b->count_) + return (a->count_) > (b->count_); + return a->places_.at(0) < b->places_.at(0); // Prefer first occuring. + } +}; + +struct OrderNodeByWeightDecreasing { + bool operator()(Node* a, Node* b) const { + // (Maybe tie-break on total count, followed by lowest assigned node indexes + // in path.) + uint32 a_weight = a->Weight(); + uint32 b_weight = b->Weight(); + if (a_weight != b_weight) + return a_weight > b_weight; + if (a->length_ != b->length_) + return a->length_ > b->length_; // Prefer longer. + return a->places_.at(0) < b->places_.at(0); // Prefer first occuring. + } +}; + +typedef std::set<Node*, OrderNodeByWeightDecreasing> NodeQueue; + +class AssignmentProblem { + public: + AssignmentProblem(const Trace& model, + const Trace& problem) + : m_trace_(model), + p_trace_(problem) { + } + + ~AssignmentProblem() { + for (size_t i = 0; i < all_nodes_.size(); ++i) + delete all_nodes_[i]; + } + + bool Solve() { + m_root_ = MakeRootNode(m_trace_); + p_root_ = MakeRootNode(p_trace_); + AddToQueue(p_root_); + + while (!worklist_.empty()) { + Node* node = *worklist_.begin(); + node->in_queue_ = false; + worklist_.erase(node); + TrySolveNode(node); + } + + ALOG1 << unsolved_.size() << " unsolved items"; + return true; + } + + private: + void AddToQueue(Node* node) { + if (node->length_ >= 10) { + ALOG3 << "Length clipped " << ToString(node->prev_); + return; + } + if (node->in_queue_) { + LOG(ERROR) << "Double add " << ToString(node); + return; + } + // just to be sure data for prioritizing is available + ExtendNode(node, p_trace_); + // SkipCommittedLabels(node); + if (node->edges_in_frequency_order.empty()) + return; + node->in_queue_ = true; + worklist_.insert(node); + } + + void SkipCommittedLabels(Node* node) { + ExtendNode(node, p_trace_); + uint32 skipped = 0; + while (!node->edges_in_frequency_order.empty() && + node->edges_in_frequency_order.front()->in_edge_->assignment_) { + ++skipped; + node->edges_in_frequency_order.pop_front(); + } + if (skipped > 0) + ALOG3 << "Skipped " << skipped << " at " << ToString(node); + } + + void TrySolveNode(Node* p_node) { + Node* front = p_node->edges_in_frequency_order.front(); + if (front->in_edge_->assignment_) { + p_node->edges_in_frequency_order.pop_front(); + AddToQueue(front); + AddToQueue(p_node); + return; + } + + // Compare frequencies of unassigned edges, and either make + // assignment(s) or move node to unsolved list + + Node* m_node = FindModelNode(p_node); + + if (m_node == NULL) { + ALOG1 << "Can't find model node"; + unsolved_.insert(p_node); + return; + } + ExtendNode(m_node, m_trace_); + + // Lets just try greedy + + SkipCommittedLabels(m_node); + if (m_node->edges_in_frequency_order.empty()) { + ALOG3 << "Punting, no 
elements left in model vs " + << p_node->edges_in_frequency_order.size(); + unsolved_.insert(p_node); + return; + } + Node* m_match = m_node->edges_in_frequency_order.front(); + Node* p_match = p_node->edges_in_frequency_order.front(); + + if (p_match->count_ > 1.1 * m_match->count_ || + m_match->count_ > 1.1 * p_match->count_) { + ALOG2 << "Tricky distribution " + << p_match->count_ << ":" << m_match->count_ << " " + << ToString(p_match) << " vs " << ToString(m_match); + return; + } + + m_node->edges_in_frequency_order.pop_front(); + p_node->edges_in_frequency_order.pop_front(); + + LabelInfo* p_label_info = p_match->in_edge_; + LabelInfo* m_label_info = m_match->in_edge_; + int m_index = p_label_info->label_->index_; + if (m_index != Label::kNoIndex) { + ALOG1 << "Cant use unassigned label from model " << m_index; + unsolved_.insert(p_node); + return; + } + + Assign(p_label_info, m_label_info); + + AddToQueue(p_match); // find matches within new match + AddToQueue(p_node); // and more matches within this node + } + + void Assign(LabelInfo* p_info, LabelInfo* m_info) { + AssignOne(p_info, m_info); + ALOG3 << "Assign " << ToString(p_info) << " := " << ToString(m_info); + // Now consider unassigned adjacent addresses + TryExtendAssignment(p_info, m_info); + } + + void AssignOne(LabelInfo* p_info, LabelInfo* m_info) { + p_info->label_->index_ = m_info->label_->index_; + + // Mark as assigned + m_info->assignment_ = p_info; + p_info->assignment_ = m_info; + } + + void TryExtendAssignment(LabelInfo* p_info, LabelInfo* m_info) { + RVA m_rva_base = m_info->label_->rva_; + RVA p_rva_base = p_info->label_->rva_; + + LabelInfo* m_info_next = m_info->next_addr_; + LabelInfo* p_info_next = p_info->next_addr_; + for ( ; m_info_next && p_info_next; ) { + if (m_info_next->assignment_) + break; + + RVA m_rva = m_info_next->label_->rva_; + RVA p_rva = p_info_next->label_->rva_; + + if (m_rva - m_rva_base != p_rva - p_rva_base) { + // previous label was pointing to something that is different size + break; + } + LabelInfo* m_info_next_next = m_info_next->next_addr_; + LabelInfo* p_info_next_next = p_info_next->next_addr_; + if (m_info_next_next && p_info_next_next) { + RVA m_rva_next = m_info_next_next->label_->rva_; + RVA p_rva_next = p_info_next_next->label_->rva_; + if (m_rva_next - m_rva != p_rva_next - p_rva) { + // Since following labels are no longer in address lockstep, assume + // this address has a difference. + break; + } + } + + // The label has inconsistent numbers of references, it is probably not + // the same thing. 
+ if (m_info_next->refs_ != p_info_next->refs_) { + break; + } + + ALOG3 << " Extending assignment -> " + << ToString(p_info_next) << " := " << ToString(m_info_next); + + AssignOne(p_info_next, m_info_next); + + if (p_info_next->refs_ == m_info_next->refs_ && + p_info_next->refs_ == 1) { + TryExtendSequence(p_info_next->positions_[0], + m_info_next->positions_[0]); + TryExtendSequenceBackwards(p_info_next->positions_[0], + m_info_next->positions_[0]); + } + + p_info_next = p_info_next_next; + m_info_next = m_info_next_next; + } + + LabelInfo* m_info_prev = m_info->prev_addr_; + LabelInfo* p_info_prev = p_info->prev_addr_; + for ( ; m_info_prev && p_info_prev; ) { + if (m_info_prev->assignment_) + break; + + RVA m_rva = m_info_prev->label_->rva_; + RVA p_rva = p_info_prev->label_->rva_; + + if (m_rva - m_rva_base != p_rva - p_rva_base) { + // previous label was pointing to something that is different size + break; + } + LabelInfo* m_info_prev_prev = m_info_prev->prev_addr_; + LabelInfo* p_info_prev_prev = p_info_prev->prev_addr_; + + // The the label has inconsistent numbers of references, it is + // probably not the same thing + if (m_info_prev->refs_ != p_info_prev->refs_) { + break; + } + + AssignOne(p_info_prev, m_info_prev); + ALOG3 << " Extending assignment <- " << ToString(p_info_prev) << " := " + << ToString(m_info_prev); + + p_info_prev = p_info_prev_prev; + m_info_prev = m_info_prev_prev; + } + } + + uint32 TryExtendSequence(uint32 p_pos_start, uint32 m_pos_start) { + uint32 p_pos = p_pos_start + 1; + uint32 m_pos = m_pos_start + 1; + + while (p_pos < p_trace_.size() && m_pos < m_trace_.size()) { + LabelInfo* p_info = p_trace_[p_pos]; + LabelInfo* m_info = m_trace_[m_pos]; + + // To match, either (1) both are assigned or (2) both are unassigned. + if ((p_info->assignment_ == NULL) != (m_info->assignment_ == NULL)) + break; + + // If they are assigned, it needs to be consistent (same index). 
+ if (p_info->assignment_ && m_info->assignment_) { + if (p_info->label_->index_ != m_info->label_->index_) + break; + ++p_pos; + ++m_pos; + continue; + } + + if (p_info->refs_ != m_info->refs_) + break; + + AssignOne(p_info, m_info); + ALOG3 << " Extending assignment seq" + << "[+" << p_pos - p_pos_start << "]" + << " -> " + << ToString(p_info) << " := " << ToString(m_info); + + ++p_pos; + ++m_pos; + } + + return p_pos - p_pos_start; + } + + uint32 TryExtendSequenceBackwards(uint32 p_pos_start, uint32 m_pos_start) { + if (p_pos_start == 0 || m_pos_start == 0) + return 0; + + uint32 p_pos = p_pos_start - 1; + uint32 m_pos = m_pos_start - 1; + + while (p_pos > 0 && m_pos > 0) { + LabelInfo* p_info = p_trace_[p_pos]; + LabelInfo* m_info = m_trace_[m_pos]; + + if ((p_info->assignment_ == NULL) != (m_info->assignment_ == NULL)) + break; + + if (p_info->assignment_ && m_info->assignment_) { + if (p_info->label_->index_ != m_info->label_->index_) + break; + --p_pos; + --m_pos; + continue; + } + + if (p_info->refs_ != m_info->refs_) + break; + + AssignOne(p_info, m_info); + ALOG3 << " Extending assignment seq" + << "[-" << p_pos_start - p_pos << "]" + << " <- " + << ToString(p_info) << " := " << ToString(m_info); + + --p_pos; + --m_pos; + } + + return p_pos - p_pos_start; + } + + Node* FindModelNode(Node* node) { + if (node->prev_ == NULL) + return m_root_; + + Node* m_parent = FindModelNode(node->prev_); + if (m_parent == NULL) { + return NULL; + } + + ExtendNode(m_parent, m_trace_); + + LabelInfo* p_label = node->in_edge_; + LabelInfo* m_label = p_label->assignment_; + if (m_label == NULL) { + ALOG1 << "Expected assigned prefix"; + return NULL; + } + + Node::Edges::iterator e = m_parent->edges_.find(m_label); + if (e == m_parent->edges_.end()) { + ALOG2 << "Expected defined edge in parent"; + return NULL; + } + + return e->second; + } + + Node* MakeRootNode(const Trace& trace) { + Node* node = new Node(NULL, NULL); + all_nodes_.push_back(node); + for (size_t i = 0; i < trace.size(); ++i) { + ++node->count_; + node->places_.push_back(i); + } + return node; + } + + void ExtendNode(Node* node, const Trace& trace) { + // Make sure trie is filled in at this node. 
+ if (node->Extended()) + return; + for (size_t i = 0; i < node->places_.size(); ++i) { + uint32 index = node->places_.at(i); + if (index < trace.size()) { + LabelInfo* item = trace.at(index); + Node*& slot = node->edges_[item]; + if (slot == NULL) { + slot = new Node(item, node); + all_nodes_.push_back(slot); + node->edges_in_frequency_order.push_back(slot); + } + slot->places_.push_back(index + 1); + ++slot->count_; + } + } + node->edges_in_frequency_order.sort(OrderNodeByCountDecreasing()); + } + + const Trace& m_trace_; + const Trace& p_trace_; + Node* m_root_; + Node* p_root_; + + NodeQueue worklist_; + NodeQueue unsolved_; + + std::vector<Node*> all_nodes_; + + DISALLOW_COPY_AND_ASSIGN(AssignmentProblem); +}; + +class GraphAdjuster : public AdjustmentMethod { + public: + GraphAdjuster() {} + ~GraphAdjuster() {} + + bool Adjust(const AssemblyProgram& model, AssemblyProgram* program) { + LOG(INFO) << "GraphAdjuster::Adjust"; + prog_ = program; + model_ = &model; + debug_label_index_gen_ = 0; + return Finish(); + } + + bool Finish() { + prog_->UnassignIndexes(); + CollectTraces(model_, &model_abs32_, &model_rel32_, true); + CollectTraces(prog_, &prog_abs32_, &prog_rel32_, false); + Solve(model_abs32_, prog_abs32_); + Solve(model_rel32_, prog_rel32_); + prog_->AssignRemainingIndexes(); + return true; + } + + private: + + void CollectTraces(const AssemblyProgram* program, Trace* abs32, Trace* rel32, + bool is_model) { + const std::vector<Instruction*>& instructions = program->instructions(); + for (size_t i = 0; i < instructions.size(); ++i) { + Instruction* instruction = instructions.at(i); + if (Label* label = program->InstructionAbs32Label(instruction)) + ReferenceLabel(abs32, label, is_model); + if (Label* label = program->InstructionRel32Label(instruction)) + ReferenceLabel(rel32, label, is_model); + } + // TODO(sra): we could simply append all the labels in index order to + // incorporate some costing for entropy (bigger deltas) that will be + // introduced into the label address table by non-monotonic ordering. This + // would have some knock-on effects to parts of the algorithm that work on + // single-occurence labels. + } + + void Solve(const Trace& model, const Trace& problem) { + LinkLabelInfos(model); + LinkLabelInfos(problem); + AssignmentProblem a(model, problem); + a.Solve(); + } + + void LinkLabelInfos(const Trace& trace) { + typedef std::set<LabelInfo*, OrderLabelInfoByAddressAscending> Ordered; + Ordered ordered; + for (Trace::const_iterator p = trace.begin(); p != trace.end(); ++p) + ordered.insert(*p); + LabelInfo* prev = NULL; + for (Ordered::iterator p = ordered.begin(); p != ordered.end(); ++p) { + LabelInfo* curr = *p; + if (prev) prev->next_addr_ = curr; + curr->prev_addr_ = prev; + prev = curr; + + if (curr->positions_.size() != curr->refs_) + NOTREACHED(); + } + } + + void ReferenceLabel(Trace* trace, Label* label, bool is_model) { + trace->push_back(MakeLabelInfo(label, is_model, trace->size())); + } + + LabelInfo* MakeLabelInfo(Label* label, bool is_model, uint32 position) { + LabelInfo& slot = label_infos_[label]; + if (slot.label_ == NULL) { + slot.label_ = label; + slot.is_model_ = is_model; + slot.debug_index_ = ++debug_label_index_gen_; + } + slot.positions_.push_back(position); + ++slot.refs_; + return &slot; + } + + AssemblyProgram* prog_; // Program to be adjusted, owned by caller. + const AssemblyProgram* model_; // Program to be mimicked, owned by caller. 
+ + Trace model_abs32_; + Trace model_rel32_; + Trace prog_abs32_; + Trace prog_rel32_; + + int debug_label_index_gen_; + + // Note LabelInfo is allocated inside map, so the LabelInfo lifetimes are + // managed by the map. + std::map<Label*, LabelInfo> label_infos_; + + private: + DISALLOW_COPY_AND_ASSIGN(GraphAdjuster); +}; + + +//////////////////////////////////////////////////////////////////////////////// + +void AdjustmentMethod::Destroy() { delete this; } + +AdjustmentMethod* AdjustmentMethod::MakeNullAdjustmentMethod() { + return new NullAdjustmentMethod(); +} + +AdjustmentMethod* AdjustmentMethod::MakeProductionAdjustmentMethod() { + return new GraphAdjuster(); +} + +Status Adjust(const AssemblyProgram& model, AssemblyProgram* program) { + AdjustmentMethod* method = AdjustmentMethod::MakeProductionAdjustmentMethod(); + bool ok = method->Adjust(model, program); + method->Destroy(); + if (ok) + return C_OK; + else + return C_ADJUSTMENT_FAILED; +} + +} // namespace courgette |
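As an illustration of the lazily grown trie described in the comments above (the `Node` struct and `ExtendNode`), here is a minimal standalone sketch; the names `TrieNode`, `Extend`, and the toy trace are hypothetical and are not part of the commit:

```cpp
#include <cstdio>
#include <map>
#include <memory>
#include <vector>

struct TrieNode {
  int count = 0;                    // How often this prefix occurs in the trace.
  std::vector<size_t> places;       // Trace indexes just past this prefix.
  std::map<int, TrieNode*> edges;   // Next label id -> child (owned by |owner|).
};

// Grow the children of |node| one level deep from the positions it has
// recorded -- the same "extend on demand" shape as ExtendNode in the diff.
void Extend(TrieNode* node,
            const std::vector<int>& trace,
            std::vector<std::unique_ptr<TrieNode>>* owner) {
  if (!node->edges.empty())
    return;                         // Already extended.
  for (size_t pos : node->places) {
    if (pos >= trace.size())
      continue;
    TrieNode*& child = node->edges[trace[pos]];
    if (child == nullptr) {
      owner->push_back(std::make_unique<TrieNode>());
      child = owner->back().get();
    }
    ++child->count;
    child->places.push_back(pos + 1);  // The child's suffix starts one later.
  }
}

int main() {
  // Toy trace of label ids; label 7 is referenced most often.
  std::vector<int> trace = {7, 3, 7, 5, 7, 3};
  std::vector<std::unique_ptr<TrieNode>> owner;

  TrieNode root;
  root.count = static_cast<int>(trace.size());
  for (size_t i = 0; i < trace.size(); ++i)
    root.places.push_back(i);

  Extend(&root, trace, &owner);     // Root now holds the label distribution.
  for (const auto& e : root.edges)
    std::printf("label %d referenced %d time(s)\n", e.first, e.second->count);
  return 0;
}
```

After `Extend` runs on the root, its edge counts are exactly the frequency distribution of labels in the trace, which is the information the eager matcher compares between the program and model tries before committing to an assignment.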