summaryrefslogtreecommitdiffstats
path: root/courgette/assembly_program.h
diff options
context:
space:
mode:
Diffstat (limited to 'courgette/assembly_program.h')
-rw-r--r--courgette/assembly_program.h133
1 files changed, 133 insertions, 0 deletions
diff --git a/courgette/assembly_program.h b/courgette/assembly_program.h
new file mode 100644
index 0000000..e726f81
--- /dev/null
+++ b/courgette/assembly_program.h
@@ -0,0 +1,133 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef COURGETTE_ASSEMBLY_PROGRAM_H_
+#define COURGETTE_ASSEMBLY_PROGRAM_H_
+
+#include <map>
+#include <set>
+#include <vector>
+
+#include "base/basictypes.h"
+
+#include "courgette/image_info.h"
+
+namespace courgette {
+
+class EncodedProgram;
+class Instruction;
+
+// A Label is a symbolic reference to an address. Unlike a conventional
+// assembly language, we always know the address. The address will later be
+// stored in a table and the Label will be replaced with the index into the
+// table.
+//
+// TODO(sra): Make fields private and add setters and getters.
+class Label {
+ public:
+ static const int kNoIndex = -1;
+ Label() : rva_(0), index_(kNoIndex) {}
+ explicit Label(RVA rva) : rva_(rva), index_(kNoIndex) {}
+
+ RVA rva_; // Address refered to by the label.
+ int index_; // Index of address in address table, kNoIndex until assigned.
+};
+
+typedef std::map<RVA, Label*> RVAToLabel;
+
+// An AssemblyProgram is the result of disassembling an executable file.
+//
+// * The disassembler creates labels in the AssemblyProgram and emits
+// 'Instructions'.
+// * The disassembler then calls DefaultAssignIndexes to assign
+// addresses to positions in the address tables.
+// * [Optional step]
+// * At this point the AssemblyProgram can be converted into an
+// EncodedProgram and serialized to an output stream.
+// * Later, the EncodedProgram can be deserialized and assembled into
+// the original file.
+//
+// The optional step is to modify the AssemblyProgram. One form of modification
+// is to assign indexes in such a way as to make the EncodedProgram for this
+// AssemblyProgram look more like the EncodedProgram for some other
+// AssemblyProgram. The modification process should call UnassignIndexes, do
+// its own assignment, and then call AssignRemainingIndexes to ensure all
+// indexes are assigned.
+//
+class AssemblyProgram {
+ public:
+ AssemblyProgram();
+ ~AssemblyProgram();
+
+ void set_image_base(uint64 image_base) { image_base_ = image_base; }
+
+ // Instructions will be assembled in the order they are emitted.
+
+ // Generates an entire base relocation table.
+ void EmitMakeRelocsInstruction();
+
+ // Following instruction will be assembled at address 'rva'.
+ void EmitOriginInstruction(RVA rva);
+
+ // Generates a single byte of data or machine instruction.
+ void EmitByteInstruction(uint8 byte);
+
+ // Generates 4-byte relative reference to address of 'label'.
+ void EmitRel32(Label* label);
+
+ // Generates 4-byte absolute reference to address of 'label'.
+ void EmitAbs32(Label* label);
+
+ Label* FindOrMakeAbs32Label(RVA rva);
+ Label* FindOrMakeRel32Label(RVA rva);
+
+ void DefaultAssignIndexes();
+ void UnassignIndexes();
+ void AssignRemainingIndexes();
+
+ EncodedProgram* Encode() const;
+
+ // Accessor for instruction list.
+ const std::vector<Instruction*>& instructions() const {
+ return instructions_;
+ }
+
+ // Returns the label if the instruction contains and absolute address,
+ // otherwise returns NULL.
+ Label* InstructionAbs32Label(const Instruction* instruction) const;
+
+ // Returns the label if the instruction contains and rel32 offset,
+ // otherwise returns NULL.
+ Label* InstructionRel32Label(const Instruction* instruction) const;
+
+
+ private:
+ void Emit(Instruction* instruction) { instructions_.push_back(instruction); }
+
+ Label* FindLabel(RVA rva, RVAToLabel* labels);
+
+ // Helper methods for the public versions.
+ static void UnassignIndexes(RVAToLabel* labels);
+ static void DefaultAssignIndexes(RVAToLabel* labels);
+ static void AssignRemainingIndexes(RVAToLabel* labels);
+
+ // Sharing instructions that emit a single byte saves a lot of space.
+ Instruction* GetByteInstruction(uint8 byte);
+ Instruction** byte_instruction_cache_;
+
+ uint64 image_base_; // Desired or mandated base address of image.
+
+ std::vector<Instruction*> instructions_; // All the instructions in program.
+
+ // These are lookup maps to find the label associated with a given address.
+ // We have separate label spaces for addresses referenced by rel32 labels and
+ // abs32 labels. This is somewhat arbitrary.
+ RVAToLabel rel32_labels_;
+ RVAToLabel abs32_labels_;
+
+ DISALLOW_COPY_AND_ASSIGN(AssemblyProgram);
+};
+
+} // namespace courgette
+#endif // COURGETTE_ASSEMBLY_PROGRAM_H_