// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COURGETTE_ASSEMBLY_PROGRAM_H_
#define COURGETTE_ASSEMBLY_PROGRAM_H_
#include <map>
#include <set>
#include <vector>
#include "base/basictypes.h"
#include "base/memory/scoped_ptr.h"
#include "courgette/disassembler.h"
#include "courgette/memory_allocator.h"
namespace courgette {
class EncodedProgram;
class Instruction;
typedef NoThrowBuffer<Instruction*> InstructionVector;
// A Label is a symbolic reference to an address. Unlike a conventional
// assembly language, we always know the address. The address will later be
// stored in a table and the Label will be replaced with the index into the
// table.
//
// TODO(sra): Make fields private and add setters and getters.
class Label {
 public:
  // Sentinel held in index_ for as long as no table index has been assigned.
  static const int kNoIndex = -1;

  // Builds a label for address 0 with no index and a zero count.
  Label()
      : rva_(0),
        index_(kNoIndex),
        count_(0) {
  }

  // Builds a label for address |rva| with no index and a zero count.
  explicit Label(RVA rva)
      : rva_(rva),
        index_(kNoIndex),
        count_(0) {
  }

  // Address referred to by the label.
  RVA rva_;

  // Index of address in address table, kNoIndex until assigned.
  int index_;

  // Starts at 0 in both constructors.
  // NOTE(review): presumably a usage/reference count for this label that
  // AssemblyProgram::TrimLabels consults -- confirm against the .cc file.
  int count_;
};
typedef std::map<RVA, Label*> RVAToLabel;
// An AssemblyProgram is the result of disassembling an executable file.
//
// * The disassembler creates labels in the AssemblyProgram and emits
// 'Instructions'.
// * The disassembler then calls DefaultAssignIndexes to assign
// addresses to positions in the address tables.
// * [Optional step]
// * At this point the AssemblyProgram can be converted into an
// EncodedProgram and serialized to an output stream.
// * Later, the EncodedProgram can be deserialized and assembled into
// the original file.
//
// The optional step is to modify the AssemblyProgram. One form of modification
// is to assign indexes in such a way as to make the EncodedProgram for this
// AssemblyProgram look more like the EncodedProgram for some other
// AssemblyProgram. The modification process should call UnassignIndexes, do
// its own assignment, and then call AssignRemainingIndexes to ensure all
// indexes are assigned.
//
class AssemblyProgram {
public:
// Creates a program for an executable of type 'kind'. The constructor is
// defined out of line; presumably it stores 'kind' in kind_ (see kind()).
explicit AssemblyProgram(ExecutableType kind);
// Defined out of line. NOTE(review): whether it releases the instructions
// and labels referenced by this object is not visible here -- confirm.
~AssemblyProgram();
// Returns the executable type held in kind_.
ExecutableType kind() const { return kind_; }
// Records 'image_base' in image_base_.
void set_image_base(uint64 image_base) { image_base_ = image_base; }
// Instructions will be assembled in the order they are emitted.
//
// Every Emit* method returns a CheckBool and is annotated with
// WARN_UNUSED_RESULT, so callers are expected to inspect the result.
//
// Generates an entire base relocation table.
CheckBool EmitPeRelocsInstruction() WARN_UNUSED_RESULT;
// Generates an ELF style relocation table for X86.
CheckBool EmitElfRelocationInstruction() WARN_UNUSED_RESULT;
// Generates an ELF style relocation table for ARM.
CheckBool EmitElfARMRelocationInstruction() WARN_UNUSED_RESULT;
// Following instruction will be assembled at address 'rva'.
CheckBool EmitOriginInstruction(RVA rva) WARN_UNUSED_RESULT;
// Generates a single byte of data or machine instruction.
CheckBool EmitByteInstruction(uint8 byte) WARN_UNUSED_RESULT;
// Generates multiple bytes of data or machine instructions.
// 'value' points at the bytes and 'len' is their count.
CheckBool EmitBytesInstruction(const uint8* value, size_t len)
WARN_UNUSED_RESULT;
// Generates 4-byte relative reference to address of 'label'.
CheckBool EmitRel32(Label* label) WARN_UNUSED_RESULT;
// Generates 4-byte relative reference to address of 'label' for
// ARM.
// NOTE(review): the exact meaning of 'op', 'arm_op' and 'op_size' is not
// visible in this header -- presumably 'arm_op' points at 'op_size' bytes of
// the original ARM instruction; confirm against the implementation.
CheckBool EmitRel32ARM(uint16 op, Label* label, const uint8* arm_op,
uint16 op_size) WARN_UNUSED_RESULT;
// Generates 4-byte absolute reference to address of 'label'.
CheckBool EmitAbs32(Label* label) WARN_UNUSED_RESULT;
// Generates 8-byte absolute reference to address of 'label'.
CheckBool EmitAbs64(Label* label) WARN_UNUSED_RESULT;
// Looks up a label or creates a new one. Might return NULL.
// Presumably operates on abs32_labels_ -- confirm.
Label* FindOrMakeAbs32Label(RVA rva);
// Looks up a label or creates a new one. Might return NULL.
// Presumably operates on rel32_labels_ -- confirm.
Label* FindOrMakeRel32Label(RVA rva);
// Index management for labels. These are the public versions of the static
// per-map helpers of the same names declared in the private section.
// Intended order of use, per the description of AssemblyProgram: the
// disassembler calls DefaultAssignIndexes; a later modification step may call
// UnassignIndexes, perform its own assignment, and then call
// AssignRemainingIndexes so that every label ends up with an index.
void DefaultAssignIndexes();
void UnassignIndexes();
void AssignRemainingIndexes();
// Converts this program into an EncodedProgram.
// NOTE(review): ownership of the returned pointer and whether NULL signals
// failure are not visible in this header -- confirm.
EncodedProgram* Encode() const;
// Accessor for instruction list.
const InstructionVector& instructions() const {
return instructions_;
}
// Returns the label if the instruction contains an absolute 32-bit address,
// otherwise returns NULL.
Label* InstructionAbs32Label(const Instruction* instruction) const;
// Returns the label if the instruction contains an absolute 64-bit address,
// otherwise returns NULL.
Label* InstructionAbs64Label(const Instruction* instruction) const;
// Returns the label if the instruction contains a rel32 offset,
// otherwise returns NULL.
Label* InstructionRel32Label(const Instruction* instruction) const;
// Trim underused labels
// NOTE(review): unlike the Emit* methods, this CheckBool result is not marked
// WARN_UNUSED_RESULT. "Underused" is presumably judged against
// kLabelLowerLimit -- confirm.
CheckBool TrimLabels();
private:
// Executable type given at construction; exposed through kind().
ExecutableType kind_;
// Common helper taking an already-built instruction.
// NOTE(review): presumably appends to instructions_; ownership of
// 'instruction' on failure is not visible here -- confirm.
CheckBool Emit(Instruction* instruction) WARN_UNUSED_RESULT;
// Value is defined out of line.
// NOTE(review): presumably the threshold used by TrimLabels -- confirm.
static const int kLabelLowerLimit;
// Looks up a label or creates a new one. Might return NULL.
// 'labels' is the map to search and, when needed, extend.
Label* FindLabel(RVA rva, RVAToLabel* labels);
// Helper methods for the public versions.
// Each one works on a single label map.
static void UnassignIndexes(RVAToLabel* labels);
static void DefaultAssignIndexes(RVAToLabel* labels);
static void AssignRemainingIndexes(RVAToLabel* labels);
// Sharing instructions that emit a single byte saves a lot of space.
Instruction* GetByteInstruction(uint8 byte);
// Storage for the shared single-byte instructions.
// NOTE(review): presumably indexed by byte value and filled lazily by
// GetByteInstruction -- confirm.
scoped_ptr<Instruction*[]> byte_instruction_cache_;
uint64 image_base_; // Desired or mandated base address of image.
InstructionVector instructions_; // All the instructions in program.
// These are lookup maps to find the label associated with a given address.
// We have separate label spaces for addresses referenced by rel32 labels and
// abs32 labels. This is somewhat arbitrary.
RVAToLabel rel32_labels_;
RVAToLabel abs32_labels_;
// Copying and assignment are disallowed for this class.
DISALLOW_COPY_AND_ASSIGN(AssemblyProgram);
};
} // namespace courgette
#endif // COURGETTE_ASSEMBLY_PROGRAM_H_