1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
|
//===-- EDToken.cpp - LLVM Enhanced Disassembler --------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the Enhanced Disassembler library's token class. The
// token is responsible for vending information about the token, such as its
// type and logical value.
//
//===----------------------------------------------------------------------===//
#include "EDToken.h"
#include "EDDisassembler.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;
/// Builds a token over the text \p str.
///
/// \param str           The token's text.
/// \param type          The token's initial edis token type.
/// \param localType     A caller-supplied type tag, stored unmodified.
/// \param disassembler  The disassembler this token is associated with.
///
/// The operand ID starts out as -1 and keeps that value until setOperandID()
/// is called.
EDToken::EDToken(StringRef str,
                 enum tokenType type,
                 uint64_t localType,
                 EDDisassembler &disassembler)
  : Disassembler(disassembler),
    Str(str),
    Type(type),
    LocalType(localType),
    OperandID(-1) {}
/// Destructor.  The body is empty: nothing is released explicitly here.
EDToken::~EDToken() {}
/// Converts this token into a literal token.
///
/// \param sign           Stored as the literal's sign flag; literalSign()
///                       later reports it as 1 (true) or 0 (false).
/// \param absoluteValue  Stored as the literal's absolute value.
void EDToken::makeLiteral(bool sign, uint64_t absoluteValue) {
  // Record the payload first, then flip the type tag.
  LiteralSign = sign;
  LiteralAbsoluteValue = absoluteValue;
  Type = kTokenLiteral;
}
/// Converts this token into a register token.
///
/// \param registerID  The register identifier to remember for this token.
void EDToken::makeRegister(unsigned registerID) {
  // Record the payload first, then flip the type tag.
  RegisterID = registerID;
  Type = kTokenRegister;
}
void EDToken::setOperandID(int operandID) {
OperandID = operandID;
}
enum EDToken::tokenType EDToken::type() const {
return Type;
}
uint64_t EDToken::localType() const {
return LocalType;
}
/// \returns The token's text, as the StringRef given to the constructor.
StringRef EDToken::string() const {
  return this->Str;
}
int EDToken::operandID() const {
return OperandID;
}
/// Reports the sign flag of a literal token.
///
/// \returns 1 if the stored sign flag is set, 0 if it is clear, or -1 if this
/// token is not a literal.
int EDToken::literalSign() const {
  if (Type == kTokenLiteral)
    return LiteralSign ? 1 : 0;

  return -1;
}
/// Retrieves the absolute value of a literal token.
///
/// \param value  Receives the stored absolute value on success; left
///               untouched on failure.
/// \returns 0 on success, or -1 if this token is not a literal.
int EDToken::literalAbsoluteValue(uint64_t &value) const {
  const bool isLiteral = (Type == kTokenLiteral);

  if (isLiteral)
    value = LiteralAbsoluteValue;

  return isLiteral ? 0 : -1;
}
int EDToken::registerID(unsigned ®isterID) const {
if (Type != kTokenRegister)
return -1;
registerID = RegisterID;
return 0;
}
/// Splits the instruction text in \p str into EDTokens and appends them to
/// \p tokens.
///
/// The text is handed to \p disassembler's parseInst(), which fills in a list
/// of parsed operands and a list of lexer tokens.  Each lexer token becomes one
/// EDToken; any gap between the end of one lexer token and the start of the
/// next becomes an additional kTokenWhitespace token.
///
/// \param tokens        Receives the new tokens.  They are allocated with
///                      `new` and are not freed by this function.
/// \param str           The instruction text to tokenize.
/// \param operandOrder  Indexed by (parser operand index - 1); the value found
///                      there is used as the operand ID of every token lying
///                      inside that operand.
/// \param disassembler  The disassembler used for parsing; also stored in each
///                      created token.
/// \returns 0 on success, or -1 if parseInst() reports failure.
int EDToken::tokenize(std::vector<EDToken*> &tokens,
                      std::string &str,
                      const char *operandOrder,
                      EDDisassembler &disassembler) {
  SmallVector<MCParsedAsmOperand*, 5> parsedOperands;
  SmallVector<AsmToken, 10> asmTokens;

  // NOTE(review): the entries of parsedOperands are never freed in this
  // function.  Whether they should be depends on who owns them after
  // parseInst() returns -- confirm against EDDisassembler::parseInst().
  if (disassembler.parseInst(parsedOperands, asmTokens, str))
    return -1;

  // With no lexer tokens there is no first token whose location could seed the
  // whitespace cursor below, and dereferencing begin() would be invalid.
  if (asmTokens.empty())
    return 0;

  SmallVectorImpl<MCParsedAsmOperand*>::iterator operandIterator =
    parsedOperands.begin();
  unsigned int operandIndex = 0;

  // The first identifier encountered is treated as the opcode.
  bool readOpcode = false;

  // Points just past the previous lexer token; anything between it and the
  // next lexer token is emitted as whitespace.
  const char *wsPointer = asmTokens.begin()->getLoc().getPointer();

  for (SmallVectorImpl<AsmToken>::iterator tokenIterator = asmTokens.begin();
       tokenIterator != asmTokens.end();
       ++tokenIterator) {
    SMLoc tokenLoc = tokenIterator->getLoc();
    const char *tokenPointer = tokenLoc.getPointer();

    if (tokenPointer > wsPointer) {
      size_t wsLength = tokenPointer - wsPointer;

      EDToken *whitespaceToken = new EDToken(StringRef(wsPointer, wsLength),
                                             EDToken::kTokenWhitespace,
                                             0,
                                             disassembler);

      tokens.push_back(whitespaceToken);
    }

    wsPointer = tokenPointer + tokenIterator->getString().size();

    // Skip past every operand that ends before this token starts.
    while (operandIterator != parsedOperands.end() &&
           tokenLoc.getPointer() >
           (*operandIterator)->getEndLoc().getPointer()) {
      ++operandIterator;
      ++operandIndex;
    }

    EDToken *token;

    switch (tokenIterator->getKind()) {
    case AsmToken::Identifier:
      if (!readOpcode) {
        token = new EDToken(tokenIterator->getString(),
                            EDToken::kTokenOpcode,
                            (uint64_t)tokenIterator->getKind(),
                            disassembler);
        readOpcode = true;
        break;
      }
      // Any identifier that isn't the opcode is mere punctuation.
      // FALL THROUGH (intentional)
    default:
      token = new EDToken(tokenIterator->getString(),
                          EDToken::kTokenPunctuation,
                          (uint64_t)tokenIterator->getKind(),
                          disassembler);
      break;
    case AsmToken::Integer:
    {
      token = new EDToken(tokenIterator->getString(),
                          EDToken::kTokenLiteral,
                          (uint64_t)tokenIterator->getKind(),
                          disassembler);

      const int64_t intVal = tokenIterator->getIntVal();

      if (intVal < 0) {
        // Negate in unsigned arithmetic: "-intVal" is signed overflow
        // (undefined behaviour) when intVal is the most negative int64_t.
        token->makeLiteral(true, 0 - static_cast<uint64_t>(intVal));
      } else {
        token->makeLiteral(false, static_cast<uint64_t>(intVal));
      }
      break;
    }
    case AsmToken::Register:
    {
      // makeRegister() sets the type to kTokenRegister as well; constructing
      // the token with that type up front just avoids a transient
      // kTokenLiteral tag.
      token = new EDToken(tokenIterator->getString(),
                          EDToken::kTokenRegister,
                          (uint64_t)tokenIterator->getKind(),
                          disassembler);
      token->makeRegister((unsigned)tokenIterator->getRegVal());
      break;
    }
    }

    if (operandIterator != parsedOperands.end() &&
        tokenLoc.getPointer() >=
        (*operandIterator)->getStartLoc().getPointer()) {
      // operandIndex == 0 means the operand is the instruction (which the
      // AsmParser treats as an operand but edis does not).  We therefore skip
      // operandIndex == 0 and subtract 1 from all other operand indices.
      if (operandIndex > 0)
        token->setOperandID(operandOrder[operandIndex - 1]);
    }

    tokens.push_back(token);
  }

  return 0;
}
/// Provides the token's text as a NUL-terminated C string.
///
/// The text is copied from Str into PermStr whenever PermStr is empty, and
/// \p buf is pointed at PermStr's character buffer.
///
/// \param buf  Receives the pointer to the C string.
/// \returns 0 in all cases.
int EDToken::getString(const char*& buf) {
  if (PermStr.empty())
    PermStr = Str.str();

  buf = PermStr.c_str();
  return 0;
}
|