1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
|
// Copyright 2011 Google Inc. All Rights Reserved.
#ifndef ART_SRC_DEX_VERIFY_H_
#define ART_SRC_DEX_VERIFY_H_
#include <deque>
#include <limits>
#include <map>
#include <vector>
#include "casts.h"
#include "compiler.h"
#include "dex_file.h"
#include "dex_instruction.h"
#include "macros.h"
#include "object.h"
#include "stl_util.h"
#include "UniquePtr.h"
namespace art {
namespace verifier {
class DexVerifier;
class PcToReferenceMap;
class RegTypeCache;
/*
* Set this to enable dead code scanning. This is not required, but it's very useful when testing
* changes to the verifier (to make sure we're not skipping over stuff). The only reason not to do
* it is that it slightly increases the time required to perform verification.
*/
#ifndef NDEBUG
# define DEAD_CODE_SCAN true
#else
# define DEAD_CODE_SCAN false
#endif
/*
* RegType holds information about the "type" of data held in a register.
*/
class RegType {
public:
enum Type {
kRegTypeUnknown = 0, /* initial state */
kRegTypeConflict, /* merge clash makes this reg's type unknowable */
kRegTypeBoolean, /* Z */
kRegType1nrSTART = kRegTypeBoolean,
kRegTypeIntegralSTART = kRegTypeBoolean,
kRegTypeByte, /* B */
kRegTypeShort, /* S */
kRegTypeChar, /* C */
kRegTypeInteger, /* I */
kRegTypeIntegralEND = kRegTypeInteger,
kRegTypeFloat, /* F */
kRegType1nrEND = kRegTypeFloat,
kRegTypeLongLo, /* J - lower-numbered register; endian-independent */
kRegTypeLongHi,
kRegTypeDoubleLo, /* D */
kRegTypeDoubleHi,
kRegTypeConstLo, /* const derived wide, lower half - could be long or double */
kRegTypeConstHi, /* const derived wide, upper half - could be long or double */
kRegTypeLastFixedLocation = kRegTypeConstHi,
kRegTypeConst, /* 32-bit constant derived value - could be float or int */
kRegTypeUnresolvedReference, // Reference type that couldn't be resolved
kRegTypeUninitializedReference, // Freshly allocated reference type
kRegTypeUninitializedThisReference, // Freshly allocated reference passed as "this"
kRegTypeUnresolvedAndUninitializedReference, // Freshly allocated unresolved reference type
kRegTypeReference, // Reference type
};
Type GetType() const {
return type_;
}
bool IsUnknown() const { return type_ == kRegTypeUnknown; }
bool IsConflict() const { return type_ == kRegTypeConflict; }
bool IsBoolean() const { return type_ == kRegTypeBoolean; }
bool IsByte() const { return type_ == kRegTypeByte; }
bool IsChar() const { return type_ == kRegTypeChar; }
bool IsShort() const { return type_ == kRegTypeShort; }
bool IsInteger() const { return type_ == kRegTypeInteger; }
bool IsLong() const { return type_ == kRegTypeLongLo; }
bool IsFloat() const { return type_ == kRegTypeFloat; }
bool IsDouble() const { return type_ == kRegTypeDoubleLo; }
bool IsUnresolvedReference() const { return type_ == kRegTypeUnresolvedReference; }
bool IsUninitializedReference() const { return type_ == kRegTypeUninitializedReference; }
bool IsUninitializedThisReference() const { return type_ == kRegTypeUninitializedThisReference; }
bool IsUnresolvedAndUninitializedReference() const {
return type_ == kRegTypeUnresolvedAndUninitializedReference;
}
bool IsReference() const { return type_ == kRegTypeReference; }
bool IsUninitializedTypes() const {
return IsUninitializedReference() || IsUninitializedThisReference() ||
IsUnresolvedAndUninitializedReference();
}
bool IsUnresolvedTypes() const {
return IsUnresolvedReference() || IsUnresolvedAndUninitializedReference();
}
bool IsLowHalf() const { return type_ == kRegTypeLongLo ||
type_ == kRegTypeDoubleLo ||
type_ == kRegTypeConstLo; }
bool IsHighHalf() const { return type_ == kRegTypeLongHi ||
type_ == kRegTypeDoubleHi ||
type_ == kRegTypeConstHi; }
bool IsLongOrDoubleTypes() const { return IsLowHalf(); }
// Check this is the low half, and that type_h is its matching high-half
bool CheckWidePair(const RegType& type_h) const {
return IsLowHalf() && (type_h.type_ == type_ + 1);
}
// The high half that corresponds to this low half
const RegType& HighHalf(RegTypeCache* cache) const;
bool IsConstant() const { return type_ == kRegTypeConst; }
bool IsLongConstant() const { return type_ == kRegTypeConstLo; }
bool IsLongConstantHigh() const { return type_ == kRegTypeConstHi; }
// If this is a 32-bit constant, what is the value? This value may just
// approximate to the actual constant value by virtue of merging.
int32_t ConstantValue() const {
DCHECK(IsConstant());
return allocation_pc_or_constant_;
}
bool IsZero() const { return IsConstant() && ConstantValue() == 0; }
bool IsOne() const { return IsConstant() && ConstantValue() == 1; }
bool IsConstantBoolean() const { return IsZero() || IsOne(); }
bool IsConstantByte() const {
return IsConstant() &&
ConstantValue() >= std::numeric_limits<jbyte>::min() &&
ConstantValue() <= std::numeric_limits<jbyte>::max();
}
bool IsConstantShort() const {
return IsConstant() &&
ConstantValue() >= std::numeric_limits<jshort>::min() &&
ConstantValue() <= std::numeric_limits<jshort>::max();
}
bool IsConstantChar() const {
return IsConstant() && ConstantValue() >= 0 &&
ConstantValue() <= std::numeric_limits<jchar>::max();
}
bool IsReferenceTypes() const {
return IsReference() || IsUnresolvedReference() || IsUninitializedReference() ||
IsUninitializedThisReference() || IsUnresolvedAndUninitializedReference() || IsZero();
}
bool IsNonZeroReferenceTypes() const {
return IsReference() || IsUnresolvedReference() || IsUninitializedReference() ||
IsUninitializedThisReference();
}
bool IsCategory1Types() const {
return (type_ >= kRegType1nrSTART && type_ <= kRegType1nrEND) || IsConstant();
}
bool IsCategory2Types() const {
return IsLowHalf(); // Don't expect explicit testing of high halves
}
bool IsBooleanTypes() const { return IsBoolean() || IsConstantBoolean(); }
bool IsByteTypes() const { return IsByte() || IsBoolean() || IsConstantByte(); }
bool IsShortTypes() const { return IsShort() || IsByte() || IsBoolean() || IsConstantShort(); }
bool IsCharTypes() const { return IsChar() || IsBooleanTypes() || IsConstantChar(); }
bool IsIntegralTypes() const {
return (type_ >= kRegTypeIntegralSTART && type_ <= kRegTypeIntegralEND) || IsConstant();
}
bool IsArrayIndexTypes() const { return IsIntegralTypes(); }
// Float type may be derived from any constant type
bool IsFloatTypes() const { return IsFloat() || IsConstant(); }
bool IsLongTypes() const {
return IsLong() || IsLongConstant();
}
bool IsLongHighTypes() const {
return type_ == kRegTypeLongHi || type_ == kRegTypeConstHi;
}
bool IsDoubleTypes() const {
return IsDouble() || IsLongConstant();
}
bool IsDoubleHighTypes() const {
return type_ == kRegTypeDoubleHi || type_ == kRegTypeConstHi;
}
uint32_t GetAllocationPc() const {
DCHECK(IsUninitializedTypes());
return allocation_pc_or_constant_;
}
Class* GetClass() const {
DCHECK(!IsUnresolvedReference());
DCHECK(klass_or_descriptor_ != NULL);
DCHECK(klass_or_descriptor_->IsClass());
return down_cast<Class*>(klass_or_descriptor_);
}
bool IsJavaLangObject() const {
return IsReference() && GetClass()->IsObjectClass();
}
bool IsInstantiableTypes() const {
return IsUnresolvedTypes() || (IsNonZeroReferenceTypes() && GetClass()->IsInstantiable());
}
String* GetDescriptor() const {
DCHECK(IsUnresolvedTypes());
DCHECK(klass_or_descriptor_ != NULL);
DCHECK(klass_or_descriptor_->GetClass()->IsStringClass());
return down_cast<String*>(klass_or_descriptor_);
}
bool IsArrayClass() const {
if (IsUnresolvedTypes()) {
return GetDescriptor()->CharAt(0) == '[';
} else if (!IsConstant()) {
return GetClass()->IsArrayClass();
} else {
return false;
}
}
uint16_t GetId() const {
return cache_id_;
}
std::string Dump() const;
bool IsAssignableFrom(const RegType& src) const;
const RegType& Merge(const RegType& incoming_type, RegTypeCache* reg_types) const;
bool Equals(const RegType& other) const { return GetId() == other.GetId(); }
/*
* A basic Join operation on classes. For a pair of types S and T the Join, written S v T = J, is
* S <: J, T <: J and for-all U such that S <: U, T <: U then J <: U. That is J is the parent of
* S and T such that there isn't a parent of both S and T that isn't also the parent of J (ie J
* is the deepest (lowest upper bound) parent of S and T).
*
* This operation applies for regular classes and arrays, however, for interface types there needn't
* be a partial ordering on the types. We could solve the problem of a lack of a partial order by
* introducing sets of types, however, the only operation permissible on an interface is
* invoke-interface. In the tradition of Java verifiers [1] we defer the verification of interface
* types until an invoke-interface call on the interface typed reference at runtime and allow
* the perversion of Object being assignable to an interface type (note, however, that we don't
* allow assignment of Object or Interface to any concrete class and are therefore type safe).
*
* [1] Java bytecode verification: algorithms and formalizations, Xavier Leroy
*/
static Class* ClassJoin(Class* s, Class* t);
private:
friend class RegTypeCache;
RegType(Type type, Object* klass_or_descriptor, uint32_t allocation_pc_or_constant, uint16_t cache_id) :
type_(type), klass_or_descriptor_(klass_or_descriptor), allocation_pc_or_constant_(allocation_pc_or_constant),
cache_id_(cache_id) {
DCHECK(IsConstant() || IsUninitializedTypes() || allocation_pc_or_constant == 0);
if (!IsConstant() && !IsLongConstant() && !IsLongConstantHigh() && !IsUnknown() &&
!IsConflict()) {
DCHECK(klass_or_descriptor != NULL);
DCHECK(IsUnresolvedTypes() || klass_or_descriptor_->IsClass());
DCHECK(!IsUnresolvedTypes() || klass_or_descriptor_->GetClass()->IsStringClass());
}
}
const Type type_; // The current type of the register
// If known the type of the register, else a String for the descriptor
Object* klass_or_descriptor_;
// Overloaded field that:
// - if IsConstant() holds a 32bit constant value
// - is IsReference() holds the allocation_pc or kInitArgAddr for an initialized reference or
// kUninitThisArgAddr for an uninitialized this ptr
const uint32_t allocation_pc_or_constant_;
// A RegType cache densely encodes types, this is the location in the cache for this type
const uint16_t cache_id_;
DISALLOW_COPY_AND_ASSIGN(RegType);
};
std::ostream& operator<<(std::ostream& os, const RegType& rhs);
class RegTypeCache {
public:
explicit RegTypeCache() : entries_(RegType::kRegTypeLastFixedLocation + 1) {
Unknown(); // ensure Unknown is initialized
}
~RegTypeCache() {
STLDeleteElements(&entries_);
}
const RegType& GetFromId(uint16_t id) {
DCHECK_LT(id, entries_.size());
RegType* result = entries_[id];
DCHECK(result != NULL);
return *result;
}
const RegType& From(RegType::Type type, const ClassLoader* loader, const char* descriptor);
const RegType& FromClass(Class* klass);
const RegType& FromCat1Const(int32_t value);
const RegType& FromDescriptor(const ClassLoader* loader, const char* descriptor);
const RegType& FromType(RegType::Type);
const RegType& Boolean() { return FromType(RegType::kRegTypeBoolean); }
const RegType& Byte() { return FromType(RegType::kRegTypeByte); }
const RegType& Char() { return FromType(RegType::kRegTypeChar); }
const RegType& Short() { return FromType(RegType::kRegTypeShort); }
const RegType& Integer() { return FromType(RegType::kRegTypeInteger); }
const RegType& Float() { return FromType(RegType::kRegTypeFloat); }
const RegType& Long() { return FromType(RegType::kRegTypeLongLo); }
const RegType& Double() { return FromType(RegType::kRegTypeDoubleLo); }
const RegType& JavaLangClass() { return From(RegType::kRegTypeReference, NULL, "Ljava/lang/Class;"); }
const RegType& JavaLangObject() { return From(RegType::kRegTypeReference, NULL, "Ljava/lang/Object;"); }
const RegType& JavaLangString() { return From(RegType::kRegTypeReference, NULL, "Ljava/lang/String;"); }
const RegType& JavaLangThrowable() { return From(RegType::kRegTypeReference, NULL, "Ljava/lang/Throwable;"); }
const RegType& Unknown() { return FromType(RegType::kRegTypeUnknown); }
const RegType& Conflict() { return FromType(RegType::kRegTypeConflict); }
const RegType& ConstLo() { return FromType(RegType::kRegTypeConstLo); }
const RegType& Zero() { return FromCat1Const(0); }
const RegType& Uninitialized(const RegType& type, uint32_t allocation_pc);
const RegType& UninitializedThisArgument(Class* klass);
const RegType& FromUninitialized(const RegType& uninit_type);
// Representatives of various constant types. When merging constants we can't infer a type,
// (an int may later be used as a float) so we select these representative values meaning future
// merges won't know the exact constant value but have some notion of its size.
const RegType& ByteConstant() { return FromCat1Const(std::numeric_limits<jbyte>::min()); }
const RegType& ShortConstant() { return FromCat1Const(std::numeric_limits<jshort>::min()); }
const RegType& IntConstant() { return FromCat1Const(std::numeric_limits<jint>::max()); }
const RegType& GetComponentType(const RegType& array, const ClassLoader* loader);
private:
// The allocated entries
std::vector<RegType*> entries_;
DISALLOW_COPY_AND_ASSIGN(RegTypeCache);
};
class InsnFlags {
public:
InsnFlags() : length_(0), flags_(0) {}
void SetLengthInCodeUnits(size_t length) {
CHECK_LT(length, 65536u);
length_ = length;
}
size_t GetLengthInCodeUnits() {
return length_;
}
bool IsOpcode() const {
return length_ != 0;
}
void SetInTry() {
flags_ |= 1 << kInsnFlagInTry;
}
void ClearInTry() {
flags_ &= ~(1 << kInsnFlagInTry);
}
bool IsInTry() const {
return (flags_ & (1 << kInsnFlagInTry)) != 0;
}
void SetBranchTarget() {
flags_ |= 1 << kInsnFlagBranchTarget;
}
void ClearBranchTarget() {
flags_ &= ~(1 << kInsnFlagBranchTarget);
}
bool IsBranchTarget() const {
return (flags_ & (1 << kInsnFlagBranchTarget)) != 0;
}
void SetGcPoint() {
flags_ |= 1 << kInsnFlagGcPoint;
}
void ClearGcPoint() {
flags_ &= ~(1 << kInsnFlagGcPoint);
}
bool IsGcPoint() const {
return (flags_ & (1 << kInsnFlagGcPoint)) != 0;
}
void SetVisited() {
flags_ |= 1 << kInsnFlagVisited;
}
void ClearVisited() {
flags_ &= ~(1 << kInsnFlagVisited);
}
bool IsVisited() const {
return (flags_ & (1 << kInsnFlagVisited)) != 0;
}
void SetChanged() {
flags_ |= 1 << kInsnFlagChanged;
}
void ClearChanged() {
flags_ &= ~(1 << kInsnFlagChanged);
}
bool IsChanged() const {
return (flags_ & (1 << kInsnFlagChanged)) != 0;
}
bool IsVisitedOrChanged() const {
return IsVisited() || IsChanged();
}
std::string Dump() {
char encoding[6];
if (!IsOpcode()) {
strncpy(encoding, "XXXXX", sizeof(encoding));
} else {
strncpy(encoding, "-----", sizeof(encoding));
if (IsInTry()) encoding[kInsnFlagInTry] = 'T';
if (IsBranchTarget()) encoding[kInsnFlagBranchTarget] = 'B';
if (IsGcPoint()) encoding[kInsnFlagGcPoint] = 'G';
if (IsVisited()) encoding[kInsnFlagVisited] = 'V';
if (IsChanged()) encoding[kInsnFlagChanged] = 'C';
}
return std::string(encoding);
}
private:
enum InsnFlag {
kInsnFlagInTry,
kInsnFlagBranchTarget,
kInsnFlagGcPoint,
kInsnFlagVisited,
kInsnFlagChanged,
};
// Size of instruction in code units
uint16_t length_;
uint8_t flags_;
};
/*
* "Direct" and "virtual" methods are stored independently. The type of call used to invoke the
* method determines which list we search, and whether we travel up into superclasses.
*
* (<clinit>, <init>, and methods declared "private" or "static" are stored in the "direct" list.
* All others are stored in the "virtual" list.)
*/
enum MethodType {
METHOD_UNKNOWN = 0,
METHOD_DIRECT, // <init>, private
METHOD_STATIC, // static
METHOD_VIRTUAL, // virtual, super
METHOD_INTERFACE // interface
};
const int kRegTypeUninitMask = 0xff;
const int kRegTypeUninitShift = 8;
/*
* Register type categories, for type checking.
*
* The spec says category 1 includes boolean, byte, char, short, int, float, reference, and
* returnAddress. Category 2 includes long and double.
*
* We treat object references separately, so we have "category1nr". We don't support jsr/ret, so
* there is no "returnAddress" type.
*/
enum TypeCategory {
kTypeCategoryUnknown = 0,
kTypeCategory1nr = 1, // boolean, byte, char, short, int, float
kTypeCategory2 = 2, // long, double
kTypeCategoryRef = 3, // object reference
};
/*
* An enumeration of problems that can turn up during verification.
* VERIFY_ERROR_GENERIC denotes a failure that causes the entire class to be rejected. Other errors
* denote verification errors that cause bytecode to be rewritten to fail at runtime.
*/
enum VerifyError {
VERIFY_ERROR_NONE = 0, /* no error; must be zero */
VERIFY_ERROR_GENERIC, /* VerifyError */
VERIFY_ERROR_NO_CLASS, /* NoClassDefFoundError */
VERIFY_ERROR_NO_FIELD, /* NoSuchFieldError */
VERIFY_ERROR_NO_METHOD, /* NoSuchMethodError */
VERIFY_ERROR_ACCESS_CLASS, /* IllegalAccessError */
VERIFY_ERROR_ACCESS_FIELD, /* IllegalAccessError */
VERIFY_ERROR_ACCESS_METHOD, /* IllegalAccessError */
VERIFY_ERROR_CLASS_CHANGE, /* IncompatibleClassChangeError */
VERIFY_ERROR_INSTANTIATION, /* InstantiationError */
};
std::ostream& operator<<(std::ostream& os, const VerifyError& rhs);
/*
* Identifies the type of reference in the instruction that generated the verify error
* (e.g. VERIFY_ERROR_ACCESS_CLASS could come from a method, field, or class reference).
*
* This must fit in two bits.
*/
enum VerifyErrorRefType {
VERIFY_ERROR_REF_CLASS = 0,
VERIFY_ERROR_REF_FIELD = 1,
VERIFY_ERROR_REF_METHOD = 2,
};
const int kVerifyErrorRefTypeShift = 6;
/*
* Format enumeration for RegisterMap data area.
*/
enum RegisterMapFormat {
kRegMapFormatUnknown = 0,
kRegMapFormatNone, /* indicates no map data follows */
kRegMapFormatCompact8, /* compact layout, 8-bit addresses */
kRegMapFormatCompact16, /* compact layout, 16-bit addresses */
};
// During verification, we associate one of these with every "interesting" instruction. We track
// the status of all registers, and (if the method has any monitor-enter instructions) maintain a
// stack of entered monitors (identified by code unit offset).
// If live-precise register maps are enabled, the "liveRegs" vector will be populated. Unlike the
// other lists of registers here, we do not track the liveness of the method result register
// (which is not visible to the GC).
class RegisterLine {
public:
RegisterLine(size_t num_regs, DexVerifier* verifier) :
line_(new uint16_t[num_regs]), verifier_(verifier), num_regs_(num_regs) {
memset(line_.get(), 0, num_regs_ * sizeof(uint16_t));
result_[0] = RegType::kRegTypeUnknown;
result_[1] = RegType::kRegTypeUnknown;
}
// Implement category-1 "move" instructions. Copy a 32-bit value from "vsrc" to "vdst".
void CopyRegister1(uint32_t vdst, uint32_t vsrc, TypeCategory cat);
// Implement category-2 "move" instructions. Copy a 64-bit value from "vsrc" to "vdst". This
// copies both halves of the register.
void CopyRegister2(uint32_t vdst, uint32_t vsrc);
// Implement "move-result". Copy the category-1 value from the result register to another
// register, and reset the result register.
void CopyResultRegister1(uint32_t vdst, bool is_reference);
// Implement "move-result-wide". Copy the category-2 value from the result register to another
// register, and reset the result register.
void CopyResultRegister2(uint32_t vdst);
// Set the invisible result register to unknown
void SetResultTypeToUnknown();
// Set the type of register N, verifying that the register is valid. If "newType" is the "Lo"
// part of a 64-bit value, register N+1 will be set to "newType+1".
// The register index was validated during the static pass, so we don't need to check it here.
void SetRegisterType(uint32_t vdst, const RegType& new_type);
/* Set the type of the "result" register. */
void SetResultRegisterType(const RegType& new_type);
// Get the type of register vsrc.
const RegType& GetRegisterType(uint32_t vsrc) const;
bool VerifyRegisterType(uint32_t vsrc, const RegType& check_type);
void CopyFromLine(const RegisterLine* src) {
DCHECK_EQ(num_regs_, src->num_regs_);
memcpy(line_.get(), src->line_.get(), num_regs_ * sizeof(uint16_t));
monitors_ = src->monitors_;
reg_to_lock_depths_ = src->reg_to_lock_depths_;
}
std::string Dump() const {
std::string result;
for (size_t i = 0; i < num_regs_; i++) {
result += StringPrintf("%zd:[", i);
result += GetRegisterType(i).Dump();
result += "],";
}
typedef std::deque<uint32_t>::const_iterator It; // TODO: C++0x auto
for (It it = monitors_.begin(), end = monitors_.end(); it != end ; ++it) {
result += StringPrintf("{%d},", *it);
}
return result;
}
void FillWithGarbage() {
memset(line_.get(), 0xf1, num_regs_ * sizeof(uint16_t));
while (!monitors_.empty()) {
monitors_.pop_back();
}
reg_to_lock_depths_.clear();
}
/*
* We're creating a new instance of class C at address A. Any registers holding instances
* previously created at address A must be initialized by now. If not, we mark them as "conflict"
* to prevent them from being used (otherwise, MarkRefsAsInitialized would mark the old ones and
* the new ones at the same time).
*/
void MarkUninitRefsAsInvalid(const RegType& uninit_type);
/*
* Update all registers holding "uninit_type" to instead hold the corresponding initialized
* reference type. This is called when an appropriate constructor is invoked -- all copies of
* the reference must be marked as initialized.
*/
void MarkRefsAsInitialized(const RegType& uninit_type);
/*
* Check constraints on constructor return. Specifically, make sure that the "this" argument got
* initialized.
* The "this" argument to <init> uses code offset kUninitThisArgAddr, which puts it at the start
* of the list in slot 0. If we see a register with an uninitialized slot 0 reference, we know it
* somehow didn't get initialized.
*/
bool CheckConstructorReturn() const;
// Compare two register lines. Returns 0 if they match.
// Using this for a sort is unwise, since the value can change based on machine endianness.
int CompareLine(const RegisterLine* line2) const {
DCHECK(monitors_ == line2->monitors_);
// TODO: DCHECK(reg_to_lock_depths_ == line2->reg_to_lock_depths_);
return memcmp(line_.get(), line2->line_.get(), num_regs_ * sizeof(uint16_t));
}
size_t NumRegs() const {
return num_regs_;
}
/*
* Get the "this" pointer from a non-static method invocation. This returns the RegType so the
* caller can decide whether it needs the reference to be initialized or not. (Can also return
* kRegTypeZero if the reference can only be zero at this point.)
*
* The argument count is in vA, and the first argument is in vC, for both "simple" and "range"
* versions. We just need to make sure vA is >= 1 and then return vC.
*/
const RegType& GetInvocationThis(const Instruction::DecodedInstruction& dec_insn);
/*
* Get the value from a register, and cast it to a Class. Sets "*failure" if something fails.
* This fails if the register holds an uninitialized class.
* If the register holds kRegTypeZero, this returns a NULL pointer.
*/
Class* GetClassFromRegister(uint32_t vsrc) const;
/*
* Verify types for a simple two-register instruction (e.g. "neg-int").
* "dst_type" is stored into vA, and "src_type" is verified against vB.
*/
void CheckUnaryOp(const Instruction::DecodedInstruction& dec_insn,
const RegType& dst_type, const RegType& src_type);
/*
* Verify types for a simple three-register instruction (e.g. "add-int").
* "dst_type" is stored into vA, and "src_type1"/"src_type2" are verified
* against vB/vC.
*/
void CheckBinaryOp(const Instruction::DecodedInstruction& dec_insn,
const RegType& dst_type, const RegType& src_type1, const RegType& src_type2,
bool check_boolean_op);
/*
* Verify types for a binary "2addr" operation. "src_type1"/"src_type2"
* are verified against vA/vB, then "dst_type" is stored into vA.
*/
void CheckBinaryOp2addr(const Instruction::DecodedInstruction& dec_insn,
const RegType& dst_type,
const RegType& src_type1, const RegType& src_type2,
bool check_boolean_op);
/*
* Verify types for A two-register instruction with a literal constant (e.g. "add-int/lit8").
* "dst_type" is stored into vA, and "src_type" is verified against vB.
*
* If "check_boolean_op" is set, we use the constant value in vC.
*/
void CheckLiteralOp(const Instruction::DecodedInstruction& dec_insn,
const RegType& dst_type, const RegType& src_type, bool check_boolean_op);
// Verify/push monitor onto the monitor stack, locking the value in reg_idx at location insn_idx.
void PushMonitor(uint32_t reg_idx, int32_t insn_idx);
// Verify/pop monitor from monitor stack ensuring that we believe the monitor is locked
void PopMonitor(uint32_t reg_idx);
// Stack of currently held monitors and where they were locked
size_t MonitorStackDepth() const {
return monitors_.size();
}
// We expect no monitors to be held at certain points, such a method returns. Verify the stack
// is empty, failing and returning false if not.
bool VerifyMonitorStackEmpty();
bool MergeRegisters(const RegisterLine* incoming_line);
size_t GetMaxNonZeroReferenceReg(size_t max_ref_reg) {
size_t i = static_cast<int>(max_ref_reg) < 0 ? 0 : max_ref_reg;
for (; i < num_regs_; i++) {
if (GetRegisterType(i).IsNonZeroReferenceTypes()) {
max_ref_reg = i;
}
}
return max_ref_reg;
}
// Write a bit at each register location that holds a reference
void WriteReferenceBitMap(std::vector<uint8_t>& data, size_t max_bytes);
private:
void CopyRegToLockDepth(size_t dst, size_t src) {
if (reg_to_lock_depths_.count(src) > 0) {
uint32_t depths = reg_to_lock_depths_[src];
reg_to_lock_depths_[dst] = depths;
}
}
bool IsSetLockDepth(size_t reg, size_t depth) {
if (reg_to_lock_depths_.count(reg) > 0) {
uint32_t depths = reg_to_lock_depths_[reg];
return (depths & (1 << depth)) != 0;
} else {
return false;
}
}
void SetRegToLockDepth(size_t reg, size_t depth) {
CHECK_LT(depth, 32u);
DCHECK(!IsSetLockDepth(reg, depth));
uint32_t depths;
if (reg_to_lock_depths_.count(reg) > 0) {
depths = reg_to_lock_depths_[reg];
depths = depths | (1 << depth);
} else {
depths = 1 << depth;
}
reg_to_lock_depths_[reg] = depths;
}
void ClearRegToLockDepth(size_t reg, size_t depth) {
CHECK_LT(depth, 32u);
DCHECK(IsSetLockDepth(reg, depth));
uint32_t depths = reg_to_lock_depths_[reg];
depths = depths ^ (1 << depth);
if (depths != 0) {
reg_to_lock_depths_[reg] = depths;
} else {
reg_to_lock_depths_.erase(reg);
}
}
void ClearAllRegToLockDepths(size_t reg) {
reg_to_lock_depths_.erase(reg);
}
// Storage for the result register's type, valid after an invocation
uint16_t result_[2];
// An array of RegType Ids associated with each dex register
UniquePtr<uint16_t[]> line_;
// Back link to the verifier
DexVerifier* verifier_;
// Length of reg_types_
const size_t num_regs_;
// A stack of monitor enter locations
std::deque<uint32_t> monitors_;
// A map from register to a bit vector of indices into the monitors_ stack. As we pop the monitor
// stack we verify that monitor-enter/exit are correctly nested. That is, if there was a
// monitor-enter on v5 and then on v6, we expect the monitor-exit to be on v6 then on v5
std::map<uint32_t, uint32_t> reg_to_lock_depths_;
};
std::ostream& operator<<(std::ostream& os, const RegisterLine& rhs);
class PcToRegisterLineTable {
public:
// We don't need to store the register data for many instructions, because we either only need
// it at branch points (for verification) or GC points and branches (for verification +
// type-precise register analysis).
enum RegisterTrackingMode {
kTrackRegsBranches,
kTrackRegsGcPoints,
kTrackRegsAll,
};
PcToRegisterLineTable() {}
~PcToRegisterLineTable() {
STLDeleteValues(&pc_to_register_line_);
}
// Initialize the RegisterTable. Every instruction address can have a different set of information
// about what's in which register, but for verification purposes we only need to store it at
// branch target addresses (because we merge into that).
void Init(RegisterTrackingMode mode, InsnFlags* flags, uint32_t insns_size,
uint16_t registers_size, DexVerifier* verifier);
RegisterLine* GetLine(size_t idx) {
return pc_to_register_line_[idx];
}
private:
// Map from a dex pc to the register status associated with it
std::map<int32_t, RegisterLine*> pc_to_register_line_;
// Number of registers we track for each instruction. This is equal to the method's declared
// "registersSize" plus kExtraRegs (2).
size_t insn_reg_count_plus_;
};
// The verifier
class DexVerifier {
public:
/* Verify a class. Returns "true" on success. */
static bool VerifyClass(const Class* klass);
/*
* Perform verification on a single method.
*
* We do this in three passes:
* (1) Walk through all code units, determining instruction locations,
* widths, and other characteristics.
* (2) Walk through all code units, performing static checks on
* operands.
* (3) Iterate through the method, checking type safety and looking
* for code flow problems.
*
* Some checks may be bypassed depending on the verification mode. We can't
* turn this stuff off completely if we want to do "exact" GC.
*
* Confirmed here:
* - code array must not be empty
* Confirmed by ComputeWidthsAndCountOps():
* - opcode of first instruction begins at index 0
* - only documented instructions may appear
* - each instruction follows the last
* - last byte of last instruction is at (code_length-1)
*/
static bool VerifyMethod(Method* method);
static void VerifyMethodAndDump(Method* method);
uint8_t EncodePcToReferenceMapData() const;
uint32_t DexFileVersion() const {
return dex_file_->GetVersion();
}
RegTypeCache* GetRegTypeCache() {
return ®_types_;
}
// Verification failed
std::ostream& Fail(VerifyError error) {
CHECK_EQ(failure_, VERIFY_ERROR_NONE);
failure_ = error;
return fail_messages_ << "VFY: " << PrettyMethod(method_)
<< '[' << reinterpret_cast<void*>(work_insn_idx_) << "] : ";
}
// Log for verification information
std::ostream& LogVerifyInfo() {
return info_messages_ << "VFY: " << PrettyMethod(method_)
<< '[' << reinterpret_cast<void*>(work_insn_idx_) << "] : ";
}
// Dump the state of the verifier, namely each instruction, what flags are set on it, register
// information
void Dump(std::ostream& os);
static const std::vector<uint8_t>* GetGcMap(Compiler::MethodReference ref);
static void DeleteGcMaps();
private:
explicit DexVerifier(Method* method);
bool Verify();
/*
* Compute the width of the instruction at each address in the instruction stream, and store it in
* insn_flags_. Addresses that are in the middle of an instruction, or that are part of switch
* table data, are not touched (so the caller should probably initialize "insn_flags" to zero).
*
* The "new_instance_count_" and "monitor_enter_count_" fields in vdata are also set.
*
* Performs some static checks, notably:
* - opcode of first instruction begins at index 0
* - only documented instructions may appear
* - each instruction follows the last
* - last byte of last instruction is at (code_length-1)
*
* Logs an error and returns "false" on failure.
*/
bool ComputeWidthsAndCountOps();
/*
* Set the "in try" flags for all instructions protected by "try" statements. Also sets the
* "branch target" flags for exception handlers.
*
* Call this after widths have been set in "insn_flags".
*
* Returns "false" if something in the exception table looks fishy, but we're expecting the
* exception table to be somewhat sane.
*/
bool ScanTryCatchBlocks();
/*
* Perform static verification on all instructions in a method.
*
* Walks through instructions in a method calling VerifyInstruction on each.
*/
bool VerifyInstructions();
/*
* Perform static verification on an instruction.
*
* As a side effect, this sets the "branch target" flags in InsnFlags.
*
* "(CF)" items are handled during code-flow analysis.
*
* v3 4.10.1
* - target of each jump and branch instruction must be valid
* - targets of switch statements must be valid
* - operands referencing constant pool entries must be valid
* - (CF) operands of getfield, putfield, getstatic, putstatic must be valid
* - (CF) operands of method invocation instructions must be valid
* - (CF) only invoke-direct can call a method starting with '<'
* - (CF) <clinit> must never be called explicitly
* - operands of instanceof, checkcast, new (and variants) must be valid
* - new-array[-type] limited to 255 dimensions
* - can't use "new" on an array class
* - (?) limit dimensions in multi-array creation
* - local variable load/store register values must be in valid range
*
* v3 4.11.1.2
* - branches must be within the bounds of the code array
* - targets of all control-flow instructions are the start of an instruction
* - register accesses fall within range of allocated registers
* - (N/A) access to constant pool must be of appropriate type
* - code does not end in the middle of an instruction
* - execution cannot fall off the end of the code
* - (earlier) for each exception handler, the "try" area must begin and
* end at the start of an instruction (end can be at the end of the code)
* - (earlier) for each exception handler, the handler must start at a valid
* instruction
*/
bool VerifyInstruction(const Instruction* inst, uint32_t code_offset);
/* Ensure that the register index is valid for this code item. */
bool CheckRegisterIndex(uint32_t idx);
/* Ensure that the wide register index is valid for this code item. */
bool CheckWideRegisterIndex(uint32_t idx);
// Perform static checks on a field get or set instruction. All we do here is ensure that the
// field index is in the valid range.
bool CheckFieldIndex(uint32_t idx);
// Perform static checks on a method invocation instruction. All we do here is ensure that the
// method index is in the valid range.
bool CheckMethodIndex(uint32_t idx);
// Perform static checks on a "new-instance" instruction. Specifically, make sure the class
// reference isn't for an array class.
bool CheckNewInstance(uint32_t idx);
/* Ensure that the string index is in the valid range. */
bool CheckStringIndex(uint32_t idx);
// Perform static checks on an instruction that takes a class constant. Ensure that the class
// index is in the valid range.
bool CheckTypeIndex(uint32_t idx);
// Perform static checks on a "new-array" instruction. Specifically, make sure they aren't
// creating an array of arrays that causes the number of dimensions to exceed 255.
bool CheckNewArray(uint32_t idx);
// Verify an array data table. "cur_offset" is the offset of the fill-array-data instruction.
bool CheckArrayData(uint32_t cur_offset);
// Verify that the target of a branch instruction is valid. We don't expect code to jump directly
// into an exception handler, but it's valid to do so as long as the target isn't a
// "move-exception" instruction. We verify that in a later stage.
// The dex format forbids certain instructions from branching to themselves.
// Updates "insnFlags", setting the "branch target" flag.
bool CheckBranchTarget(uint32_t cur_offset);
// Verify a switch table. "cur_offset" is the offset of the switch instruction.
// Updates "insnFlags", setting the "branch target" flag.
bool CheckSwitchTargets(uint32_t cur_offset);
// Check the register indices used in a "vararg" instruction, such as invoke-virtual or
// filled-new-array.
// - vA holds word count (0-5), args[] have values.
// There are some tests we don't do here, e.g. we don't try to verify that invoking a method that
// takes a double is done with consecutive registers. This requires parsing the target method
// signature, which we will be doing later on during the code flow analysis.
bool CheckVarArgRegs(uint32_t vA, uint32_t arg[]);
// Check the register indices used in a "vararg/range" instruction, such as invoke-virtual/range
// or filled-new-array/range.
// - vA holds word count, vC holds index of first reg.
bool CheckVarArgRangeRegs(uint32_t vA, uint32_t vC);
// Extract the relative offset from a branch instruction.
// Returns "false" on failure (e.g. this isn't a branch instruction).
bool GetBranchOffset(uint32_t cur_offset, int32_t* pOffset, bool* pConditional,
bool* selfOkay);
/* Perform detailed code-flow analysis on a single method. */
bool VerifyCodeFlow();
// Set the register types for the first instruction in the method based on the method signature.
// This has the side-effect of validating the signature.
bool SetTypesFromSignature();
/*
* Perform code flow on a method.
*
* The basic strategy is as outlined in v3 4.11.1.2: set the "changed" bit on the first
* instruction, process it (setting additional "changed" bits), and repeat until there are no
* more.
*
* v3 4.11.1.1
* - (N/A) operand stack is always the same size
* - operand stack [registers] contain the correct types of values
* - local variables [registers] contain the correct types of values
* - methods are invoked with the appropriate arguments
* - fields are assigned using values of appropriate types
* - opcodes have the correct type values in operand registers
* - there is never an uninitialized class instance in a local variable in code protected by an
* exception handler (operand stack is okay, because the operand stack is discarded when an
* exception is thrown) [can't know what's a local var w/o the debug info -- should fall out of
* register typing]
*
* v3 4.11.1.2
* - execution cannot fall off the end of the code
*
* (We also do many of the items described in the "static checks" sections, because it's easier to
* do them here.)
*
* We need an array of RegType values, one per register, for every instruction. If the method uses
* monitor-enter, we need extra data for every register, and a stack for every "interesting"
* instruction. In theory this could become quite large -- up to several megabytes for a monster
* function.
*
* NOTE:
* The spec forbids backward branches when there's an uninitialized reference in a register. The
* idea is to prevent something like this:
* loop:
* move r1, r0
* new-instance r0, MyClass
* ...
* if-eq rN, loop // once
* initialize r0
*
* This leaves us with two different instances, both allocated by the same instruction, but only
* one is initialized. The scheme outlined in v3 4.11.1.4 wouldn't catch this, so they work around
* it by preventing backward branches. We achieve identical results without restricting code
* reordering by specifying that you can't execute the new-instance instruction if a register
* contains an uninitialized instance created by that same instruction.
*/
bool CodeFlowVerifyMethod();
/*
* Perform verification for a single instruction.
*
* This requires fully decoding the instruction to determine the effect it has on registers.
*
* Finds zero or more following instructions and sets the "changed" flag if execution at that
* point needs to be (re-)evaluated. Register changes are merged into "reg_types_" at the target
* addresses. Does not set or clear any other flags in "insn_flags_".
*/
bool CodeFlowVerifyInstruction(uint32_t* start_guess);
// Perform verification of an aget instruction. The destination register's type will be set to
// be that of component type of the array unless the array type is unknown, in which case a
// bottom type inferred from the type of instruction is used. is_primitive is false for an
// aget-object.
void VerifyAGet(const Instruction::DecodedInstruction& insn, const RegType& insn_type,
bool is_primitive);
// Perform verification of an aput instruction.
void VerifyAPut(const Instruction::DecodedInstruction& insn, const RegType& insn_type,
bool is_primitive);
// Lookup instance field and fail for resolution violations
Field* GetInstanceField(const RegType& obj_type, int field_idx);
// Lookup static field and fail for resolution violations
Field* GetStaticField(int field_idx);
// Perform verification of an iget or sget instruction.
void VerifyISGet(const Instruction::DecodedInstruction& insn, const RegType& insn_type,
bool is_primitive, bool is_static);
// Perform verification of an iput or sput instruction.
void VerifyISPut(const Instruction::DecodedInstruction& insn, const RegType& insn_type,
bool is_primitive, bool is_static);
// Verify that the arguments in a filled-new-array instruction are valid.
void VerifyFilledNewArrayRegs(const Instruction::DecodedInstruction& dec_insn,
const RegType& res_type, bool is_range);
// Resolves a class based on an index and performs access checks to ensure the referrer can
// access the resolved class.
const RegType& ResolveClassAndCheckAccess(uint32_t class_idx);
/*
* For the "move-exception" instruction at "work_insn_idx_", which must be at an exception handler
* address, determine the first common superclass of all exceptions that can land here.
* Returns NULL if no matching exception handler can be found, or if the exception is not a
* subclass of Throwable.
*/
const RegType& GetCaughtExceptionType();
/*
* Resolves a method based on an index and performs access checks to ensure
* the referrer can access the resolved method.
* Does not throw exceptions.
*/
Method* ResolveMethodAndCheckAccess(uint32_t method_idx, bool is_direct);
/*
* Verify the arguments to a method. We're executing in "method", making
* a call to the method reference in vB.
*
* If this is a "direct" invoke, we allow calls to <init>. For calls to
* <init>, the first argument may be an uninitialized reference. Otherwise,
* calls to anything starting with '<' will be rejected, as will any
* uninitialized reference arguments.
*
* For non-static method calls, this will verify that the method call is
* appropriate for the "this" argument.
*
* The method reference is in vBBBB. The "is_range" parameter determines
* whether we use 0-4 "args" values or a range of registers defined by
* vAA and vCCCC.
*
* Widening conversions on integers and references are allowed, but
* narrowing conversions are not.
*
* Returns the resolved method on success, NULL on failure (with *failure
* set appropriately).
*/
Method* VerifyInvocationArgs(const Instruction::DecodedInstruction& dec_insn,
MethodType method_type, bool is_range, bool is_super);
/*
* Return the register type for the method. We can't just use the already-computed
* DalvikJniReturnType, because if it's a reference type we need to do the class lookup.
* Returned references are assumed to be initialized. Returns kRegTypeUnknown for "void".
*/
const RegType& GetMethodReturnType();
/*
* Verify that the target instruction is not "move-exception". It's important that the only way
* to execute a move-exception is as the first instruction of an exception handler.
* Returns "true" if all is well, "false" if the target instruction is move-exception.
*/
bool CheckMoveException(const uint16_t* insns, int insn_idx);
/*
* Replace an instruction with "throw-verification-error". This allows us to
* defer error reporting until the code path is first used.
*/
void ReplaceFailingInstruction();
/*
* Control can transfer to "next_insn". Merge the registers from merge_line into the table at
* next_insn, and set the changed flag on the target address if any of the registers were changed.
* Returns "false" if an error is encountered.
*/
bool UpdateRegisters(uint32_t next_insn, const RegisterLine* merge_line);
/*
* Generate the GC map for a method that has just been verified (i.e. we're doing this as part of
* verification). For type-precise determination we have all the data we need, so we just need to
* encode it in some clever fashion.
* Returns a pointer to a newly-allocated RegisterMap, or NULL on failure.
*/
const std::vector<uint8_t>* GenerateGcMap();
// Verify that the GC map associated with method_ is well formed
void VerifyGcMap(const std::vector<uint8_t>& data);
// Compute sizes for GC map data
void ComputeGcMapSizes(size_t* gc_points, size_t* ref_bitmap_bits, size_t* log2_max_gc_pc);
InsnFlags CurrentInsnFlags() {
return insn_flags_[work_insn_idx_];
}
typedef std::map<const Compiler::MethodReference, const std::vector<uint8_t>*> GcMapTable;
// All the GC maps that the verifier has created
static GcMapTable gc_maps_;
static void SetGcMap(Compiler::MethodReference ref, const std::vector<uint8_t>& gc_map);
RegTypeCache reg_types_;
PcToRegisterLineTable reg_table_;
// Storage for the register status we're currently working on.
UniquePtr<RegisterLine> work_line_;
// The address of the instruction we're currently working on, note that this is in 2 byte
// quantities
uint32_t work_insn_idx_;
// Storage for the register status we're saving for later.
UniquePtr<RegisterLine> saved_line_;
Method* method_; // The method we're working on.
const DexFile* dex_file_; // The dex file containing the method.
const DexFile::CodeItem* code_item_; // The code item containing the code for the method.
UniquePtr<InsnFlags[]> insn_flags_; // Instruction widths and flags, one entry per code unit.
// The type of any error that occurs
VerifyError failure_;
// Failure message log
std::ostringstream fail_messages_;
// Info message log
std::ostringstream info_messages_;
// The number of occurrences of specific opcodes.
size_t new_instance_count_;
size_t monitor_enter_count_;
};
// Lightweight wrapper for PC to reference bit maps.
class PcToReferenceMap {
public:
PcToReferenceMap(const uint8_t* data, size_t data_length) {
data_ = data;
CHECK(data_ != NULL);
// Check the size of the table agrees with the number of entries
size_t data_size = data_length - 4;
DCHECK_EQ(EntryWidth() * NumEntries(), data_size);
}
// The number of entries in the table
size_t NumEntries() const {
return GetData()[2] | (GetData()[3] << 8);
}
// Get the PC at the given index
uint16_t GetPC(size_t index) const {
size_t entry_offset = index * EntryWidth();
if (PcWidth() == 1) {
return Table()[entry_offset];
} else {
return Table()[entry_offset] | (Table()[entry_offset + 1] << 8);
}
}
// Return address of bitmap encoding what are live references
const uint8_t* GetBitMap(size_t index) const {
size_t entry_offset = index * EntryWidth();
return &Table()[entry_offset + PcWidth()];
}
// Find the bitmap associated with the given dex pc
const uint8_t* FindBitMap(uint16_t dex_pc, bool error_if_not_present = true) const;
// The number of bytes used to encode registers
size_t RegWidth() const {
return GetData()[1];
}
private:
// Table of num_entries * (dex pc, bitmap)
const uint8_t* Table() const {
return GetData() + 4;
}
// The format of the table of the PCs for the table
RegisterMapFormat Format() const {
return static_cast<RegisterMapFormat>(GetData()[0]);
}
// Number of bytes used to encode a dex pc
size_t PcWidth() const {
RegisterMapFormat format = Format();
switch (format) {
case kRegMapFormatCompact8:
return 1;
case kRegMapFormatCompact16:
return 2;
default:
LOG(FATAL) << "Invalid format " << static_cast<int>(format);
return -1;
}
}
// The width of an entry in the table
size_t EntryWidth() const {
return PcWidth() + RegWidth();
}
const uint8_t* GetData() const {
return data_;
}
const uint8_t* data_; // The header and table data
};
} // namespace verifier
} // namespace art
#endif // ART_SRC_DEX_VERIFY_H_
|