diff options
Diffstat (limited to 'sandbox/src/sidestep/mini_disassembler.cpp')
-rw-r--r-- | sandbox/src/sidestep/mini_disassembler.cpp | 395 |
1 files changed, 0 insertions, 395 deletions
diff --git a/sandbox/src/sidestep/mini_disassembler.cpp b/sandbox/src/sidestep/mini_disassembler.cpp deleted file mode 100644 index 514522a..0000000 --- a/sandbox/src/sidestep/mini_disassembler.cpp +++ /dev/null @@ -1,395 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Implementation of MiniDisassembler. - -#ifdef _WIN64 -#error The code in this file should not be used on 64-bit Windows. -#endif - -#include "sandbox/src/sidestep/mini_disassembler.h" - -namespace sidestep { - -MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits, - bool address_default_is_32_bits) - : operand_default_is_32_bits_(operand_default_is_32_bits), - address_default_is_32_bits_(address_default_is_32_bits) { - Initialize(); -} - -MiniDisassembler::MiniDisassembler() - : operand_default_is_32_bits_(true), - address_default_is_32_bits_(true) { - Initialize(); -} - -InstructionType MiniDisassembler::Disassemble( - unsigned char* start_byte, - unsigned int* instruction_bytes) { - // Clean up any state from previous invocations. - Initialize(); - - // Start by processing any prefixes. - unsigned char* current_byte = start_byte; - unsigned int size = 0; - InstructionType instruction_type = ProcessPrefixes(current_byte, &size); - - if (IT_UNKNOWN == instruction_type) - return instruction_type; - - current_byte += size; - size = 0; - - // Invariant: We have stripped all prefixes, and the operand_is_32_bits_ - // and address_is_32_bits_ flags are correctly set. - - instruction_type = ProcessOpcode(current_byte, 0, &size); - - // Check for error processing instruction - if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) { - return IT_UNKNOWN; - } - - current_byte += size; - - // Invariant: operand_bytes_ indicates the total size of operands - // specified by the opcode and/or ModR/M byte and/or SIB byte. - // pCurrentByte points to the first byte after the ModR/M byte, or after - // the SIB byte if it is present (i.e. the first byte of any operands - // encoded in the instruction). - - // We get the total length of any prefixes, the opcode, and the ModR/M and - // SIB bytes if present, by taking the difference of the original starting - // address and the current byte (which points to the first byte of the - // operands if present, or to the first byte of the next instruction if - // they are not). Adding the count of bytes in the operands encoded in - // the instruction gives us the full length of the instruction in bytes. - *instruction_bytes += operand_bytes_ + (current_byte - start_byte); - - // Return the instruction type, which was set by ProcessOpcode(). - return instruction_type_; -} - -void MiniDisassembler::Initialize() { - operand_is_32_bits_ = operand_default_is_32_bits_; - address_is_32_bits_ = address_default_is_32_bits_; - operand_bytes_ = 0; - have_modrm_ = false; - should_decode_modrm_ = false; - instruction_type_ = IT_UNKNOWN; - got_f2_prefix_ = false; - got_f3_prefix_ = false; - got_66_prefix_ = false; -} - -InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte, - unsigned int* size) { - InstructionType instruction_type = IT_GENERIC; - const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte]; - - switch (opcode.type_) { - case IT_PREFIX_ADDRESS: - address_is_32_bits_ = !address_default_is_32_bits_; - goto nochangeoperand; - case IT_PREFIX_OPERAND: - operand_is_32_bits_ = !operand_default_is_32_bits_; - nochangeoperand: - case IT_PREFIX: - - if (0xF2 == (*start_byte)) - got_f2_prefix_ = true; - else if (0xF3 == (*start_byte)) - got_f3_prefix_ = true; - else if (0x66 == (*start_byte)) - got_66_prefix_ = true; - - instruction_type = opcode.type_; - (*size)++; - // we got a prefix, so add one and check next byte - ProcessPrefixes(start_byte + 1, size); - default: - break; // not a prefix byte - } - - return instruction_type; -} - -InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte, - unsigned int table_index, - unsigned int* size) { - const OpcodeTable& table = s_ia32_opcode_map_[table_index]; // Get our table - unsigned char current_byte = (*start_byte) >> table.shift_; - current_byte = current_byte & table.mask_; // Mask out the bits we will use - - // Check whether the byte we have is inside the table we have. - if (current_byte < table.min_lim_ || current_byte > table.max_lim_) { - instruction_type_ = IT_UNKNOWN; - return instruction_type_; - } - - const Opcode& opcode = table.table_[current_byte]; - if (IT_UNUSED == opcode.type_) { - // This instruction is not used by the IA-32 ISA, so we indicate - // this to the user. Probably means that we were pointed to - // a byte in memory that was not the start of an instruction. - instruction_type_ = IT_UNUSED; - return instruction_type_; - } else if (IT_REFERENCE == opcode.type_) { - // We are looking at an opcode that has more bytes (or is continued - // in the ModR/M byte). Recursively find the opcode definition in - // the table for the opcode's next byte. - (*size)++; - ProcessOpcode(start_byte + 1, opcode.table_index_, size); - return instruction_type_; - } - - const SpecificOpcode* specific_opcode = reinterpret_cast< - const SpecificOpcode*>(&opcode); - if (opcode.is_prefix_dependent_) { - if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) { - specific_opcode = &opcode.opcode_if_f2_prefix_; - } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) { - specific_opcode = &opcode.opcode_if_f3_prefix_; - } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) { - specific_opcode = &opcode.opcode_if_66_prefix_; - } - } - - // Inv: The opcode type is known. - instruction_type_ = specific_opcode->type_; - - // Let's process the operand types to see if we have any immediate - // operands, and/or a ModR/M byte. - - ProcessOperand(specific_opcode->flag_dest_); - ProcessOperand(specific_opcode->flag_source_); - ProcessOperand(specific_opcode->flag_aux_); - - // Inv: We have processed the opcode and incremented operand_bytes_ - // by the number of bytes of any operands specified by the opcode - // that are stored in the instruction (not registers etc.). Now - // we need to return the total number of bytes for the opcode and - // for the ModR/M or SIB bytes if they are present. - - if (table.mask_ != 0xff) { - if (have_modrm_) { - // we're looking at a ModR/M byte so we're not going to - // count that into the opcode size - ProcessModrm(start_byte, size); - return IT_GENERIC; - } else { - // need to count the ModR/M byte even if it's just being - // used for opcode extension - (*size)++; - return IT_GENERIC; - } - } else { - if (have_modrm_) { - // The ModR/M byte is the next byte. - (*size)++; - ProcessModrm(start_byte + 1, size); - return IT_GENERIC; - } else { - (*size)++; - return IT_GENERIC; - } - } -} - -bool MiniDisassembler::ProcessOperand(int flag_operand) { - bool succeeded = true; - if (AM_NOT_USED == flag_operand) - return succeeded; - - // Decide what to do based on the addressing mode. - switch (flag_operand & AM_MASK) { - // No ModR/M byte indicated by these addressing modes, and no - // additional (e.g. immediate) parameters. - case AM_A: // Direct address - case AM_F: // EFLAGS register - case AM_X: // Memory addressed by the DS:SI register pair - case AM_Y: // Memory addressed by the ES:DI register pair - case AM_IMPLICIT: // Parameter is implicit, occupies no space in - // instruction - break; - - // There is a ModR/M byte but it does not necessarily need - // to be decoded. - case AM_C: // reg field of ModR/M selects a control register - case AM_D: // reg field of ModR/M selects a debug register - case AM_G: // reg field of ModR/M selects a general register - case AM_P: // reg field of ModR/M selects an MMX register - case AM_R: // mod field of ModR/M may refer only to a general register - case AM_S: // reg field of ModR/M selects a segment register - case AM_T: // reg field of ModR/M selects a test register - case AM_V: // reg field of ModR/M selects a 128-bit XMM register - have_modrm_ = true; - break; - - // In these addressing modes, there is a ModR/M byte and it needs to be - // decoded. No other (e.g. immediate) params than indicated in ModR/M. - case AM_E: // Operand is either a general-purpose register or memory, - // specified by ModR/M byte - case AM_M: // ModR/M byte will refer only to memory - case AM_Q: // Operand is either an MMX register or memory (complex - // evaluation), specified by ModR/M byte - case AM_W: // Operand is either a 128-bit XMM register or memory (complex - // eval), specified by ModR/M byte - have_modrm_ = true; - should_decode_modrm_ = true; - break; - - // These addressing modes specify an immediate or an offset value - // directly, so we need to look at the operand type to see how many - // bytes. - case AM_I: // Immediate data. - case AM_J: // Jump to offset. - case AM_O: // Operand is at offset. - switch (flag_operand & OT_MASK) { - case OT_B: // Byte regardless of operand-size attribute. - operand_bytes_ += OS_BYTE; - break; - case OT_C: // Byte or word, depending on operand-size attribute. - if (operand_is_32_bits_) - operand_bytes_ += OS_WORD; - else - operand_bytes_ += OS_BYTE; - break; - case OT_D: // Doubleword, regardless of operand-size attribute. - operand_bytes_ += OS_DOUBLE_WORD; - break; - case OT_DQ: // Double-quadword, regardless of operand-size attribute. - operand_bytes_ += OS_DOUBLE_QUAD_WORD; - break; - case OT_P: // 32-bit or 48-bit pointer, depending on operand-size - // attribute. - if (operand_is_32_bits_) - operand_bytes_ += OS_48_BIT_POINTER; - else - operand_bytes_ += OS_32_BIT_POINTER; - break; - case OT_PS: // 128-bit packed single-precision floating-point data. - operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING; - break; - case OT_Q: // Quadword, regardless of operand-size attribute. - operand_bytes_ += OS_QUAD_WORD; - break; - case OT_S: // 6-byte pseudo-descriptor. - operand_bytes_ += OS_PSEUDO_DESCRIPTOR; - break; - case OT_SD: // Scalar Double-Precision Floating-Point Value - case OT_PD: // Unaligned packed double-precision floating point value - operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING; - break; - case OT_SS: - // Scalar element of a 128-bit packed single-precision - // floating data. - // We simply return enItUnknown since we don't have to support - // floating point - succeeded = false; - break; - case OT_V: // Word or doubleword, depending on operand-size attribute. - if (operand_is_32_bits_) - operand_bytes_ += OS_DOUBLE_WORD; - else - operand_bytes_ += OS_WORD; - break; - case OT_W: // Word, regardless of operand-size attribute. - operand_bytes_ += OS_WORD; - break; - - // Can safely ignore these. - case OT_A: // Two one-word operands in memory or two double-word - // operands in memory - case OT_PI: // Quadword MMX technology register (e.g. mm0) - case OT_SI: // Doubleword integer register (e.g., eax) - break; - - default: - break; - } - break; - - default: - break; - } - - return succeeded; -} - -bool MiniDisassembler::ProcessModrm(unsigned char* start_byte, - unsigned int* size) { - // If we don't need to decode, we just return the size of the ModR/M - // byte (there is never a SIB byte in this case). - if (!should_decode_modrm_) { - (*size)++; - return true; - } - - // We never care about the reg field, only the combination of the mod - // and r/m fields, so let's start by packing those fields together into - // 5 bits. - unsigned char modrm = (*start_byte); - unsigned char mod = modrm & 0xC0; // mask out top two bits to get mod field - modrm = modrm & 0x07; // mask out bottom 3 bits to get r/m field - mod = mod >> 3; // shift the mod field to the right place - modrm = mod | modrm; // combine the r/m and mod fields as discussed - mod = mod >> 3; // shift the mod field to bits 2..0 - - // Invariant: modrm contains the mod field in bits 4..3 and the r/m field - // in bits 2..0, and mod contains the mod field in bits 2..0 - - const ModrmEntry* modrm_entry = 0; - if (address_is_32_bits_) - modrm_entry = &s_ia32_modrm_map_[modrm]; - else - modrm_entry = &s_ia16_modrm_map_[modrm]; - - // Invariant: modrm_entry points to information that we need to decode - // the ModR/M byte. - - // Add to the count of operand bytes, if the ModR/M byte indicates - // that some operands are encoded in the instruction. - if (modrm_entry->is_encoded_in_instruction_) - operand_bytes_ += modrm_entry->operand_size_; - - // Process the SIB byte if necessary, and return the count - // of ModR/M and SIB bytes. - if (modrm_entry->use_sib_byte_) { - (*size)++; - return ProcessSib(start_byte + 1, mod, size); - } else { - (*size)++; - return true; - } -} - -bool MiniDisassembler::ProcessSib(unsigned char* start_byte, - unsigned char mod, - unsigned int* size) { - // get the mod field from the 2..0 bits of the SIB byte - unsigned char sib_base = (*start_byte) & 0x07; - if (0x05 == sib_base) { - switch (mod) { - case 0x00: // mod == 00 - case 0x02: // mod == 10 - operand_bytes_ += OS_DOUBLE_WORD; - break; - case 0x01: // mod == 01 - operand_bytes_ += OS_BYTE; - break; - case 0x03: // mod == 11 - // According to the IA-32 docs, there does not seem to be a disp - // value for this value of mod - default: - break; - } - } - - (*size)++; - return true; -} - -}; // namespace sidestep |