diff options
Diffstat (limited to 'tools/memory_watcher/mini_disassembler.cc')
-rw-r--r-- | tools/memory_watcher/mini_disassembler.cc | 418 |
1 files changed, 418 insertions, 0 deletions
diff --git a/tools/memory_watcher/mini_disassembler.cc b/tools/memory_watcher/mini_disassembler.cc new file mode 100644 index 0000000..6b1dec8 --- /dev/null +++ b/tools/memory_watcher/mini_disassembler.cc @@ -0,0 +1,418 @@ +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * + * Implementation of MiniDisassembler. + */ + +#include "mini_disassembler.h" + +namespace sidestep { + +MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits, + bool address_default_is_32_bits) + : operand_default_is_32_bits_(operand_default_is_32_bits), + address_default_is_32_bits_(address_default_is_32_bits) { + Initialize(); +} + +MiniDisassembler::MiniDisassembler() + : operand_default_is_32_bits_(true), + address_default_is_32_bits_(true) { + Initialize(); +} + +InstructionType MiniDisassembler::Disassemble( + unsigned char* start_byte, + unsigned int& instruction_bytes) { + // Clean up any state from previous invocations. + Initialize(); + + // Start by processing any prefixes. + unsigned char* current_byte = start_byte; + unsigned int size = 0; + InstructionType instruction_type = ProcessPrefixes(current_byte, size); + + if (IT_UNKNOWN == instruction_type) + return instruction_type; + + current_byte += size; + size = 0; + + // Invariant: We have stripped all prefixes, and the operand_is_32_bits_ + // and address_is_32_bits_ flags are correctly set. + + instruction_type = ProcessOpcode(current_byte, 0, size); + + // Check for error processing instruction + if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) { + return IT_UNKNOWN; + } + + current_byte += size; + + // Invariant: operand_bytes_ indicates the total size of operands + // specified by the opcode and/or ModR/M byte and/or SIB byte. + // pCurrentByte points to the first byte after the ModR/M byte, or after + // the SIB byte if it is present (i.e. the first byte of any operands + // encoded in the instruction). + + // We get the total length of any prefixes, the opcode, and the ModR/M and + // SIB bytes if present, by taking the difference of the original starting + // address and the current byte (which points to the first byte of the + // operands if present, or to the first byte of the next instruction if + // they are not). Adding the count of bytes in the operands encoded in + // the instruction gives us the full length of the instruction in bytes. + instruction_bytes += operand_bytes_ + (current_byte - start_byte); + + // Return the instruction type, which was set by ProcessOpcode(). + return instruction_type_; +} + +void MiniDisassembler::Initialize() { + operand_is_32_bits_ = operand_default_is_32_bits_; + address_is_32_bits_ = address_default_is_32_bits_; + operand_bytes_ = 0; + have_modrm_ = false; + should_decode_modrm_ = false; + instruction_type_ = IT_UNKNOWN; + got_f2_prefix_ = false; + got_f3_prefix_ = false; + got_66_prefix_ = false; +} + +InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte, + unsigned int& size) { + InstructionType instruction_type = IT_GENERIC; + const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte]; + + switch (opcode.type_) { + case IT_PREFIX_ADDRESS: + address_is_32_bits_ = !address_default_is_32_bits_; + goto nochangeoperand; + case IT_PREFIX_OPERAND: + operand_is_32_bits_ = !operand_default_is_32_bits_; + nochangeoperand: + case IT_PREFIX: + + if (0xF2 == (*start_byte)) + got_f2_prefix_ = true; + else if (0xF3 == (*start_byte)) + got_f3_prefix_ = true; + else if (0x66 == (*start_byte)) + got_66_prefix_ = true; + + instruction_type = opcode.type_; + size ++; + // we got a prefix, so add one and check next byte + ProcessPrefixes(start_byte + 1, size); + default: + break; // not a prefix byte + } + + return instruction_type; +} + +InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte, + unsigned int table_index, + unsigned int& size) { + const OpcodeTable& table = s_ia32_opcode_map_[table_index]; // Get our table + unsigned char current_byte = (*start_byte) >> table.shift_; + current_byte = current_byte & table.mask_; // Mask out the bits we will use + + // Check whether the byte we have is inside the table we have. + if (current_byte < table.min_lim_ || current_byte > table.max_lim_) { + instruction_type_ = IT_UNKNOWN; + return instruction_type_; + } + + const Opcode& opcode = table.table_[current_byte]; + if (IT_UNUSED == opcode.type_) { + // This instruction is not used by the IA-32 ISA, so we indicate + // this to the user. Probably means that we were pointed to + // a byte in memory that was not the start of an instruction. + instruction_type_ = IT_UNUSED; + return instruction_type_; + } else if (IT_REFERENCE == opcode.type_) { + // We are looking at an opcode that has more bytes (or is continued + // in the ModR/M byte). Recursively find the opcode definition in + // the table for the opcode's next byte. + size++; + ProcessOpcode(start_byte + 1, opcode.table_index_, size); + return instruction_type_; + } + + const SpecificOpcode* specific_opcode = (SpecificOpcode*)&opcode; + if (opcode.is_prefix_dependent_) { + if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) { + specific_opcode = &opcode.opcode_if_f2_prefix_; + } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) { + specific_opcode = &opcode.opcode_if_f3_prefix_; + } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) { + specific_opcode = &opcode.opcode_if_66_prefix_; + } + } + + // Inv: The opcode type is known. + instruction_type_ = specific_opcode->type_; + + // Let's process the operand types to see if we have any immediate + // operands, and/or a ModR/M byte. + + ProcessOperand(specific_opcode->flag_dest_); + ProcessOperand(specific_opcode->flag_source_); + ProcessOperand(specific_opcode->flag_aux_); + + // Inv: We have processed the opcode and incremented operand_bytes_ + // by the number of bytes of any operands specified by the opcode + // that are stored in the instruction (not registers etc.). Now + // we need to return the total number of bytes for the opcode and + // for the ModR/M or SIB bytes if they are present. + + if (table.mask_ != 0xff) { + if (have_modrm_) { + // we're looking at a ModR/M byte so we're not going to + // count that into the opcode size + ProcessModrm(start_byte, size); + return IT_GENERIC; + } else { + // need to count the ModR/M byte even if it's just being + // used for opcode extension + size++; + return IT_GENERIC; + } + } else { + if (have_modrm_) { + // The ModR/M byte is the next byte. + size++; + ProcessModrm(start_byte + 1, size); + return IT_GENERIC; + } else { + size++; + return IT_GENERIC; + } + } +} + +bool MiniDisassembler::ProcessOperand(int flag_operand) { + bool succeeded = true; + if (AM_NOT_USED == flag_operand) + return succeeded; + + // Decide what to do based on the addressing mode. + switch (flag_operand & AM_MASK) { + // No ModR/M byte indicated by these addressing modes, and no + // additional (e.g. immediate) parameters. + case AM_A: // Direct address + case AM_F: // EFLAGS register + case AM_X: // Memory addressed by the DS:SI register pair + case AM_Y: // Memory addressed by the ES:DI register pair + case AM_IMPLICIT: // Parameter is implicit, occupies no space in + // instruction + break; + + // There is a ModR/M byte but it does not necessarily need + // to be decoded. + case AM_C: // reg field of ModR/M selects a control register + case AM_D: // reg field of ModR/M selects a debug register + case AM_G: // reg field of ModR/M selects a general register + case AM_P: // reg field of ModR/M selects an MMX register + case AM_R: // mod field of ModR/M may refer only to a general register + case AM_S: // reg field of ModR/M selects a segment register + case AM_T: // reg field of ModR/M selects a test register + case AM_V: // reg field of ModR/M selects a 128-bit XMM register + have_modrm_ = true; + break; + + // In these addressing modes, there is a ModR/M byte and it needs to be + // decoded. No other (e.g. immediate) params than indicated in ModR/M. + case AM_E: // Operand is either a general-purpose register or memory, + // specified by ModR/M byte + case AM_M: // ModR/M byte will refer only to memory + case AM_Q: // Operand is either an MMX register or memory (complex + // evaluation), specified by ModR/M byte + case AM_W: // Operand is either a 128-bit XMM register or memory (complex + // eval), specified by ModR/M byte + have_modrm_ = true; + should_decode_modrm_ = true; + break; + + // These addressing modes specify an immediate or an offset value + // directly, so we need to look at the operand type to see how many + // bytes. + case AM_I: // Immediate data. + case AM_J: // Jump to offset. + case AM_O: // Operand is at offset. + switch (flag_operand & OT_MASK) { + case OT_B: // Byte regardless of operand-size attribute. + operand_bytes_ += OS_BYTE; + break; + case OT_C: // Byte or word, depending on operand-size attribute. + if (operand_is_32_bits_) + operand_bytes_ += OS_WORD; + else + operand_bytes_ += OS_BYTE; + break; + case OT_D: // Doubleword, regardless of operand-size attribute. + operand_bytes_ += OS_DOUBLE_WORD; + break; + case OT_DQ: // Double-quadword, regardless of operand-size attribute. + operand_bytes_ += OS_DOUBLE_QUAD_WORD; + break; + case OT_P: // 32-bit or 48-bit pointer, depending on operand-size + // attribute. + if (operand_is_32_bits_) + operand_bytes_ += OS_48_BIT_POINTER; + else + operand_bytes_ += OS_32_BIT_POINTER; + break; + case OT_PS: // 128-bit packed single-precision floating-point data. + operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING; + break; + case OT_Q: // Quadword, regardless of operand-size attribute. + operand_bytes_ += OS_QUAD_WORD; + break; + case OT_S: // 6-byte pseudo-descriptor. + operand_bytes_ += OS_PSEUDO_DESCRIPTOR; + break; + case OT_SD: // Scalar Double-Precision Floating-Point Value + case OT_PD: // Unaligned packed double-precision floating point value + operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING; + break; + case OT_SS: + // Scalar element of a 128-bit packed single-precision + // floating data. + // We simply return enItUnknown since we don't have to support + // floating point + succeeded = false; + break; + case OT_V: // Word or doubleword, depending on operand-size attribute. + if (operand_is_32_bits_) + operand_bytes_ += OS_DOUBLE_WORD; + else + operand_bytes_ += OS_WORD; + break; + case OT_W: // Word, regardless of operand-size attribute. + operand_bytes_ += OS_WORD; + break; + + // Can safely ignore these. + case OT_A: // Two one-word operands in memory or two double-word + // operands in memory + case OT_PI: // Quadword MMX technology register (e.g. mm0) + case OT_SI: // Doubleword integer register (e.g., eax) + break; + + default: + break; + } + break; + + default: + break; + } + + return succeeded; +} + +bool MiniDisassembler::ProcessModrm(unsigned char* start_byte, + unsigned int& size) { + // If we don't need to decode, we just return the size of the ModR/M + // byte (there is never a SIB byte in this case). + if (!should_decode_modrm_) { + size++; + return true; + } + + // We never care about the reg field, only the combination of the mod + // and r/m fields, so let's start by packing those fields together into + // 5 bits. + unsigned char modrm = (*start_byte); + unsigned char mod = modrm & 0xC0; // mask out top two bits to get mod field + modrm = modrm & 0x07; // mask out bottom 3 bits to get r/m field + mod = mod >> 3; // shift the mod field to the right place + modrm = mod | modrm; // combine the r/m and mod fields as discussed + mod = mod >> 3; // shift the mod field to bits 2..0 + + // Invariant: modrm contains the mod field in bits 4..3 and the r/m field + // in bits 2..0, and mod contains the mod field in bits 2..0 + + const ModrmEntry* modrm_entry = 0; + if (address_is_32_bits_) + modrm_entry = &s_ia32_modrm_map_[modrm]; + else + modrm_entry = &s_ia16_modrm_map_[modrm]; + + // Invariant: modrm_entry points to information that we need to decode + // the ModR/M byte. + + // Add to the count of operand bytes, if the ModR/M byte indicates + // that some operands are encoded in the instruction. + if (modrm_entry->is_encoded_in_instruction_) + operand_bytes_ += modrm_entry->operand_size_; + + // Process the SIB byte if necessary, and return the count + // of ModR/M and SIB bytes. + if (modrm_entry->use_sib_byte_) { + size++; + return ProcessSib(start_byte + 1, mod, size); + } else { + size++; + return true; + } +} + +bool MiniDisassembler::ProcessSib(unsigned char* start_byte, + unsigned char mod, + unsigned int& size) { + // get the mod field from the 2..0 bits of the SIB byte + unsigned char sib_base = (*start_byte) & 0x07; + if (0x05 == sib_base) { + switch (mod) { + case 0x00: // mod == 00 + case 0x02: // mod == 10 + operand_bytes_ += OS_DOUBLE_WORD; + break; + case 0x01: // mod == 01 + operand_bytes_ += OS_BYTE; + break; + case 0x03: // mod == 11 + // According to the IA-32 docs, there does not seem to be a disp + // value for this value of mod + default: + break; + } + } + + size++; + return true; +} + +}; // namespace sidestep |