diff options
author | Mark Mendell <mark.p.mendell@intel.com> | 2014-04-29 16:55:20 -0400 |
---|---|---|
committer | buzbee <buzbee@google.com> | 2014-05-16 11:04:27 -0700 |
commit | d65c51a556e6649db4e18bd083c8fec37607a442 (patch) | |
tree | 97fcb17ae74a587c6ef756dda6f4b03db5e9950f /compiler/dex/quick/x86 | |
parent | 1e97c4a4ab9f17d1394b952882d59d894b1e3c74 (diff) | |
download | art-d65c51a556e6649db4e18bd083c8fec37607a442.zip art-d65c51a556e6649db4e18bd083c8fec37607a442.tar.gz art-d65c51a556e6649db4e18bd083c8fec37607a442.tar.bz2 |
ART: Add support for constant vector literals
Add in some vector instructions. Implement the ConstVector
instruction, which takes four 32-bit words of data and loads
them into an XMM register.
Initially, only the ConstVector MIR opcode is implemented. Others will
be added after this one goes in.
Change-Id: I5c79bc8b7de9030ef1c213fc8b227debc47f6337
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
Diffstat (limited to 'compiler/dex/quick/x86')
-rw-r--r-- | compiler/dex/quick/x86/assemble_x86.cc | 25 | ||||
-rw-r--r-- | compiler/dex/quick/x86/codegen_x86.h | 32 | ||||
-rw-r--r-- | compiler/dex/quick/x86/target_x86.cc | 123 | ||||
-rw-r--r-- | compiler/dex/quick/x86/utility_x86.cc | 3 | ||||
-rw-r--r-- | compiler/dex/quick/x86/x86_lir.h | 2 |
5 files changed, 170 insertions, 15 deletions
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc index c0c60d7..9200106 100644 --- a/compiler/dex/quick/x86/assemble_x86.cc +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -320,6 +320,11 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86Fstp32M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xD9, 0x00, 0, 3, 0, 0 }, "FstpsM", "[!0r,!1d]" }, { kX86Fstp64M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDD, 0x00, 0, 3, 0, 0 }, "FstpdM", "[!0r,!1d]" }, + EXT_0F_ENCODING_MAP(Mova128, 0x66, 0x6F, REG_DEF0), + { kX86Mova128MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x66, 0, 0x0F, 0x6F, 0, 0, 0, 0 }, "Mova128MR", "[!0r+!1d],!2r" }, + { kX86Mova128AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x66, 0, 0x0F, 0x6F, 0, 0, 0, 0 }, "Mova128AR", "[!0r+!1r<<!2d+!3d],!4r" }, /* NOTE(review): Mova128MR/AR are marked IS_STORE but reuse the load opcode 0x6F; the MOVDQA store form is 66 0F 7F (compare Movups: 0x10 load, 0x11 store) - confirm. */ + + EXT_0F_ENCODING_MAP(Movups, 0x0, 0x10, REG_DEF0), { kX86MovupsMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovupsMR", "[!0r+!1d],!2r" }, { kX86MovupsAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovupsAR", "[!0r+!1r<<!2d+!3d],!4r" }, @@ -1508,6 +1513,26 @@ int X86Mir2Lir::AssignInsnOffsets() { void X86Mir2Lir::AssignOffsets() { int offset = AssignInsnOffsets(); + if (const_vectors_ != nullptr) { + /* assign offsets to vector literals */ + + // First, get offset to 12 mod 16 to align to 16 byte boundary. + // This will ensure that the vector is 16 byte aligned, as the procedure is + // always aligned at 4 mod 16. + int align_size = (16-4) - (offset & 0xF); + if (align_size < 0) { + align_size += 16; + } + + offset += align_size; + + // Now assign each literal the right offset. 
+ for (LIR *p = const_vectors_; p != nullptr; p = p->next) { + p->offset = offset; + offset += 16; + } + } + /* Const values have to be word aligned */ offset = RoundUp(offset, 4); diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 47d1792..cc0e1f2 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -408,6 +408,22 @@ class X86Mir2Lir FINAL : public Mir2Lir { bool GenInlinedIndexOf(CallInfo* info, bool zero_based); /* + * @brief Load 128 bit constant into vector register. + * @param bb The basic block containing the MIR. + * @param mir The MIR whose opcode is kMirOpConstVector. + * @note vA is the TypeSize for the register; its low 16 bits must be 128. + * @note vB is the destination XMM register (must be < 8). arg[0..3] are 32 bit constant values. + */ + void GenConst128(BasicBlock* bb, MIR* mir); + + /* + * @brief Generate code for a vector opcode. + * @param bb The basic block containing the MIR. + * @param mir The MIR whose opcode is a non-standard opcode. + */ + void GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir); + + /* + * @brief Return the correct x86 opcode for the Dex operation * @param op Dex opcode for the operation * @param loc Register location of the operand @@ -613,6 +629,22 @@ class X86Mir2Lir FINAL : public Mir2Lir { // 64-bit mode bool gen64bit_; + + // The list of const vector literals. + LIR *const_vectors_; + + /* + * @brief Search for a matching vector literal + * @param mir A kMirOpConstVector MIR instruction to match. + * @returns pointer to matching LIR constant, or nullptr if not found. + */ + LIR *ScanVectorLiteral(MIR *mir); + + /* + * @brief Add a constant vector literal + * @param mir A kMirOpConstVector MIR instruction whose four constant words are added. 
+ */ + LIR *AddVectorLiteral(MIR *mir); }; } // namespace art diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index 2e6bfde..237c68c 100644 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -641,13 +641,15 @@ X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* method_address_insns_(arena, 100, kGrowableArrayMisc), class_type_address_insns_(arena, 100, kGrowableArrayMisc), call_method_insns_(arena, 100, kGrowableArrayMisc), - stack_decrement_(nullptr), stack_increment_(nullptr), gen64bit_(gen64bit) { + stack_decrement_(nullptr), stack_increment_(nullptr), gen64bit_(gen64bit), + const_vectors_(nullptr) { + store_method_addr_used_ = false; if (kIsDebugBuild) { for (int i = 0; i < kX86Last; i++) { if (X86Mir2Lir::EncodingMap[i].opcode != i) { LOG(FATAL) << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name - << " is wrong: expecting " << i << ", seeing " - << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode); + << " is wrong: expecting " << i << ", seeing " + << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode); } } } @@ -838,12 +840,46 @@ LIR *X86Mir2Lir::CallWithLinkerFixup(const MethodReference& target_method, Invok return call; } +/* + * @brief Append a 32 bit quantity to a buffer, least significant byte first. + * @param buf Buffer to append to. + * @param data Data value. + */ + +static void PushWord(std::vector<uint8_t>&buf, int32_t data) { + buf.push_back(data & 0xff); + buf.push_back((data >> 8) & 0xff); + buf.push_back((data >> 16) & 0xff); + buf.push_back((data >> 24) & 0xff); +} + void X86Mir2Lir::InstallLiteralPools() { // These are handled differently for x86. DCHECK(code_literal_list_ == nullptr); DCHECK(method_literal_list_ == nullptr); DCHECK(class_literal_list_ == nullptr); + // Align to 16 byte boundary. We have implicit knowledge that the start of the method is + // at 4 mod 16 (as assumed in AssignOffsets). How can I check this if it changes (other than aligned loads + // will fail at runtime)? 
+ if (const_vectors_ != nullptr) { + int align_size = (16-4) - (code_buffer_.size() & 0xF); + if (align_size < 0) { + align_size += 16; + } + + while (align_size > 0) { + code_buffer_.push_back(0); + align_size--; + } + for (LIR *p = const_vectors_; p != nullptr; p = p->next) { + PushWord(code_buffer_, p->operands[0]); + PushWord(code_buffer_, p->operands[1]); + PushWord(code_buffer_, p->operands[2]); + PushWord(code_buffer_, p->operands[3]); + } + } + // Handle the fixups for methods. for (uint32_t i = 0; i < method_address_insns_.Size(); i++) { LIR* p = method_address_insns_.Get(i); @@ -1074,18 +1110,6 @@ bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { } /* - * @brief Enter a 32 bit quantity into the FDE buffer - * @param buf FDE buffer. - * @param data Data value. - */ -static void PushWord(std::vector<uint8_t>&buf, int data) { - buf.push_back(data & 0xff); - buf.push_back((data >> 8) & 0xff); - buf.push_back((data >> 16) & 0xff); - buf.push_back((data >> 24) & 0xff); -} - -/* * @brief Enter an 'advance LOC' into the FDE buffer * @param buf FDE buffer. * @param increment Amount by which to increase the current location. @@ -1235,4 +1259,73 @@ std::vector<uint8_t>* X86Mir2Lir::ReturnCallFrameInformation() { return cfi_info; } +void X86Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) { + switch (static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode)) { + case kMirOpConstVector: + GenConst128(bb, mir); + break; + default: + break; + } +} + +void X86Mir2Lir::GenConst128(BasicBlock* bb, MIR* mir) { + int type_size = mir->dalvikInsn.vA; + // We support 128 bit vectors. + DCHECK_EQ(type_size & 0xFFFF, 128); + int reg = mir->dalvikInsn.vB; + DCHECK_LT(reg, 8); + uint32_t *args = mir->dalvikInsn.arg; + // Check for all 0 case. + if (args[0] == 0 && args[1] == 0 && args[2] == 0 && args[3] == 0) { + NewLIR2(kX86XorpsRR, reg, reg); + return; + } + // Okay, load it from the constant vector area. 
+ LIR *data_target = ScanVectorLiteral(mir); + if (data_target == nullptr) { + data_target = AddVectorLiteral(mir); + } + + // Address the start of the method. + RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); + rl_method = LoadValue(rl_method, kCoreReg); + + // Load the proper value from the literal area. + // We don't know the proper offset for the value, so pick one that will force + // 4 byte offset. We will fix this up in the assembler later to have the right + // value. + LIR *load = NewLIR3(kX86Mova128RM, reg, rl_method.reg.GetReg(), 256 /* bogus */); + load->flags.fixup = kFixupLoad; + load->target = data_target; + SetMemRefType(load, true, kLiteral); +} + +LIR *X86Mir2Lir::ScanVectorLiteral(MIR *mir) { + int *args = reinterpret_cast<int*>(mir->dalvikInsn.arg); + for (LIR *p = const_vectors_; p != nullptr; p = p->next) { + if (args[0] == p->operands[0] && args[1] == p->operands[1] && + args[2] == p->operands[2] && args[3] == p->operands[3]) { + return p; + } + } + return nullptr; +} + +LIR *X86Mir2Lir::AddVectorLiteral(MIR *mir) { + LIR* new_value = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocData)); + int *args = reinterpret_cast<int*>(mir->dalvikInsn.arg); + new_value->operands[0] = args[0]; + new_value->operands[1] = args[1]; + new_value->operands[2] = args[2]; + new_value->operands[3] = args[3]; + new_value->next = const_vectors_; + if (const_vectors_ == nullptr) { + estimated_native_code_size_ += 12; // Estimated padding to align to 16 byte boundary. NOTE(review): the (16-4) - (offset & 0xF) padding can reach 15 bytes - confirm 12 is an adequate estimate. + } + estimated_native_code_size_ += 16; // Space for one vector. 
+ const_vectors_ = new_value; + return new_value; +} + } // namespace art diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc index fb85318..e9592a6 100644 --- a/compiler/dex/quick/x86/utility_x86.cc +++ b/compiler/dex/quick/x86/utility_x86.cc @@ -866,6 +866,9 @@ void X86Mir2Lir::AnalyzeExtendedMIR(int opcode, BasicBlock * bb, MIR *mir) { case kMirOpFusedCmpgDouble: AnalyzeFPInstruction(opcode, bb, mir); break; + case kMirOpConstVector: + store_method_addr_ = true; + break; default: // Ignore the rest. break; diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h index c8c2542..adfed0c 100644 --- a/compiler/dex/quick/x86/x86_lir.h +++ b/compiler/dex/quick/x86/x86_lir.h @@ -425,6 +425,8 @@ enum X86OpCode { kX86Fild64M, // push 64-bit integer on x87 stack kX86Fstp32M, // pop top x87 fp stack and do 32-bit store kX86Fstp64M, // pop top x87 fp stack and do 64-bit store + Binary0fOpCode(kX86Mova128), // load 128 bits aligned from xmm2/m128 to xmm1 + kX86Mova128MR, kX86Mova128AR, // store 128 bit aligned from xmm1 to m128 Binary0fOpCode(kX86Movups), // load unaligned packed single FP values from xmm2/m128 to xmm1 kX86MovupsMR, kX86MovupsAR, // store unaligned packed single FP values from xmm1 to m128 Binary0fOpCode(kX86Movaps), // load aligned packed single FP values from xmm2/m128 to xmm1 |