83 files changed, 1915 insertions, 759 deletions
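The bulk of this change replaces the integer-only pass options in compiler/dex with the typed OptionContent added to compiler_ir.h, so a pass option can now carry either an integer or a string, and the overridden pass-options string parsed in pass_driver_me.h now falls back to storing the raw string whenever strtoll cannot interpret the value as an integer. The snippet below is an illustrative sketch of how the new types and accessors behave; the pass and option names are invented for the example and the snippet is not itself part of the patch.

// Illustrative sketch (not part of the patch): typed pass options via OptionContent.
OptionContent unroll_factor(4);           // stored as kInteger
OptionContent dump_prefix("unroll");      // stored as kString (strndup'd, capped at kOptionStringMaxLength)
CHECK(unroll_factor.Equals(INT64_C(4)));  // integer comparison
CHECK(dump_prefix.Equals("unroll"));      // bounded string comparison
CHECK(!dump_prefix.Equals(INT64_C(4)));   // type mismatch always compares unequal
//
// Inside a PassME subclass, defaults registered in default_options_ are read back
// with the typed getters, and any per-pass override recorded in the CompilationUnit's
// overridden_pass_options takes precedence over the registered default:
//   default_options_.Put("MaxUnrollFactor", OptionContent(4));
//   default_options_.Put("DumpPrefix", OptionContent("unroll"));
//   int64_t factor  = GetIntegerPassOption("MaxUnrollFactor", c_unit);  // 0 if not an integer
//   const char* pfx = GetStringPassOption("DumpPrefix", c_unit);        // nullptr if not a string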
diff --git a/compiler/dex/compiler_ir.h b/compiler/dex/compiler_ir.h index 51c4a43..0c46d43 100644 --- a/compiler/dex/compiler_ir.h +++ b/compiler/dex/compiler_ir.h @@ -34,6 +34,129 @@ class CompilerDriver; class Mir2Lir; class MIRGraph; +constexpr size_t kOptionStringMaxLength = 2048; + +/** + * Structure abstracting pass option values, which can be of type string or integer. + */ +struct OptionContent { + OptionContent(const OptionContent& option) : + type(option.type), container(option.container, option.type) {} + + explicit OptionContent(const char* value) : + type(kString), container(value) {} + + explicit OptionContent(int value) : + type(kInteger), container(value) {} + + explicit OptionContent(int64_t value) : + type(kInteger), container(value) {} + + ~OptionContent() { + if (type == kString) { + container.StringDelete(); + } + } + + /** + * Allows for a transparent display of the option content. + */ + friend std::ostream& operator<<(std::ostream& out, const OptionContent& option) { + if (option.type == kString) { + out << option.container.s; + } else { + out << option.container.i; + } + + return out; + } + + inline const char* GetString() const { + return container.s; + } + + inline int64_t GetInteger() const { + return container.i; + } + + /** + * @brief Used to compare a string option value to a given @p value. + * @details Will return whether the internal string option is equal to + * the parameter @p value. It will return false if the type of the + * object is not a string. + * @param value The string to compare to. + * @return Returns whether the internal string option is equal to the + * parameter @p value. + */ + inline bool Equals(const char* value) const { + DCHECK(value != nullptr); + if (type != kString) { + return false; + } + return !strncmp(container.s, value, kOptionStringMaxLength); + } + + /** + * @brief Used to compare an integer option value to a given @p value. + * @details Will return whether the internal integer option is equal to + * the parameter @p value. It will return false if the type of the + * object is not an integer. + * @param value The integer to compare to. + * @return Returns whether the internal integer option is equal to the + * parameter @p value. + */ + inline bool Equals(int64_t value) const { + if (type != kInteger) { + return false; + } + return container.i == value; + } + + /** + * Describes the type of parameters allowed as option values. + */ + enum OptionType { + kString = 0, + kInteger + }; + + OptionType type; + + private: + /** + * Union containing the option value of either type. + */ + union OptionContainer { + explicit OptionContainer(const OptionContainer& c, OptionType t) { + if (t == kString) { + DCHECK(c.s != nullptr); + s = strndup(c.s, kOptionStringMaxLength); + } else { + i = c.i; + } + } + + explicit OptionContainer(const char* value) { + DCHECK(value != nullptr); + s = strndup(value, kOptionStringMaxLength); + } + + explicit OptionContainer(int64_t value) : i(value) {} + ~OptionContainer() {} + + void StringDelete() { + if (s != nullptr) { + free(s); + } + } + + char* s; + int64_t i; + }; + + OptionContainer container; +}; + struct CompilationUnit { CompilationUnit(ArenaPool* pool, InstructionSet isa, CompilerDriver* driver, ClassLinker* linker); ~CompilationUnit(); @@ -77,7 +200,7 @@ struct CompilationUnit { * default settings have been changed. The key is simply the option string without * the pass name. 
*/ - SafeMap<const std::string, int> overridden_pass_options; + SafeMap<const std::string, const OptionContent> overridden_pass_options; }; } // namespace art diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc index 05414b3..8718191 100644 --- a/compiler/dex/mir_optimization.cc +++ b/compiler/dex/mir_optimization.cc @@ -116,13 +116,14 @@ MIR* MIRGraph::AdvanceMIR(BasicBlock** p_bb, MIR* mir) { BasicBlock* bb = *p_bb; if (mir != NULL) { mir = mir->next; - if (mir == NULL) { + while (mir == NULL) { bb = GetBasicBlock(bb->fall_through); if ((bb == NULL) || Predecessors(bb) != 1) { - mir = NULL; + // mir is null and we cannot proceed further. + break; } else { - *p_bb = bb; - mir = bb->first_mir_insn; + *p_bb = bb; + mir = bb->first_mir_insn; } } } diff --git a/compiler/dex/pass_driver_me.h b/compiler/dex/pass_driver_me.h index fed92be..94eef22 100644 --- a/compiler/dex/pass_driver_me.h +++ b/compiler/dex/pass_driver_me.h @@ -165,7 +165,7 @@ class PassDriverME: public PassDriver { const PassME* me_pass = down_cast<const PassME*>(pass); if (me_pass->HasOptions()) { LOG(INFO) << "Pass options for \"" << me_pass->GetName() << "\" are:"; - SafeMap<const std::string, int> overridden_settings; + SafeMap<const std::string, const OptionContent> overridden_settings; FillOverriddenPassSettings(&manager->GetOptions(), me_pass->GetName(), overridden_settings); me_pass->PrintPassOptions(overridden_settings); @@ -212,7 +212,7 @@ class PassDriverME: public PassDriver { * configuration. */ static void FillOverriddenPassSettings(const PassManagerOptions* options, const char* pass_name, - SafeMap<const std::string, int>& settings_to_fill) { + SafeMap<const std::string, const OptionContent>& settings_to_fill) { const std::string& settings = options->GetOverriddenPassOptions(); const size_t settings_len = settings.size(); @@ -285,17 +285,28 @@ class PassDriverME: public PassDriver { continue; } - // Get the actual setting itself. Strtol is being used to convert because it is - // exception safe. If the input is not sane, it will set a setting of 0. + // Get the actual setting itself. std::string setting_string = settings.substr(setting_pos, next_configuration_separator - setting_pos); - int setting = std::strtol(setting_string.c_str(), 0, 0); std::string setting_name = settings.substr(setting_name_pos, setting_pos - setting_name_pos - 1); - settings_to_fill.Put(setting_name, setting); - + // We attempt to convert the option value to integer. Strtoll is being used to + // convert because it is exception safe. + char* end_ptr = nullptr; + const char* setting_ptr = setting_string.c_str(); + DCHECK(setting_ptr != nullptr); // Paranoid: setting_ptr must be a valid pointer. + int64_t int_value = strtoll(setting_ptr, &end_ptr, 0); + DCHECK(end_ptr != nullptr); // Paranoid: end_ptr must be set by the strtoll call. + + // If strtoll call succeeded, the option is now considered as integer. + if (*setting_ptr != '\0' && end_ptr != setting_ptr && *end_ptr == '\0') { + settings_to_fill.Put(setting_name, OptionContent(int_value)); + } else { + // Otherwise, it is considered as a string. 
+ settings_to_fill.Put(setting_name, OptionContent(setting_string.c_str())); + } search_pos = next_configuration_separator; } while (true); } diff --git a/compiler/dex/pass_me.h b/compiler/dex/pass_me.h index 79d8f51..d3cf393 100644 --- a/compiler/dex/pass_me.h +++ b/compiler/dex/pass_me.h @@ -21,6 +21,7 @@ #include "base/logging.h" #include "pass.h" +#include "compiler_ir.h" #include "safe_map.h" namespace art { @@ -104,7 +105,7 @@ class PassME : public Pass { */ void PrintPassDefaultOptions() const { for (const auto& option : default_options_) { - LOG(INFO) << "\t" << option.first << ":" << std::dec << option.second; + LOG(INFO) << "\t" << option.first << ":" << option.second; } } @@ -112,15 +113,49 @@ class PassME : public Pass { * @brief Prints the pass options along with either default or overridden setting. * @param overridden_options The overridden settings for this pass. */ - void PrintPassOptions(SafeMap<const std::string, int>& overridden_options) const { + void PrintPassOptions(SafeMap<const std::string, const OptionContent>& overridden_options) const { // We walk through the default options only to get the pass names. We use GetPassOption to // also consider the overridden ones. for (const auto& option : default_options_) { - LOG(INFO) << "\t" << option.first << ":" << std::dec + LOG(INFO) << "\t" << option.first << ":" << GetPassOption(option.first, overridden_options); } } + /** + * @brief Used to obtain the option structure for a pass. + * @details Will return the overridden option if it exists or default one otherwise. + * @param option_name The name of option whose setting to look for. + * @param c_unit The compilation unit currently being handled. + * @return Returns the option structure containing the option value. + */ + const OptionContent& GetPassOption(const char* option_name, CompilationUnit* c_unit) const { + return GetPassOption(option_name, c_unit->overridden_pass_options); + } + + /** + * @brief Used to obtain the option for a pass as a string. + * @details Will return the overridden option if it exists or default one otherwise. + * It will return nullptr if the required option value is not a string. + * @param option_name The name of option whose setting to look for. + * @param c_unit The compilation unit currently being handled. + * @return Returns the overridden option if it exists or the default one otherwise. + */ + const char* GetStringPassOption(const char* option_name, CompilationUnit* c_unit) const { + return GetStringPassOption(option_name, c_unit->overridden_pass_options); + } + + /** + * @brief Used to obtain the pass option value as an integer. + * @details Will return the overridden option if it exists or default one otherwise. + * It will return 0 if the required option value is not an integer. + * @param c_unit The compilation unit currently being handled. + * @return Returns the overriden option if it exists or the default one otherwise. 
+ */ + int64_t GetIntegerPassOption(const char* option_name, CompilationUnit* c_unit) const { + return GetIntegerPassOption(option_name, c_unit->overridden_pass_options); + } + const char* GetDumpCFGFolder() const { return dump_cfg_folder_; } @@ -130,29 +165,51 @@ class PassME : public Pass { } protected: - int GetPassOption(const char* option_name, const SafeMap<const std::string, int>& overridden_options) const { + const OptionContent& GetPassOption(const char* option_name, + const SafeMap<const std::string, const OptionContent>& overridden_options) const { + DCHECK(option_name != nullptr); + // First check if there are any overridden settings. auto overridden_it = overridden_options.find(std::string(option_name)); if (overridden_it != overridden_options.end()) { return overridden_it->second; + } else { + // Otherwise, there must be a default value for this option name. + auto default_it = default_options_.find(option_name); + // An invalid option is being requested. + if (default_it == default_options_.end()) { + LOG(FATAL) << "Fatal: Cannot find an option named \"" << option_name << "\""; + } + + return default_it->second; } + } - // Next check the default options. - auto default_it = default_options_.find(option_name); + const char* GetStringPassOption(const char* option_name, + const SafeMap<const std::string, const OptionContent>& overridden_options) const { + const OptionContent& option_content = GetPassOption(option_name, overridden_options); + if (option_content.type != OptionContent::kString) { + return nullptr; + } - if (default_it == default_options_.end()) { - // An invalid option is being requested. - DCHECK(false); + return option_content.GetString(); + } + + int64_t GetIntegerPassOption(const char* option_name, + const SafeMap<const std::string, const OptionContent>& overridden_options) const { + const OptionContent& option_content = GetPassOption(option_name, overridden_options); + if (option_content.type != OptionContent::kInteger) { return 0; } - return default_it->second; + return option_content.GetInteger(); } /** @brief Type of traversal: determines the order to execute the pass on the BasicBlocks. */ const DataFlowAnalysisMode traversal_type_; - /** @brief Flags for additional directives: used to determine if a particular post-optimization pass is necessary. */ + /** @brief Flags for additional directives: used to determine if a particular + * post-optimization pass is necessary. */ const unsigned int flags_; /** @brief CFG Dump Folder: what sub-folder to use for dumping the CFGs post pass. */ @@ -163,7 +220,7 @@ class PassME : public Pass { * @details The constructor of the specific pass instance should fill this * with default options. * */ - SafeMap<const char*, int> default_options_; + SafeMap<const char*, const OptionContent> default_options_; }; } // namespace art #endif // ART_COMPILER_DEX_PASS_ME_H_ diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc index 5538d79..13f9072 100644 --- a/compiler/dex/quick/arm/target_arm.cc +++ b/compiler/dex/quick/arm/target_arm.cc @@ -19,6 +19,7 @@ #include <inttypes.h> #include <string> +#include <sstream> #include "backend_arm.h" #include "base/logging.h" @@ -490,6 +491,24 @@ std::string ArmMir2Lir::BuildInsnString(const char* fmt, LIR* lir, unsigned char buf += *fmt++; } } + // Dump thread offset. 
+ std::string fmt_str = GetTargetInstFmt(lir->opcode); + if (std::string::npos != fmt_str.find(", [!1C, #!2") && rARM_SELF == lir->operands[1] && + std::string::npos != buf.find(", [")) { + int offset = lir->operands[2]; + if (std::string::npos != fmt_str.find("#!2d")) { + } else if (std::string::npos != fmt_str.find("#!2E")) { + offset *= 4; + } else if (std::string::npos != fmt_str.find("#!2F")) { + offset *= 2; + } else { + LOG(FATAL) << "Should not reach here"; + } + std::ostringstream tmp_stream; + Thread::DumpThreadOffset<4>(tmp_stream, offset); + buf += " ; "; + buf += tmp_stream.str(); + } return buf; } diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc index d2204f5..6492442 100644 --- a/compiler/dex/quick/arm64/call_arm64.cc +++ b/compiler/dex/quick/arm64/call_arm64.cc @@ -400,8 +400,9 @@ void Arm64Mir2Lir::GenSpecialExitSequence() { } static bool Arm64UseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) { - // Emit relative calls anywhere in the image or within a dex file otherwise. - return cu->compiler_driver->IsImage() || cu->dex_file == target_method.dex_file; + UNUSED(cu, target_method); + // Always emit relative calls. + return true; } /* diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc index 34662f2..136be94 100644 --- a/compiler/dex/quick/arm64/target_arm64.cc +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -19,6 +19,7 @@ #include <inttypes.h> #include <string> +#include <sstream> #include "backend_arm64.h" #include "base/logging.h" @@ -522,6 +523,24 @@ std::string Arm64Mir2Lir::BuildInsnString(const char* fmt, LIR* lir, unsigned ch buf += *fmt++; } } + // Dump thread offset. + std::string fmt_str = GetTargetInstFmt(lir->opcode); + if (std::string::npos != fmt_str.find(", [!1X, #!2") && rxSELF == lir->operands[1] && + std::string::npos != buf.find(", [")) { + int offset = lir->operands[2]; + if (std::string::npos != fmt_str.find("#!2d")) { + } else if (std::string::npos != fmt_str.find("#!2D")) { + offset *= (IS_WIDE(lir->opcode)) ? 8 : 4; + } else if (std::string::npos != fmt_str.find("#!2F")) { + offset *= 2; + } else { + LOG(FATAL) << "Should not reach here"; + } + std::ostringstream tmp_stream; + Thread::DumpThreadOffset<8>(tmp_stream, offset); + buf += " ; "; + buf += tmp_stream.str(); + } return buf; } diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index 0337096..c4adb09 100755 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -2303,9 +2303,9 @@ void X86Mir2Lir::GenReduceVector(MIR* mir) { StoreFinalValue(rl_dest, rl_result); } else { int displacement = SRegOffset(rl_result.s_reg_low); + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); LIR *l = NewLIR4(extr_opcode, rs_rX86_SP_32.GetReg(), displacement, vector_src.GetReg(), extract_index); - AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, is_wide /* is_64bit */); AnnotateDalvikRegAccess(l, displacement >> 2, false /* is_load */, is_wide /* is_64bit */); } } @@ -2469,11 +2469,17 @@ RegStorage X86Mir2Lir::InToRegStorageX86Mapper::GetNextReg(ShortyArg arg) { return m2l_->TargetReg(fpArgMappingToPhysicalReg[cur_fp_reg_++], arg.IsWide() ? kWide : kNotWide); } - } else { - if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) { - result = m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], - arg.IsRef() ? 
kRef : kNotWide); - if (arg.IsWide() && cur_core_reg_ < coreArgMappingToPhysicalRegSize) { + } else if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) { + result = m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], + arg.IsRef() ? kRef : kNotWide); + if (arg.IsWide()) { + // This must be a long, as double is handled above. + // Ensure that we don't split a long across the last register and the stack. + if (cur_core_reg_ == coreArgMappingToPhysicalRegSize) { + // Leave the last core register unused and force the whole long to the stack. + cur_core_reg_++; + result = RegStorage::InvalidReg(); + } else if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) { result = RegStorage::MakeRegPair( result, m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], kNotWide)); } diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index cc59a2f..7451bd5 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -1267,14 +1267,14 @@ void CompilerDriver::GetCodeAndMethodForDirectCall(InvokeType* type, InvokeType } // TODO: support patching on all architectures. use_dex_cache = use_dex_cache || (force_relocations && !support_boot_image_fixup_); - mirror::Class* declaring_class = method->GetDeclaringClass(); - bool method_code_in_boot = (declaring_class->GetClassLoader() == nullptr); + bool method_code_in_boot = (method->GetDeclaringClass()->GetClassLoader() == nullptr); if (!use_dex_cache) { if (!method_code_in_boot) { use_dex_cache = true; } else { - bool has_clinit_trampoline = method->IsStatic() && !declaring_class->IsInitialized(); - if (has_clinit_trampoline && (declaring_class != referrer_class)) { + bool has_clinit_trampoline = + method->IsStatic() && !method->GetDeclaringClass()->IsInitialized(); + if (has_clinit_trampoline && (method->GetDeclaringClass() != referrer_class)) { // Ensure we run the clinit trampoline unless we are invoking a static method in the same // class. use_dex_cache = true; @@ -1285,15 +1285,7 @@ void CompilerDriver::GetCodeAndMethodForDirectCall(InvokeType* type, InvokeType *stats_flags |= kFlagDirectCallToBoot | kFlagDirectMethodToBoot; } if (!use_dex_cache && force_relocations) { - bool is_in_image; - if (IsImage()) { - is_in_image = IsImageClass(method->GetDeclaringClassDescriptor()); - } else { - is_in_image = instruction_set_ != kX86 && instruction_set_ != kX86_64 && - Runtime::Current()->GetHeap()->FindSpaceFromObject(declaring_class, - false)->IsImageSpace(); - } - if (!is_in_image) { + if (!IsImage() || !IsImageClass(method->GetDeclaringClassDescriptor())) { // We can only branch directly to Methods that are resolved in the DexCache. // Otherwise we won't invoke the resolution trampoline. use_dex_cache = true; @@ -1302,7 +1294,7 @@ void CompilerDriver::GetCodeAndMethodForDirectCall(InvokeType* type, InvokeType // The method is defined not within this dex file. We need a dex cache slot within the current // dex file or direct pointers. 
bool must_use_direct_pointers = false; - if (target_method->dex_file == declaring_class->GetDexCache()->GetDexFile()) { + if (target_method->dex_file == method->GetDeclaringClass()->GetDexCache()->GetDexFile()) { target_method->dex_method_index = method->GetDexMethodIndex(); } else { if (no_guarantee_of_dex_cache_entry) { @@ -1315,7 +1307,7 @@ void CompilerDriver::GetCodeAndMethodForDirectCall(InvokeType* type, InvokeType } else { if (force_relocations && !use_dex_cache) { target_method->dex_method_index = method->GetDexMethodIndex(); - target_method->dex_file = declaring_class->GetDexCache()->GetDexFile(); + target_method->dex_file = method->GetDeclaringClass()->GetDexCache()->GetDexFile(); } must_use_direct_pointers = true; } @@ -1338,14 +1330,14 @@ void CompilerDriver::GetCodeAndMethodForDirectCall(InvokeType* type, InvokeType *type = sharp_type; *direct_method = force_relocations ? -1 : reinterpret_cast<uintptr_t>(method); *direct_code = force_relocations ? -1 : compiler_->GetEntryPointOf(method); - target_method->dex_file = declaring_class->GetDexCache()->GetDexFile(); + target_method->dex_file = method->GetDeclaringClass()->GetDexCache()->GetDexFile(); target_method->dex_method_index = method->GetDexMethodIndex(); } else if (!must_use_direct_pointers) { // Set the code and rely on the dex cache for the method. *type = sharp_type; if (force_relocations) { *direct_code = -1; - target_method->dex_file = declaring_class->GetDexCache()->GetDexFile(); + target_method->dex_file = method->GetDeclaringClass()->GetDexCache()->GetDexFile(); target_method->dex_method_index = method->GetDexMethodIndex(); } else { *direct_code = compiler_->GetEntryPointOf(method); diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h index 6db0c3b..0c64a36 100644 --- a/compiler/jni/quick/calling_convention.h +++ b/compiler/jni/quick/calling_convention.h @@ -141,7 +141,7 @@ class CallingConvention { if (IsStatic()) { param++; // 0th argument must skip return value at start of the shorty } else if (param == 0) { - return true; // this argument + return false; // this argument } return shorty_[param] == 'J'; } diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc index fc72e88..8a45f0c 100644 --- a/compiler/jni/quick/x86/calling_convention_x86.cc +++ b/compiler/jni/quick/x86/calling_convention_x86.cc @@ -85,9 +85,19 @@ ManagedRegister X86ManagedRuntimeCallingConvention::CurrentParamRegister() { ManagedRegister res = ManagedRegister::NoRegister(); if (!IsCurrentParamAFloatOrDouble()) { switch (gpr_arg_count_) { - case 0: res = X86ManagedRegister::FromCpuRegister(ECX); break; - case 1: res = X86ManagedRegister::FromCpuRegister(EDX); break; - case 2: res = X86ManagedRegister::FromCpuRegister(EBX); break; + case 0: + res = X86ManagedRegister::FromCpuRegister(ECX); + break; + case 1: + res = X86ManagedRegister::FromCpuRegister(EDX); + break; + case 2: + // Don't split a long between the last register and the stack. 
+ if (IsCurrentParamALong()) { + return ManagedRegister::NoRegister(); + } + res = X86ManagedRegister::FromCpuRegister(EBX); + break; } } else if (itr_float_and_doubles_ < 4) { // First four float parameters are passed via XMM0..XMM3 @@ -120,27 +130,34 @@ const ManagedRegisterEntrySpills& X86ManagedRuntimeCallingConvention::EntrySpill ResetIterator(FrameOffset(0)); while (HasNext()) { ManagedRegister in_reg = CurrentParamRegister(); + bool is_long = IsCurrentParamALong(); if (!in_reg.IsNoRegister()) { int32_t size = IsParamADouble(itr_args_) ? 8 : 4; int32_t spill_offset = CurrentParamStackOffset().Uint32Value(); ManagedRegisterSpill spill(in_reg, size, spill_offset); entry_spills_.push_back(spill); - if (IsCurrentParamALong() && !IsCurrentParamAReference()) { // Long. - // special case, as we may need a second register here. + if (is_long) { + // special case, as we need a second register here. in_reg = CurrentParamHighLongRegister(); - if (!in_reg.IsNoRegister()) { - // We have to spill the second half of the long. - ManagedRegisterSpill spill2(in_reg, size, spill_offset + 4); - entry_spills_.push_back(spill2); - // Long was allocated in 2 registers. - gpr_arg_count_++; - } + DCHECK(!in_reg.IsNoRegister()); + // We have to spill the second half of the long. + ManagedRegisterSpill spill2(in_reg, size, spill_offset + 4); + entry_spills_.push_back(spill2); } // Keep track of the number of GPRs allocated. if (!IsCurrentParamAFloatOrDouble()) { - gpr_arg_count_++; + if (is_long) { + // Long was allocated in 2 registers. + gpr_arg_count_ += 2; + } else { + gpr_arg_count_++; + } } + } else if (is_long) { + // We need to skip the unused last register, which is empty. + // If we are already out of registers, this is harmless. + gpr_arg_count_ += 2; } Next(); } diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index 7516811..3c36ffa 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -1103,18 +1103,10 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { if (UNLIKELY(target_offset == 0)) { mirror::ArtMethod* target = GetTargetMethod(patch); DCHECK(target != nullptr); - size_t size = GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet()); - const void* oat_code_offset = target->GetEntryPointFromQuickCompiledCodePtrSize(size); - if (oat_code_offset != 0) { - DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickResolutionStub(oat_code_offset)); - DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickToInterpreterBridge(oat_code_offset)); - DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickGenericJniStub(oat_code_offset)); - target_offset = PointerToLowMemUInt32(oat_code_offset); - } else { - target_offset = target->IsNative() - ? writer_->oat_header_->GetQuickGenericJniTrampolineOffset() - : writer_->oat_header_->GetQuickToInterpreterBridgeOffset(); - } + DCHECK_EQ(target->GetQuickOatCodeOffset(), 0u); + target_offset = target->IsNative() + ? writer_->oat_header_->GetQuickGenericJniTrampolineOffset() + : writer_->oat_header_->GetQuickToInterpreterBridgeOffset(); } return target_offset; } @@ -1146,9 +1138,10 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { void PatchCodeAddress(std::vector<uint8_t>* code, uint32_t offset, uint32_t target_offset) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - uint32_t address = writer_->image_writer_ == nullptr ? 
target_offset : - PointerToLowMemUInt32(writer_->image_writer_->GetOatFileBegin() + - writer_->oat_data_offset_ + target_offset); + // NOTE: Direct calls across oat files don't use linker patches. + DCHECK(writer_->image_writer_ != nullptr); + uint32_t address = PointerToLowMemUInt32(writer_->image_writer_->GetOatFileBegin() + + writer_->oat_data_offset_ + target_offset); DCHECK_LE(offset + 4, code->size()); uint8_t* data = &(*code)[offset]; data[0] = address & 0xffu; diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 4b97a62..20a1b03 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -190,37 +190,37 @@ void HGraphBuilder::InitializeParameters(uint16_t number_of_parameters) { template<typename T> void HGraphBuilder::If_22t(const Instruction& instruction, uint32_t dex_pc) { int32_t target_offset = instruction.GetTargetOffset(); - PotentiallyAddSuspendCheck(target_offset, dex_pc); + HBasicBlock* branch_target = FindBlockStartingAt(dex_pc + target_offset); + HBasicBlock* fallthrough_target = FindBlockStartingAt(dex_pc + instruction.SizeInCodeUnits()); + DCHECK(branch_target != nullptr); + DCHECK(fallthrough_target != nullptr); + PotentiallyAddSuspendCheck(branch_target, dex_pc); HInstruction* first = LoadLocal(instruction.VRegA(), Primitive::kPrimInt); HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt); T* comparison = new (arena_) T(first, second); current_block_->AddInstruction(comparison); HInstruction* ifinst = new (arena_) HIf(comparison); current_block_->AddInstruction(ifinst); - HBasicBlock* target = FindBlockStartingAt(dex_pc + target_offset); - DCHECK(target != nullptr); - current_block_->AddSuccessor(target); - target = FindBlockStartingAt(dex_pc + instruction.SizeInCodeUnits()); - DCHECK(target != nullptr); - current_block_->AddSuccessor(target); + current_block_->AddSuccessor(branch_target); + current_block_->AddSuccessor(fallthrough_target); current_block_ = nullptr; } template<typename T> void HGraphBuilder::If_21t(const Instruction& instruction, uint32_t dex_pc) { int32_t target_offset = instruction.GetTargetOffset(); - PotentiallyAddSuspendCheck(target_offset, dex_pc); + HBasicBlock* branch_target = FindBlockStartingAt(dex_pc + target_offset); + HBasicBlock* fallthrough_target = FindBlockStartingAt(dex_pc + instruction.SizeInCodeUnits()); + DCHECK(branch_target != nullptr); + DCHECK(fallthrough_target != nullptr); + PotentiallyAddSuspendCheck(branch_target, dex_pc); HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt); T* comparison = new (arena_) T(value, GetIntConstant(0)); current_block_->AddInstruction(comparison); HInstruction* ifinst = new (arena_) HIf(comparison); current_block_->AddInstruction(ifinst); - HBasicBlock* target = FindBlockStartingAt(dex_pc + target_offset); - DCHECK(target != nullptr); - current_block_->AddSuccessor(target); - target = FindBlockStartingAt(dex_pc + instruction.SizeInCodeUnits()); - DCHECK(target != nullptr); - current_block_->AddSuccessor(target); + current_block_->AddSuccessor(branch_target); + current_block_->AddSuccessor(fallthrough_target); current_block_ = nullptr; } @@ -259,13 +259,14 @@ bool HGraphBuilder::SkipCompilation(size_t number_of_dex_instructions, return false; } -HGraph* HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item, int start_instruction_id) { +bool HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item) { + DCHECK(graph_->GetBlocks().IsEmpty()); + const uint16_t* code_ptr = code_item.insns_; 
const uint16_t* code_end = code_item.insns_ + code_item.insns_size_in_code_units_; code_start_ = code_ptr; // Setup the graph with the entry block and exit block. - graph_ = new (arena_) HGraph(arena_, start_instruction_id); entry_block_ = new (arena_) HBasicBlock(graph_, 0); graph_->AddBlock(entry_block_); exit_block_ = new (arena_) HBasicBlock(graph_, kNoDexPc); @@ -289,7 +290,7 @@ HGraph* HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item, int start_ // Note that the compiler driver is null when unit testing. if ((compiler_driver_ != nullptr) && SkipCompilation(number_of_dex_instructions, number_of_blocks, number_of_branches)) { - return nullptr; + return false; } // Also create blocks for catch handlers. @@ -319,7 +320,7 @@ HGraph* HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item, int start_ MaybeUpdateCurrentBlock(dex_pc); const Instruction& instruction = *Instruction::At(code_ptr); if (!AnalyzeDexInstruction(instruction, dex_pc)) { - return nullptr; + return false; } dex_pc += instruction.SizeInCodeUnits(); code_ptr += instruction.SizeInCodeUnits(); @@ -331,7 +332,8 @@ HGraph* HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item, int start_ // Add the suspend check to the entry block. entry_block_->AddInstruction(new (arena_) HSuspendCheck(0)); entry_block_->AddInstruction(new (arena_) HGoto()); - return graph_; + + return true; } void HGraphBuilder::MaybeUpdateCurrentBlock(size_t index) { @@ -1034,7 +1036,9 @@ void HGraphBuilder::BuildSwitchCaseHelper(const Instruction& instruction, size_t bool is_last_case, const SwitchTable& table, HInstruction* value, int32_t case_value_int, int32_t target_offset, uint32_t dex_pc) { - PotentiallyAddSuspendCheck(target_offset, dex_pc); + HBasicBlock* case_target = FindBlockStartingAt(dex_pc + target_offset); + DCHECK(case_target != nullptr); + PotentiallyAddSuspendCheck(case_target, dex_pc); // The current case's value. HInstruction* this_case_value = GetIntConstant(case_value_int); @@ -1046,8 +1050,6 @@ void HGraphBuilder::BuildSwitchCaseHelper(const Instruction& instruction, size_t current_block_->AddInstruction(ifinst); // Case hit: use the target offset to determine where to go. - HBasicBlock* case_target = FindBlockStartingAt(dex_pc + target_offset); - DCHECK(case_target != nullptr); current_block_->AddSuccessor(case_target); // Case miss: go to the next case (or default fall-through). @@ -1072,10 +1074,19 @@ void HGraphBuilder::BuildSwitchCaseHelper(const Instruction& instruction, size_t } } -void HGraphBuilder::PotentiallyAddSuspendCheck(int32_t target_offset, uint32_t dex_pc) { +void HGraphBuilder::PotentiallyAddSuspendCheck(HBasicBlock* target, uint32_t dex_pc) { + int32_t target_offset = target->GetDexPc() - dex_pc; if (target_offset <= 0) { - // Unconditionnally add a suspend check to backward branches. We can remove - // them after we recognize loops in the graph. + // DX generates back edges to the first encountered return. We can save + // time of later passes by not adding redundant suspend checks. + HInstruction* last_in_target = target->GetLastInstruction(); + if (last_in_target != nullptr && + (last_in_target->IsReturn() || last_in_target->IsReturnVoid())) { + return; + } + + // Add a suspend check to backward branches which may potentially loop. We + // can remove them after we recognize loops in the graph. 
current_block_->AddInstruction(new (arena_) HSuspendCheck(dex_pc)); } } @@ -1197,9 +1208,9 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 case Instruction::GOTO_16: case Instruction::GOTO_32: { int32_t offset = instruction.GetTargetOffset(); - PotentiallyAddSuspendCheck(offset, dex_pc); HBasicBlock* target = FindBlockStartingAt(offset + dex_pc); DCHECK(target != nullptr); + PotentiallyAddSuspendCheck(target, dex_pc); current_block_->AddInstruction(new (arena_) HGoto()); current_block_->AddSuccessor(target); current_block_ = nullptr; diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index 592db23..c510136 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -34,19 +34,19 @@ class SwitchTable; class HGraphBuilder : public ValueObject { public: - HGraphBuilder(ArenaAllocator* arena, + HGraphBuilder(HGraph* graph, DexCompilationUnit* dex_compilation_unit, const DexCompilationUnit* const outer_compilation_unit, const DexFile* dex_file, CompilerDriver* driver, OptimizingCompilerStats* compiler_stats) - : arena_(arena), - branch_targets_(arena, 0), - locals_(arena, 0), + : arena_(graph->GetArena()), + branch_targets_(graph->GetArena(), 0), + locals_(graph->GetArena(), 0), entry_block_(nullptr), exit_block_(nullptr), current_block_(nullptr), - graph_(nullptr), + graph_(graph), constant0_(nullptr), constant1_(nullptr), dex_file_(dex_file), @@ -59,14 +59,14 @@ class HGraphBuilder : public ValueObject { compilation_stats_(compiler_stats) {} // Only for unit testing. - HGraphBuilder(ArenaAllocator* arena, Primitive::Type return_type = Primitive::kPrimInt) - : arena_(arena), - branch_targets_(arena, 0), - locals_(arena, 0), + HGraphBuilder(HGraph* graph, Primitive::Type return_type = Primitive::kPrimInt) + : arena_(graph->GetArena()), + branch_targets_(graph->GetArena(), 0), + locals_(graph->GetArena(), 0), entry_block_(nullptr), exit_block_(nullptr), current_block_(nullptr), - graph_(nullptr), + graph_(graph), constant0_(nullptr), constant1_(nullptr), dex_file_(nullptr), @@ -78,7 +78,7 @@ class HGraphBuilder : public ValueObject { latest_result_(nullptr), compilation_stats_(nullptr) {} - HGraph* BuildGraph(const DexFile::CodeItem& code, int start_instruction_id = 0); + bool BuildGraph(const DexFile::CodeItem& code); private: // Analyzes the dex instruction and adds HInstruction to the graph @@ -106,7 +106,7 @@ class HGraphBuilder : public ValueObject { HLocal* GetLocalAt(int register_index) const; void UpdateLocal(int register_index, HInstruction* instruction) const; HInstruction* LoadLocal(int register_index, Primitive::Type type) const; - void PotentiallyAddSuspendCheck(int32_t target_offset, uint32_t dex_pc); + void PotentiallyAddSuspendCheck(HBasicBlock* target, uint32_t dex_pc); void InitializeParameters(uint16_t number_of_parameters); bool NeedsAccessCheck(uint32_t type_index) const; @@ -249,7 +249,7 @@ class HGraphBuilder : public ValueObject { HBasicBlock* entry_block_; HBasicBlock* exit_block_; HBasicBlock* current_block_; - HGraph* graph_; + HGraph* const graph_; HIntConstant* constant0_; HIntConstant* constant1_; diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 0a405c4..fd4e391 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -41,60 +41,57 @@ size_t CodeGenerator::GetCacheOffset(uint32_t index) { } void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) { - const 
GrowableArray<HBasicBlock*>& blocks = GetGraph()->GetBlocks(); - DCHECK(blocks.Get(0) == GetGraph()->GetEntryBlock()); - DCHECK(GoesToNextBlock(GetGraph()->GetEntryBlock(), blocks.Get(1))); - Initialize(); - DCHECK_EQ(frame_size_, kUninitializedFrameSize); + + Initialize(); if (!is_leaf) { MarkNotLeaf(); } - ComputeFrameSize(GetGraph()->GetNumberOfLocalVRegs() - + GetGraph()->GetTemporariesVRegSlots() - + 1 /* filler */, - 0, /* the baseline compiler does not have live registers at slow path */ - 0, /* the baseline compiler does not have live registers at slow path */ - GetGraph()->GetMaximumNumberOfOutVRegs() - + 1 /* current method */); - GenerateFrameEntry(); + InitializeCodeGeneration(GetGraph()->GetNumberOfLocalVRegs() + + GetGraph()->GetTemporariesVRegSlots() + + 1 /* filler */, + 0, /* the baseline compiler does not have live registers at slow path */ + 0, /* the baseline compiler does not have live registers at slow path */ + GetGraph()->GetMaximumNumberOfOutVRegs() + + 1 /* current method */, + GetGraph()->GetBlocks()); + CompileInternal(allocator, /* is_baseline */ true); +} +void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) { HGraphVisitor* location_builder = GetLocationBuilder(); HGraphVisitor* instruction_visitor = GetInstructionVisitor(); - for (size_t i = 0, e = blocks.Size(); i < e; ++i) { - HBasicBlock* block = blocks.Get(i); + DCHECK_EQ(current_block_index_, 0u); + GenerateFrameEntry(); + for (size_t e = block_order_->Size(); current_block_index_ < e; ++current_block_index_) { + HBasicBlock* block = block_order_->Get(current_block_index_); Bind(block); for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* current = it.Current(); - current->Accept(location_builder); - InitLocations(current); + if (is_baseline) { + current->Accept(location_builder); + InitLocations(current); + } current->Accept(instruction_visitor); } } - GenerateSlowPaths(); + + // Generate the slow paths. + for (size_t i = 0, e = slow_paths_.Size(); i < e; ++i) { + slow_paths_.Get(i)->EmitNativeCode(this); + } + + // Finalize instructions in assember; Finalize(allocator); } void CodeGenerator::CompileOptimized(CodeAllocator* allocator) { - // The frame size has already been computed during register allocation. + // The register allocator already called `InitializeCodeGeneration`, + // where the frame size has been computed. 
DCHECK_NE(frame_size_, kUninitializedFrameSize); - const GrowableArray<HBasicBlock*>& blocks = GetGraph()->GetBlocks(); - DCHECK(blocks.Get(0) == GetGraph()->GetEntryBlock()); - DCHECK(GoesToNextBlock(GetGraph()->GetEntryBlock(), blocks.Get(1))); + DCHECK(block_order_ != nullptr); Initialize(); - - GenerateFrameEntry(); - HGraphVisitor* instruction_visitor = GetInstructionVisitor(); - for (size_t i = 0, e = blocks.Size(); i < e; ++i) { - HBasicBlock* block = blocks.Get(i); - Bind(block); - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { - HInstruction* current = it.Current(); - current->Accept(instruction_visitor); - } - } - GenerateSlowPaths(); - Finalize(allocator); + CompileInternal(allocator, /* is_baseline */ false); } void CodeGenerator::Finalize(CodeAllocator* allocator) { @@ -105,12 +102,6 @@ void CodeGenerator::Finalize(CodeAllocator* allocator) { GetAssembler()->FinalizeInstructions(code); } -void CodeGenerator::GenerateSlowPaths() { - for (size_t i = 0, e = slow_paths_.Size(); i < e; ++i) { - slow_paths_.Get(i)->EmitNativeCode(this); - } -} - size_t CodeGenerator::FindFreeEntry(bool* array, size_t length) { for (size_t i = 0; i < length; ++i) { if (!array[i]) { @@ -136,10 +127,14 @@ size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t l return -1; } -void CodeGenerator::ComputeFrameSize(size_t number_of_spill_slots, - size_t maximum_number_of_live_core_registers, - size_t maximum_number_of_live_fp_registers, - size_t number_of_out_slots) { +void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, + size_t maximum_number_of_live_core_registers, + size_t maximum_number_of_live_fp_registers, + size_t number_of_out_slots, + const GrowableArray<HBasicBlock*>& block_order) { + block_order_ = &block_order; + DCHECK(block_order_->Get(0) == GetGraph()->GetEntryBlock()); + DCHECK(GoesToNextBlock(GetGraph()->GetEntryBlock(), block_order_->Get(1))); ComputeSpillMask(); first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize; @@ -295,7 +290,7 @@ void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const { result_location = locations->InAt(0); break; } - locations->SetOut(result_location); + locations->UpdateOut(result_location); } } @@ -326,8 +321,9 @@ void CodeGenerator::InitLocations(HInstruction* instruction) { } bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const { - // We currently iterate over the block in insertion order. 
- return current->GetBlockId() + 1 == next->GetBlockId(); + DCHECK_EQ(block_order_->Get(current_block_index_), current); + return (current_block_index_ < block_order_->Size() - 1) + && (block_order_->Get(current_block_index_ + 1) == next); } CodeGenerator* CodeGenerator::Create(HGraph* graph, diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 45f02e5..ab63b91 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -113,10 +113,11 @@ class CodeGenerator { virtual size_t GetWordSize() const = 0; virtual size_t GetFloatingPointSpillSlotSize() const = 0; virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0; - void ComputeFrameSize(size_t number_of_spill_slots, - size_t maximum_number_of_live_core_registers, - size_t maximum_number_of_live_fp_registers, - size_t number_of_out_slots); + void InitializeCodeGeneration(size_t number_of_spill_slots, + size_t maximum_number_of_live_core_registers, + size_t maximum_number_of_live_fp_registers, + size_t number_of_out_slots, + const GrowableArray<HBasicBlock*>& block_order); int32_t GetStackSlot(HLocal* local) const; Location GetTemporaryLocation(HTemporary* temp) const; @@ -181,8 +182,6 @@ class CodeGenerator { slow_paths_.Add(slow_path); } - void GenerateSlowPaths(); - void BuildMappingTable(std::vector<uint8_t>* vector, DefaultSrcMap* src_map) const; void BuildVMapTable(std::vector<uint8_t>* vector) const; void BuildNativeGCMap( @@ -253,6 +252,8 @@ class CodeGenerator { compiler_options_(compiler_options), pc_infos_(graph->GetArena(), 32), slow_paths_(graph->GetArena(), 8), + block_order_(nullptr), + current_block_index_(0), is_leaf_(true), stack_map_stream_(graph->GetArena()) {} @@ -312,6 +313,7 @@ class CodeGenerator { private: void InitLocations(HInstruction* instruction); size_t GetStackOffsetOfSavedRegister(size_t index); + void CompileInternal(CodeAllocator* allocator, bool is_baseline); HGraph* const graph_; const CompilerOptions& compiler_options_; @@ -319,6 +321,13 @@ class CodeGenerator { GrowableArray<PcInfo> pc_infos_; GrowableArray<SlowPathCode*> slow_paths_; + // The order to use for code generation. + const GrowableArray<HBasicBlock*>* block_order_; + + // The current block index in `block_order_` of the block + // we are generating code for. + size_t current_block_index_; + bool is_leaf_; StackMapStream stack_map_stream_; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index b0cd7ba..78fd181 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -1296,13 +1296,14 @@ void LocationsBuilderARM::VisitNeg(HNeg* neg) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall); switch (neg->GetResultType()) { - case Primitive::kPrimInt: + case Primitive::kPrimInt: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + } case Primitive::kPrimLong: { - Location::OutputOverlap output_overlaps = (neg->GetResultType() == Primitive::kPrimLong) - ? 
Location::kOutputOverlap - : Location::kNoOutputOverlap; locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), output_overlaps); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); break; } @@ -1837,7 +1838,7 @@ void LocationsBuilderARM::VisitAdd(HAdd* add) { case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } @@ -1914,7 +1915,7 @@ void LocationsBuilderARM::VisitSub(HSub* sub) { case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } case Primitive::kPrimFloat: @@ -2297,7 +2298,7 @@ void LocationsBuilderARM::HandleShift(HBinaryOperation* op) { case Primitive::kPrimInt: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(op->InputAt(1))); - locations->SetOut(Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } case Primitive::kPrimLong: { @@ -2492,7 +2493,8 @@ void LocationsBuilderARM::VisitCompare(HCompare* compare) { case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // Output overlaps because it is written before doing the low comparison. + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); break; } case Primitive::kPrimFloat: @@ -2765,12 +2767,14 @@ void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldI LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - bool generate_volatile = field_info.IsVolatile() + bool volatile_for_double = field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimDouble) && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); - if (generate_volatile) { + bool overlap = field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong); + locations->SetOut(Location::RequiresRegister(), + (overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap)); + if (volatile_for_double) { // Arm encoding have some additional constraints for ldrexd/strexd: // - registers need to be consecutive // - the first register should be even but not R14. @@ -3614,7 +3618,8 @@ void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister()); + // The out register is used as a temporary, so it overlaps with the inputs. 
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); } void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { @@ -3710,10 +3715,7 @@ void LocationsBuilderARM::HandleBitwiseOperation(HBinaryOperation* instruction) || instruction->GetResultType() == Primitive::kPrimLong); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); - Location::OutputOverlap output_overlaps = (instruction->GetResultType() == Primitive::kPrimLong) - ? Location::kOutputOverlap - : Location::kNoOutputOverlap; - locations->SetOut(Location::RequiresRegister(), output_overlaps); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } void InstructionCodeGeneratorARM::VisitAnd(HAnd* instruction) { diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index c840793..98f93a4 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -535,9 +535,6 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type X86ManagedRegister pair = X86ManagedRegister::FromRegisterPair( calling_convention.GetRegisterPairAt(index)); return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh()); - } else if (index + 1 == calling_convention.GetNumberOfRegisters()) { - // stack_index_ is the right offset for the memory. - return Location::QuickParameter(index, stack_index_ - 2); } else { return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2)); } @@ -629,16 +626,6 @@ void CodeGeneratorX86::Move64(Location destination, Location source) { Location::RegisterLocation(destination.AsRegisterPairLow<Register>())); } else if (source.IsFpuRegister()) { LOG(FATAL) << "Unimplemented"; - } else if (source.IsQuickParameter()) { - uint16_t register_index = source.GetQuickParameterRegisterIndex(); - uint16_t stack_index = source.GetQuickParameterStackIndex(); - InvokeDexCallingConvention calling_convention; - EmitParallelMoves( - Location::RegisterLocation(calling_convention.GetRegisterAt(register_index)), - Location::RegisterLocation(destination.AsRegisterPairLow<Register>()), - Location::StackSlot( - calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize()), - Location::RegisterLocation(destination.AsRegisterPairHigh<Register>())); } else { // No conflict possible, so just do the moves. 
DCHECK(source.IsDoubleStackSlot()); @@ -646,23 +633,6 @@ void CodeGeneratorX86::Move64(Location destination, Location source) { __ movl(destination.AsRegisterPairHigh<Register>(), Address(ESP, source.GetHighStackIndex(kX86WordSize))); } - } else if (destination.IsQuickParameter()) { - InvokeDexCallingConvention calling_convention; - uint16_t register_index = destination.GetQuickParameterRegisterIndex(); - uint16_t stack_index = destination.GetQuickParameterStackIndex(); - if (source.IsRegisterPair()) { - LOG(FATAL) << "Unimplemented"; - } else if (source.IsFpuRegister()) { - LOG(FATAL) << "Unimplemented"; - } else { - DCHECK(source.IsDoubleStackSlot()); - EmitParallelMoves( - Location::StackSlot(source.GetStackIndex()), - Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index)), - Location::StackSlot(source.GetHighStackIndex(kX86WordSize)), - Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index + 1))); - __ movl(calling_convention.GetRegisterAt(register_index), Address(ESP, source.GetStackIndex())); - } } else if (destination.IsFpuRegister()) { if (source.IsFpuRegister()) { __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>()); @@ -678,18 +648,6 @@ void CodeGeneratorX86::Move64(Location destination, Location source) { __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>()); __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), source.AsRegisterPairHigh<Register>()); - } else if (source.IsQuickParameter()) { - // No conflict possible, so just do the move. - InvokeDexCallingConvention calling_convention; - uint16_t register_index = source.GetQuickParameterRegisterIndex(); - uint16_t stack_index = source.GetQuickParameterStackIndex(); - // Just move the low part. The only time a source is a quick parameter is - // when moving the parameter to its stack locations. And the (Java) caller - // of this method has already done that. - __ movl(Address(ESP, destination.GetStackIndex()), - calling_convention.GetRegisterAt(register_index)); - DCHECK_EQ(calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize(), - static_cast<size_t>(destination.GetHighStackIndex(kX86WordSize))); } else if (source.IsFpuRegister()) { __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>()); } else { diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index dfa4748..e0e0b4c 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -180,10 +180,11 @@ static void RunCodeOptimized(HGraph* graph, static void TestCode(const uint16_t* data, bool has_result = false, int32_t expected = 0) { ArenaPool pool; ArenaAllocator arena(&pool); - HGraphBuilder builder(&arena); + HGraph* graph = new (&arena) HGraph(&arena); + HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); - HGraph* graph = builder.BuildGraph(*item); - ASSERT_NE(graph, nullptr); + bool graph_built = builder.BuildGraph(*item); + ASSERT_TRUE(graph_built); // Remove suspend checks, they cannot be executed in this context. 
RemoveSuspendChecks(graph); RunCodeBaseline(graph, has_result, expected); @@ -192,10 +193,11 @@ static void TestCode(const uint16_t* data, bool has_result = false, int32_t expe static void TestCodeLong(const uint16_t* data, bool has_result, int64_t expected) { ArenaPool pool; ArenaAllocator arena(&pool); - HGraphBuilder builder(&arena, Primitive::kPrimLong); + HGraph* graph = new (&arena) HGraph(&arena); + HGraphBuilder builder(graph, Primitive::kPrimLong); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); - HGraph* graph = builder.BuildGraph(*item); - ASSERT_NE(graph, nullptr); + bool graph_built = builder.BuildGraph(*item); + ASSERT_TRUE(graph_built); // Remove suspend checks, they cannot be executed in this context. RemoveSuspendChecks(graph); RunCodeBaseline(graph, has_result, expected); diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc index 3062e37..b246c6f 100644 --- a/compiler/optimizing/dominator_test.cc +++ b/compiler/optimizing/dominator_test.cc @@ -27,10 +27,11 @@ namespace art { static void TestCode(const uint16_t* data, const int* blocks, size_t blocks_length) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraphBuilder builder(&allocator); + HGraph* graph = new (&allocator) HGraph(&allocator); + HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); - HGraph* graph = builder.BuildGraph(*item); - ASSERT_NE(graph, nullptr); + bool graph_built = builder.BuildGraph(*item); + ASSERT_TRUE(graph_built); graph->BuildDominatorTree(); ASSERT_EQ(graph->GetBlocks().Size(), blocks_length); for (size_t i = 0, e = blocks_length; i < e; ++i) { diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc index 82fe03c..e05d9b3 100644 --- a/compiler/optimizing/find_loops_test.cc +++ b/compiler/optimizing/find_loops_test.cc @@ -28,9 +28,10 @@ namespace art { static HGraph* TestCode(const uint16_t* data, ArenaAllocator* allocator) { - HGraphBuilder builder(allocator); + HGraph* graph = new (allocator) HGraph(allocator); + HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); - HGraph* graph = builder.BuildGraph(*item); + builder.BuildGraph(*item); graph->BuildDominatorTree(); graph->AnalyzeNaturalLoops(); return graph; diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 35c5269..4ebb136 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -153,8 +153,9 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { ? 
use->GetBlock()->GetPhis() : use->GetBlock()->GetInstructions(); if (!list.Contains(use)) { - AddError(StringPrintf("User %d of instruction %d is not defined " + AddError(StringPrintf("User %s:%d of instruction %d is not defined " "in a basic block of the control-flow graph.", + use->DebugName(), use->GetId(), instruction->GetId())); } diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 22a3d12..835bca6 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -194,7 +194,22 @@ class HGraphVisualizerPrinter : public HGraphVisitor { } output_ << "]"; } - if (pass_name_ == kLivenessPassName && instruction->GetLifetimePosition() != kNoLifetime) { + if (instruction->HasEnvironment()) { + HEnvironment* env = instruction->GetEnvironment(); + output_ << " (env: [ "; + for (size_t i = 0, e = env->Size(); i < e; ++i) { + HInstruction* insn = env->GetInstructionAt(i); + if (insn != nullptr) { + output_ << GetTypeId(insn->GetType()) << insn->GetId() << " "; + } else { + output_ << " _ "; + } + } + output_ << "])"; + } + if (pass_name_ == kLivenessPassName + && is_after_pass_ + && instruction->GetLifetimePosition() != kNoLifetime) { output_ << " (liveness: " << instruction->GetLifetimePosition(); if (instruction->HasLiveInterval()) { output_ << " "; @@ -202,7 +217,7 @@ class HGraphVisualizerPrinter : public HGraphVisitor { interval.Dump(output_); } output_ << ")"; - } else if (pass_name_ == kRegisterAllocatorPassName) { + } else if (pass_name_ == kRegisterAllocatorPassName && is_after_pass_) { LocationSummary* locations = instruction->GetLocations(); if (locations != nullptr) { output_ << " ( "; @@ -310,18 +325,13 @@ class HGraphVisualizerPrinter : public HGraphVisitor { HGraphVisualizer::HGraphVisualizer(std::ostream* output, HGraph* graph, - const char* string_filter, const CodeGenerator& codegen, const char* method_name) - : output_(output), graph_(graph), codegen_(codegen), is_enabled_(false) { + : output_(output), graph_(graph), codegen_(codegen) { if (output == nullptr) { return; } - if (strstr(method_name, string_filter) == nullptr) { - return; - } - is_enabled_ = true; HGraphVisualizerPrinter printer(graph_, *output_, "", true, codegen_); printer.StartTag("compilation"); printer.PrintProperty("name", method_name); @@ -331,7 +341,8 @@ HGraphVisualizer::HGraphVisualizer(std::ostream* output, } void HGraphVisualizer::DumpGraph(const char* pass_name, bool is_after_pass) const { - if (is_enabled_) { + DCHECK(output_ != nullptr); + if (!graph_->GetBlocks().IsEmpty()) { HGraphVisualizerPrinter printer(graph_, *output_, pass_name, is_after_pass, codegen_); printer.Run(); } diff --git a/compiler/optimizing/graph_visualizer.h b/compiler/optimizing/graph_visualizer.h index 8d6fe04..bc553ae 100644 --- a/compiler/optimizing/graph_visualizer.h +++ b/compiler/optimizing/graph_visualizer.h @@ -35,7 +35,6 @@ class HGraphVisualizer : public ValueObject { public: HGraphVisualizer(std::ostream* output, HGraph* graph, - const char* string_filter, const CodeGenerator& codegen, const char* method_name); @@ -46,10 +45,6 @@ class HGraphVisualizer : public ValueObject { HGraph* const graph_; const CodeGenerator& codegen_; - // Is true when `output_` is not null, and the compiled method's name - // contains the string_filter given in the constructor. 
- bool is_enabled_; - DISALLOW_COPY_AND_ASSIGN(HGraphVisualizer); }; diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 532167c..32f6972 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -35,24 +35,26 @@ namespace art { static constexpr int kMaxInlineCodeUnits = 100; -static constexpr int kMaxInlineNumberOfBlocks = 3; +static constexpr int kDepthLimit = 5; void HInliner::Run() { - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - for (HInstructionIterator instr_it(it.Current()->GetInstructions()); - !instr_it.Done(); - instr_it.Advance()) { - HInvokeStaticOrDirect* current = instr_it.Current()->AsInvokeStaticOrDirect(); - if (current != nullptr) { - if (!TryInline(current, current->GetDexMethodIndex(), current->GetInvokeType())) { + const GrowableArray<HBasicBlock*>& blocks = graph_->GetReversePostOrder(); + for (size_t i = 0; i < blocks.Size(); ++i) { + HBasicBlock* block = blocks.Get(i); + for (HInstruction* instruction = block->GetFirstInstruction(); instruction != nullptr;) { + HInstruction* next = instruction->GetNext(); + HInvokeStaticOrDirect* call = instruction->AsInvokeStaticOrDirect(); + if (call != nullptr) { + if (!TryInline(call, call->GetDexMethodIndex(), call->GetInvokeType())) { if (kIsDebugBuild) { std::string callee_name = - PrettyMethod(current->GetDexMethodIndex(), *outer_compilation_unit_.GetDexFile()); + PrettyMethod(call->GetDexMethodIndex(), *outer_compilation_unit_.GetDexFile()); bool should_inline = callee_name.find("$inline$") != std::string::npos; CHECK(!should_inline) << "Could not inline " << callee_name; } } } + instruction = next; } } } @@ -122,28 +124,23 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, resolved_method->GetAccessFlags(), nullptr); + HGraph* callee_graph = + new (graph_->GetArena()) HGraph(graph_->GetArena(), graph_->GetCurrentInstructionId()); + OptimizingCompilerStats inline_stats; - HGraphBuilder builder(graph_->GetArena(), + HGraphBuilder builder(callee_graph, &dex_compilation_unit, &outer_compilation_unit_, &outer_dex_file, compiler_driver_, &inline_stats); - HGraph* callee_graph = builder.BuildGraph(*code_item, graph_->GetCurrentInstructionId()); - if (callee_graph == nullptr) { + if (!builder.BuildGraph(*code_item)) { VLOG(compiler) << "Method " << PrettyMethod(method_index, outer_dex_file) << " could not be built, so cannot be inlined"; return false; } - if (callee_graph->GetBlocks().Size() > kMaxInlineNumberOfBlocks) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, outer_dex_file) - << " has too many blocks to be inlined: " - << callee_graph->GetBlocks().Size(); - return false; - } - if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph, compiler_driver_->GetInstructionSet())) { VLOG(compiler) << "Method " << PrettyMethod(method_index, outer_dex_file) @@ -157,8 +154,34 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, return false; } + // Run simple optimizations on the graph. 
+ SsaRedundantPhiElimination redundant_phi(callee_graph); + SsaDeadPhiElimination dead_phi(callee_graph); + HDeadCodeElimination dce(callee_graph); + HConstantFolding fold(callee_graph); + InstructionSimplifier simplify(callee_graph); + + HOptimization* optimizations[] = { + &redundant_phi, + &dead_phi, + &dce, + &fold, + &simplify, + }; + + for (size_t i = 0; i < arraysize(optimizations); ++i) { + HOptimization* optimization = optimizations[i]; + optimization->Run(); + } + + if (depth_ + 1 < kDepthLimit) { + HInliner inliner( + callee_graph, outer_compilation_unit_, compiler_driver_, outer_stats_, depth_ + 1); + inliner.Run(); + } + HReversePostOrderIterator it(*callee_graph); - it.Advance(); // Past the entry block to avoid seeing the suspend check. + it.Advance(); // Past the entry block, it does not contain instructions that prevent inlining. for (; !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); if (block->IsLoopHeader()) { @@ -171,6 +194,10 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, !instr_it.Done(); instr_it.Advance()) { HInstruction* current = instr_it.Current(); + if (current->IsSuspendCheck()) { + continue; + } + if (current->CanThrow()) { VLOG(compiler) << "Method " << PrettyMethod(method_index, outer_dex_file) << " could not be inlined because " << current->DebugName() @@ -187,26 +214,6 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, } } - // Run simple optimizations on the graph. - SsaRedundantPhiElimination redundant_phi(callee_graph); - SsaDeadPhiElimination dead_phi(callee_graph); - HDeadCodeElimination dce(callee_graph); - HConstantFolding fold(callee_graph); - InstructionSimplifier simplify(callee_graph); - - HOptimization* optimizations[] = { - &redundant_phi, - &dead_phi, - &dce, - &fold, - &simplify, - }; - - for (size_t i = 0; i < arraysize(optimizations); ++i) { - HOptimization* optimization = optimizations[i]; - optimization->Run(); - } - callee_graph->InlineInto(graph_, invoke_instruction); // Now that we have inlined the callee, we need to update the next diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index 370e33c..07d893e 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -33,11 +33,13 @@ class HInliner : public HOptimization { HInliner(HGraph* outer_graph, const DexCompilationUnit& outer_compilation_unit, CompilerDriver* compiler_driver, - OptimizingCompilerStats* stats) + OptimizingCompilerStats* stats, + size_t depth = 0) : HOptimization(outer_graph, true, "inliner"), outer_compilation_unit_(outer_compilation_unit), compiler_driver_(compiler_driver), - outer_stats_(stats) {} + outer_stats_(stats), + depth_(depth) {} void Run() OVERRIDE; @@ -47,6 +49,7 @@ class HInliner : public HOptimization { const DexCompilationUnit& outer_compilation_unit_; CompilerDriver* const compiler_driver_; OptimizingCompilerStats* const outer_stats_; + const size_t depth_; DISALLOW_COPY_AND_ASSIGN(HInliner); }; diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index ba26afe..7a3d7d8 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -930,7 +930,10 @@ void IntrinsicLocationsBuilderARM64::VisitStringCharAt(HInvoke* invoke) { kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // In case we need to go in the slow path, we can't have the output be 
the same + // as the input: the current liveness analysis considers the input to be live + // at the point of the call. + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); } void IntrinsicCodeGeneratorARM64::VisitStringCharAt(HInvoke* invoke) { diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index 2ab9b57..eb27965 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -38,10 +38,11 @@ namespace art { static void TestCode(const uint16_t* data, const int* expected_order, size_t number_of_blocks) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraphBuilder builder(&allocator); + HGraph* graph = new (&allocator) HGraph(&allocator); + HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); - HGraph* graph = builder.BuildGraph(*item); - ASSERT_NE(graph, nullptr); + bool graph_built = builder.BuildGraph(*item); + ASSERT_TRUE(graph_built); graph->TryBuildingSsa(); diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index 92742f9..0558b85 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -31,9 +31,10 @@ namespace art { static HGraph* BuildGraph(const uint16_t* data, ArenaAllocator* allocator) { - HGraphBuilder builder(allocator); + HGraph* graph = new (allocator) HGraph(allocator); + HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); - HGraph* graph = builder.BuildGraph(*item); + builder.BuildGraph(*item); // Suspend checks implementation may change in the future, and this test relies // on how instructions are ordered. RemoveSuspendChecks(graph); diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index f2d49ac..c9be570 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -45,10 +45,11 @@ static void DumpBitVector(BitVector* vector, static void TestCode(const uint16_t* data, const char* expected) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraphBuilder builder(&allocator); + HGraph* graph = new (&allocator) HGraph(&allocator); + HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); - HGraph* graph = builder.BuildGraph(*item); - ASSERT_NE(graph, nullptr); + bool graph_built = builder.BuildGraph(*item); + ASSERT_TRUE(graph_built); graph->TryBuildingSsa(); // `Inline` conditions into ifs. PrepareForRegisterAllocation(graph).Run(); diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 8b06d60..bf27c5c 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -62,17 +62,11 @@ class Location : public ValueObject { // We do not use the value 9 because it conflicts with kLocationConstantMask. kDoNotUse9 = 9, - // On 32bits architectures, quick can pass a long where the - // low bits are in the last parameter register, and the high - // bits are in a stack slot. The kQuickParameter kind is for - // handling this special case. - kQuickParameter = 10, - // Unallocated location represents a location that is not fixed and can be // allocated by a register allocator. Each unallocated location has // a policy that specifies what kind of location is suitable. Payload // contains register allocation policy. 
- kUnallocated = 11, + kUnallocated = 10, }; Location() : value_(kInvalid) { @@ -82,7 +76,6 @@ class Location : public ValueObject { static_assert((kStackSlot & kLocationConstantMask) != kConstant, "TagError"); static_assert((kDoubleStackSlot & kLocationConstantMask) != kConstant, "TagError"); static_assert((kRegister & kLocationConstantMask) != kConstant, "TagError"); - static_assert((kQuickParameter & kLocationConstantMask) != kConstant, "TagError"); static_assert((kFpuRegister & kLocationConstantMask) != kConstant, "TagError"); static_assert((kRegisterPair & kLocationConstantMask) != kConstant, "TagError"); static_assert((kFpuRegisterPair & kLocationConstantMask) != kConstant, "TagError"); @@ -267,24 +260,6 @@ class Location : public ValueObject { return GetPayload() - kStackIndexBias + word_size; } - static Location QuickParameter(uint16_t register_index, uint16_t stack_index) { - return Location(kQuickParameter, register_index << 16 | stack_index); - } - - uint32_t GetQuickParameterRegisterIndex() const { - DCHECK(IsQuickParameter()); - return GetPayload() >> 16; - } - - uint32_t GetQuickParameterStackIndex() const { - DCHECK(IsQuickParameter()); - return GetPayload() & 0xFFFF; - } - - bool IsQuickParameter() const { - return GetKind() == kQuickParameter; - } - Kind GetKind() const { return IsConstant() ? kConstant : KindField::Decode(value_); } @@ -299,7 +274,6 @@ class Location : public ValueObject { case kRegister: return "R"; case kStackSlot: return "S"; case kDoubleStackSlot: return "DS"; - case kQuickParameter: return "Q"; case kUnallocated: return "U"; case kConstant: return "C"; case kFpuRegister: return "F"; @@ -482,16 +456,17 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { } void SetOut(Location location, Location::OutputOverlap overlaps = Location::kOutputOverlap) { - DCHECK(output_.IsUnallocated() || output_.IsInvalid()); + DCHECK(output_.IsInvalid()); output_overlaps_ = overlaps; output_ = location; } void UpdateOut(Location location) { - // The only reason for updating an output is for parameters where - // we only know the exact stack slot after doing full register - // allocation. - DCHECK(output_.IsStackSlot() || output_.IsDoubleStackSlot()); + // There are two reasons for updating an output: + // 1) Parameters, where we only know the exact stack slot after + // doing full register allocation. + // 2) Unallocated location. + DCHECK(output_.IsStackSlot() || output_.IsDoubleStackSlot() || output_.IsUnallocated()); output_ = location; } @@ -563,28 +538,22 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { return live_registers_.GetNumberOfRegisters(); } - bool InputOverlapsWithOutputOrTemp(uint32_t input_index, bool is_environment) const { - if (is_environment) return true; - if ((input_index == 0) + bool OutputUsesSameAs(uint32_t input_index) const { + return (input_index == 0) && output_.IsUnallocated() - && (output_.GetPolicy() == Location::kSameAsFirstInput)) { - return false; - } + && (output_.GetPolicy() == Location::kSameAsFirstInput); + } + + bool IsFixedInput(uint32_t input_index) const { Location input = inputs_.Get(input_index); - if (input.IsRegister() + return input.IsRegister() || input.IsFpuRegister() || input.IsPair() || input.IsStackSlot() - || input.IsDoubleStackSlot()) { - // For fixed locations, the register allocator requires to have inputs die before - // the instruction, so that input moves use the location of the input just - // before that instruction (and not potential moves due to splitting). 
- return false; - } - return true; + || input.IsDoubleStackSlot(); } - bool OutputOverlapsWithInputs() const { + bool OutputCanOverlapWithInputs() const { return output_overlaps_ == Location::kOutputOverlap; } diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 5fd75f6..cd36598 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -292,6 +292,10 @@ bool HGraph::AnalyzeNaturalLoops() const { return true; } +void HLoopInformation::Add(HBasicBlock* block) { + blocks_.SetBit(block->GetBlockId()); +} + void HLoopInformation::PopulateRecursive(HBasicBlock* block) { if (blocks_.IsBitSet(block->GetBlockId())) { return; @@ -730,10 +734,121 @@ void HInstruction::MoveBefore(HInstruction* cursor) { } } -void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { - // We currently only support graphs with one entry block, one body block, and one exit block. - DCHECK_EQ(GetBlocks().Size(), 3u); +HBasicBlock* HBasicBlock::SplitAfter(HInstruction* cursor) { + DCHECK(!cursor->IsControlFlow()); + DCHECK_NE(instructions_.last_instruction_, cursor); + DCHECK_EQ(cursor->GetBlock(), this); + + HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(), GetDexPc()); + new_block->instructions_.first_instruction_ = cursor->GetNext(); + new_block->instructions_.last_instruction_ = instructions_.last_instruction_; + cursor->next_->previous_ = nullptr; + cursor->next_ = nullptr; + instructions_.last_instruction_ = cursor; + + new_block->instructions_.SetBlockOfInstructions(new_block); + for (size_t i = 0, e = GetSuccessors().Size(); i < e; ++i) { + HBasicBlock* successor = GetSuccessors().Get(i); + new_block->successors_.Add(successor); + successor->predecessors_.Put(successor->GetPredecessorIndexOf(this), new_block); + } + successors_.Reset(); + + for (size_t i = 0, e = GetDominatedBlocks().Size(); i < e; ++i) { + HBasicBlock* dominated = GetDominatedBlocks().Get(i); + dominated->dominator_ = new_block; + new_block->dominated_blocks_.Add(dominated); + } + dominated_blocks_.Reset(); + return new_block; +} + +void HInstructionList::SetBlockOfInstructions(HBasicBlock* block) const { + for (HInstruction* current = first_instruction_; + current != nullptr; + current = current->GetNext()) { + current->SetBlock(block); + } +} + +void HInstructionList::AddAfter(HInstruction* cursor, const HInstructionList& instruction_list) { + DCHECK(Contains(cursor)); + if (!instruction_list.IsEmpty()) { + if (cursor == last_instruction_) { + last_instruction_ = instruction_list.last_instruction_; + } else { + cursor->next_->previous_ = instruction_list.last_instruction_; + } + instruction_list.last_instruction_->next_ = cursor->next_; + cursor->next_ = instruction_list.first_instruction_; + instruction_list.first_instruction_->previous_ = cursor; + } +} + +void HInstructionList::Add(const HInstructionList& instruction_list) { + DCHECK(!IsEmpty()); + AddAfter(last_instruction_, instruction_list); +} + +void HBasicBlock::MergeWith(HBasicBlock* other) { + DCHECK(successors_.IsEmpty()) << "Unimplemented block merge scenario"; + DCHECK(dominated_blocks_.IsEmpty()) << "Unimplemented block merge scenario"; + DCHECK(other->GetDominator()->IsEntryBlock() && other->GetGraph() != graph_) + << "Unimplemented block merge scenario"; + DCHECK(other->GetPhis().IsEmpty()); + + successors_.Reset(); + dominated_blocks_.Reset(); + instructions_.Add(other->GetInstructions()); + other->GetInstructions().SetBlockOfInstructions(this); + + while (!other->GetSuccessors().IsEmpty()) { + 
HBasicBlock* successor = other->GetSuccessors().Get(0); + successor->ReplacePredecessor(other, this); + } + + for (size_t i = 0, e = other->GetDominatedBlocks().Size(); i < e; ++i) { + HBasicBlock* dominated = other->GetDominatedBlocks().Get(i); + dominated_blocks_.Add(dominated); + dominated->SetDominator(this); + } + other->dominated_blocks_.Reset(); + other->dominator_ = nullptr; + other->graph_ = nullptr; +} + +void HBasicBlock::ReplaceWith(HBasicBlock* other) { + while (!GetPredecessors().IsEmpty()) { + HBasicBlock* predecessor = GetPredecessors().Get(0); + predecessor->ReplaceSuccessor(this, other); + } + while (!GetSuccessors().IsEmpty()) { + HBasicBlock* successor = GetSuccessors().Get(0); + successor->ReplacePredecessor(this, other); + } + for (size_t i = 0; i < dominated_blocks_.Size(); ++i) { + other->AddDominatedBlock(dominated_blocks_.Get(i)); + } + GetDominator()->ReplaceDominatedBlock(this, other); + other->SetDominator(GetDominator()); + dominator_ = nullptr; + graph_ = nullptr; +} + +// Create space in `blocks` for adding `number_of_new_blocks` entries +// starting at location `at`. Blocks after `at` are moved accordingly. +static void MakeRoomFor(GrowableArray<HBasicBlock*>* blocks, + size_t number_of_new_blocks, + size_t at) { + size_t old_size = blocks->Size(); + size_t new_size = old_size + number_of_new_blocks; + blocks->SetSize(new_size); + for (size_t i = old_size - 1, j = new_size - 1; i > at; --i, --j) { + blocks->Put(j, blocks->Get(i)); + } +} +void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { // Walk over the entry block and: // - Move constants from the entry block to the outer_graph's entry block, // - Replace HParameterValue instructions with their real value. @@ -751,41 +866,124 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { } } - // Insert the body's instructions except the last, just after the `invoke` - // instruction. - HBasicBlock* body = GetBlocks().Get(1); - DCHECK(!body->IsExitBlock()); - HInstruction* last = body->GetLastInstruction(); - HInstruction* first = body->GetFirstInstruction(); - - if (first != last) { - HInstruction* antelast = last->GetPrevious(); - - // Update the instruction list of the body to only contain the last - // instruction. - last->previous_ = nullptr; - body->instructions_.first_instruction_ = last; - body->instructions_.last_instruction_ = last; - - // Update the instruction list of the `invoke`'s block to now contain the - // body's instructions. - antelast->next_ = invoke->GetNext(); - antelast->next_->previous_ = antelast; - first->previous_ = invoke; - invoke->next_ = first; - - // Update the block pointer of all instructions. - for (HInstruction* current = antelast; current != invoke; current = current->GetPrevious()) { - current->SetBlock(invoke->GetBlock()); + if (GetBlocks().Size() == 3) { + // Simple case of an entry block, a body block, and an exit block. + // Put the body block's instruction into `invoke`'s block. + HBasicBlock* body = GetBlocks().Get(1); + DCHECK(GetBlocks().Get(0)->IsEntryBlock()); + DCHECK(GetBlocks().Get(2)->IsExitBlock()); + DCHECK(!body->IsExitBlock()); + HInstruction* last = body->GetLastInstruction(); + + invoke->GetBlock()->instructions_.AddAfter(invoke, body->GetInstructions()); + body->GetInstructions().SetBlockOfInstructions(invoke->GetBlock()); + + // Replace the invoke with the return value of the inlined graph. 
+ if (last->IsReturn()) { + invoke->ReplaceWith(last->InputAt(0)); + } else { + DCHECK(last->IsReturnVoid()); } - } - // Replace the invoke with the return value of the inlined graph. - if (last->IsReturn()) { - invoke->ReplaceWith(last->InputAt(0)); - body->RemoveInstruction(last); + invoke->GetBlock()->RemoveInstruction(last); } else { - DCHECK(last->IsReturnVoid()); + // Need to inline multiple blocks. We split `invoke`'s block + // into two blocks, merge the first block of the inlined graph into + // the first half, and replace the exit block of the inlined graph + // with the second half. + ArenaAllocator* allocator = outer_graph->GetArena(); + HBasicBlock* at = invoke->GetBlock(); + HBasicBlock* to = at->SplitAfter(invoke); + + HBasicBlock* first = entry_block_->GetSuccessors().Get(0); + DCHECK(!first->IsInLoop()); + at->MergeWith(first); + exit_block_->ReplaceWith(to); + + // Update all predecessors of the exit block (now the `to` block) + // to not `HReturn` but `HGoto` instead. Also collect the return + // values if any, and potentially make it a phi if there are multiple + // predecessors. + HInstruction* return_value = nullptr; + for (size_t i = 0, e = to->GetPredecessors().Size(); i < e; ++i) { + HBasicBlock* predecessor = to->GetPredecessors().Get(i); + HInstruction* last = predecessor->GetLastInstruction(); + if (!last->IsReturnVoid()) { + if (return_value != nullptr) { + if (!return_value->IsPhi()) { + HPhi* phi = new (allocator) HPhi(allocator, kNoRegNumber, 0, invoke->GetType()); + to->AddPhi(phi); + phi->AddInput(return_value); + return_value = phi; + } + return_value->AsPhi()->AddInput(last->InputAt(0)); + } else { + return_value = last->InputAt(0); + } + } + predecessor->AddInstruction(new (allocator) HGoto()); + predecessor->RemoveInstruction(last); + } + + if (return_value != nullptr) { + invoke->ReplaceWith(return_value); + } + + // Update the meta information surrounding blocks: + // (1) the graph they are now in, + // (2) the reverse post order of that graph, + // (3) the potential loop information they are now in. + + // We don't add the entry block, the exit block, and the first block, which + // has been merged with `at`. + static constexpr int kNumberOfSkippedBlocksInCallee = 3; + + // We add the `to` block. + static constexpr int kNumberOfNewBlocksInCaller = 1; + size_t blocks_added = (reverse_post_order_.Size() - kNumberOfSkippedBlocksInCallee) + + kNumberOfNewBlocksInCaller; + + // Find the location of `at` in the outer graph's reverse post order. The new + // blocks will be added after it. + size_t index_of_at = 0; + while (outer_graph->reverse_post_order_.Get(index_of_at) != at) { + index_of_at++; + } + MakeRoomFor(&outer_graph->reverse_post_order_, blocks_added, index_of_at); + + // Do a reverse post order of the blocks in the callee and do (1), (2), + // and (3) to the blocks that apply. + HLoopInformation* info = at->GetLoopInformation(); + for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { + HBasicBlock* current = it.Current(); + if (current != exit_block_ && current != entry_block_ && current != first) { + DCHECK(!current->IsInLoop()); + DCHECK(current->GetGraph() == this); + current->SetGraph(outer_graph); + outer_graph->AddBlock(current); + outer_graph->reverse_post_order_.Put(++index_of_at, current); + if (info != nullptr) { + info->Add(current); + current->SetLoopInformation(info); + } + } + } + + // Do (1), (2), and (3) to `to`. 
+ to->SetGraph(outer_graph); + outer_graph->AddBlock(to); + outer_graph->reverse_post_order_.Put(++index_of_at, to); + if (info != nullptr) { + info->Add(to); + to->SetLoopInformation(info); + if (info->IsBackEdge(at)) { + // Only `at` can become a back edge, as the inlined blocks + // are predecessors of `at`. + DCHECK_EQ(1u, info->NumberOfBackEdges()); + info->ClearBackEdges(); + info->AddBackEdge(to); + } + } } // Finally remove the invoke from the caller. diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 6f7bc0c..30d869d 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -73,6 +73,15 @@ class HInstructionList { bool FoundBefore(const HInstruction* instruction1, const HInstruction* instruction2) const; + bool IsEmpty() const { return first_instruction_ == nullptr; } + void Clear() { first_instruction_ = last_instruction_ = nullptr; } + + // Update the block of all instructions to be `block`. + void SetBlockOfInstructions(HBasicBlock* block) const; + + void AddAfter(HInstruction* cursor, const HInstructionList& instruction_list); + void Add(const HInstructionList& instruction_list); + private: HInstruction* first_instruction_; HInstruction* last_instruction_; @@ -241,6 +250,10 @@ class HLoopInformation : public ArenaObject<kArenaAllocMisc> { return header_; } + void SetHeader(HBasicBlock* block) { + header_ = block; + } + HSuspendCheck* GetSuspendCheck() const { return suspend_check_; } void SetSuspendCheck(HSuspendCheck* check) { suspend_check_ = check; } bool HasSuspendCheck() const { return suspend_check_ != nullptr; } @@ -288,6 +301,8 @@ class HLoopInformation : public ArenaObject<kArenaAllocMisc> { const ArenaBitVector& GetBlocks() const { return blocks_; } + void Add(HBasicBlock* block); + private: // Internal recursive implementation of `Populate`. 
void PopulateRecursive(HBasicBlock* block); @@ -351,6 +366,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { } HGraph* GetGraph() const { return graph_; } + void SetGraph(HGraph* graph) { graph_ = graph; } int GetBlockId() const { return block_id_; } void SetBlockId(int id) { block_id_ = id; } @@ -358,6 +374,16 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { HBasicBlock* GetDominator() const { return dominator_; } void SetDominator(HBasicBlock* dominator) { dominator_ = dominator; } void AddDominatedBlock(HBasicBlock* block) { dominated_blocks_.Add(block); } + void ReplaceDominatedBlock(HBasicBlock* existing, HBasicBlock* new_block) { + for (size_t i = 0, e = dominated_blocks_.Size(); i < e; ++i) { + if (dominated_blocks_.Get(i) == existing) { + dominated_blocks_.Put(i, new_block); + return; + } + } + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); + } int NumberOfBackEdges() const { return loop_information_ == nullptr @@ -384,10 +410,22 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { successors_.Put(successor_index, new_block); } + void ReplacePredecessor(HBasicBlock* existing, HBasicBlock* new_block) { + size_t predecessor_index = GetPredecessorIndexOf(existing); + DCHECK_NE(predecessor_index, static_cast<size_t>(-1)); + existing->RemoveSuccessor(this); + new_block->successors_.Add(this); + predecessors_.Put(predecessor_index, new_block); + } + void RemovePredecessor(HBasicBlock* block) { predecessors_.Delete(block); } + void RemoveSuccessor(HBasicBlock* block) { + successors_.Delete(block); + } + void ClearAllPredecessors() { predecessors_.Reset(); } @@ -422,6 +460,26 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { return -1; } + // Split the block into two blocks just after `cursor`. Returns the newly + // created block. Note that this method just updates raw block information, + // like predecessors, successors, dominators, and instruction list. It does not + // update the graph, reverse post order, loop information, nor make sure the + // blocks are consistent (for example ending with a control flow instruction). + HBasicBlock* SplitAfter(HInstruction* cursor); + + // Merge `other` at the end of `this`. Successors and dominated blocks of + // `other` are changed to be successors and dominated blocks of `this`. Note + // that this method does not update the graph, reverse post order, loop + // information, nor make sure the blocks are consistent (for example ending + // with a control flow instruction). + void MergeWith(HBasicBlock* other); + + // Replace `this` with `other`. Predecessors, successors, and dominated blocks + // of `this` are moved to `other`. + // Note that this method does not update the graph, reverse post order, loop + // information, nor make sure the blocks are consistent (for example ending + // with a control flow instruction). + void ReplaceWith(HBasicBlock* other); + void AddInstruction(HInstruction* instruction); void RemoveInstruction(HInstruction* instruction); void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor); @@ -446,8 +504,9 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { return loop_information_; } - // Set the loop_information_ on this block. This method overrides the current + // Set the loop_information_ on this block. Overrides the current // loop_information if it is an outer loop of the passed loop information. + // Note that this method is called while creating the loop information. void SetInLoop(HLoopInformation* info) { if (IsLoopHeader()) { // Nothing to do.
This just means `info` is an outer loop. @@ -465,6 +524,11 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { } } + // Raw update of the loop information. + void SetLoopInformation(HLoopInformation* info) { + loop_information_ = info; + } + bool IsInLoop() const { return loop_information_ != nullptr; } // Returns whether this block dominates the block passed as parameter. @@ -482,7 +546,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { void SetIsCatchBlock() { is_catch_block_ = true; } private: - HGraph* const graph_; + HGraph* graph_; GrowableArray<HBasicBlock*> predecessors_; GrowableArray<HBasicBlock*> successors_; HInstructionList instructions_; @@ -2180,6 +2244,8 @@ class HTypeConversion : public HExpression<1> { DISALLOW_COPY_AND_ASSIGN(HTypeConversion); }; +static constexpr uint32_t kNoRegNumber = -1; + class HPhi : public HInstruction { public: HPhi(ArenaAllocator* arena, uint32_t reg_number, size_t number_of_inputs, Primitive::Type type) diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h index 9315d89..d9e082a 100644 --- a/compiler/optimizing/optimization.h +++ b/compiler/optimizing/optimization.h @@ -21,6 +21,8 @@ namespace art { +static const char* kBuilderPassName = "builder"; +static const char* kSsaBuilderPassName = "ssa_builder"; static const char* kLivenessPassName = "liveness"; static const char* kRegisterAllocatorPassName = "register"; static const char* kLoopInvariantCodeMotionPassName = "licm"; diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 50d7924..38f7daa 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -19,6 +19,8 @@ #include <fstream> #include <stdint.h> +#include "base/dumpable.h" +#include "base/timing_logger.h" #include "bounds_check_elimination.h" #include "builder.h" #include "code_generator.h" @@ -78,6 +80,70 @@ class CodeVectorAllocator FINAL : public CodeAllocator { */ static const char* kStringFilter = ""; +class PassInfoPrinter : public ValueObject { + public: + PassInfoPrinter(HGraph* graph, + const char* method_name, + const CodeGenerator& codegen, + std::ostream* visualizer_output, + bool timing_logger_enabled, + bool visualizer_enabled) + : method_name_(method_name), + timing_logger_enabled_(timing_logger_enabled), + timing_logger_running_(false), + timing_logger_(method_name, true, true), + visualizer_enabled_(visualizer_enabled), + visualizer_(visualizer_output, graph, codegen, method_name_) { + if (strstr(method_name, kStringFilter) == nullptr) { + timing_logger_enabled_ = visualizer_enabled_ = false; + } + } + + void BeforePass(const char* pass_name) { + // Dump graph first, then start timer. + if (visualizer_enabled_) { + visualizer_.DumpGraph(pass_name, /* is_after_pass */ false); + } + if (timing_logger_enabled_) { + DCHECK(!timing_logger_running_); + timing_logger_running_ = true; + timing_logger_.StartTiming(pass_name); + } + } + + void AfterPass(const char* pass_name) { + // Pause timer first, then dump graph. 
+ if (timing_logger_enabled_) { + DCHECK(timing_logger_running_); + timing_logger_.EndTiming(); + timing_logger_running_ = false; + } + if (visualizer_enabled_) { + visualizer_.DumpGraph(pass_name, /* is_after_pass */ true); + } + } + + ~PassInfoPrinter() { + if (timing_logger_enabled_) { + DCHECK(!timing_logger_running_); + LOG(INFO) << "TIMINGS " << method_name_; + LOG(INFO) << Dumpable<TimingLogger>(timing_logger_); + } + } + + private: + const char* method_name_; + + bool timing_logger_enabled_; + bool timing_logger_running_; + TimingLogger timing_logger_; + + bool visualizer_enabled_; + HGraphVisualizer visualizer_; + + DISALLOW_COPY_AND_ASSIGN(PassInfoPrinter); +}; + class OptimizingCompiler FINAL : public Compiler { public: explicit OptimizingCompiler(CompilerDriver* driver); @@ -123,7 +189,7 @@ class OptimizingCompiler FINAL : public Compiler { CodeGenerator* codegen, CompilerDriver* driver, const DexCompilationUnit& dex_compilation_unit, - const HGraphVisualizer& visualizer) const; + PassInfoPrinter* pass_info) const; // Just compile without doing optimizations. CompiledMethod* CompileBaseline(CodeGenerator* codegen, @@ -200,12 +266,12 @@ static bool CanOptimize(const DexFile::CodeItem& code_item) { static void RunOptimizations(HOptimization* optimizations[], size_t length, - const HGraphVisualizer& visualizer) { + PassInfoPrinter* pass_info) { for (size_t i = 0; i < length; ++i) { HOptimization* optimization = optimizations[i]; - visualizer.DumpGraph(optimization->GetPassName(), /*is_after=*/false); + pass_info->BeforePass(optimization->GetPassName()); optimization->Run(); - visualizer.DumpGraph(optimization->GetPassName(), /*is_after=*/true); + pass_info->AfterPass(optimization->GetPassName()); optimization->Check(); } } @@ -214,7 +280,7 @@ static void RunOptimizations(HGraph* graph, CompilerDriver* driver, OptimizingCompilerStats* stats, const DexCompilationUnit& dex_compilation_unit, - const HGraphVisualizer& visualizer) { + PassInfoPrinter* pass_info) { SsaRedundantPhiElimination redundant_phi(graph); SsaDeadPhiElimination dead_phi(graph); HDeadCodeElimination dce(graph); @@ -250,7 +316,7 @@ static void RunOptimizations(HGraph* graph, &simplify2 }; - RunOptimizations(optimizations, arraysize(optimizations), visualizer); + RunOptimizations(optimizations, arraysize(optimizations), pass_info); } // The stack map we generate must be 4-byte aligned on ARM. 
Since existing @@ -269,18 +335,20 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, CodeGenerator* codegen, CompilerDriver* compiler_driver, const DexCompilationUnit& dex_compilation_unit, - const HGraphVisualizer& visualizer) const { + PassInfoPrinter* pass_info) const { RunOptimizations( - graph, compiler_driver, &compilation_stats_, dex_compilation_unit, visualizer); + graph, compiler_driver, &compilation_stats_, dex_compilation_unit, pass_info); + pass_info->BeforePass(kLivenessPassName); PrepareForRegisterAllocation(graph).Run(); SsaLivenessAnalysis liveness(*graph, codegen); liveness.Analyze(); - visualizer.DumpGraph(kLivenessPassName); + pass_info->AfterPass(kLivenessPassName); + pass_info->BeforePass(kRegisterAllocatorPassName); RegisterAllocator register_allocator(graph->GetArena(), codegen, liveness); register_allocator.AllocateRegisters(); - visualizer.DumpGraph(kRegisterAllocatorPassName); + pass_info->AfterPass(kRegisterAllocatorPassName); CodeVectorAllocator allocator; codegen->CompileOptimized(&allocator); @@ -339,6 +407,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, jobject class_loader, const DexFile& dex_file) const { UNUSED(invoke_type); + std::string method_name = PrettyMethod(method_idx, dex_file); compilation_stats_.RecordStat(MethodCompilationStat::kAttemptCompilation); CompilerDriver* compiler_driver = GetCompilerDriver(); InstructionSet instruction_set = compiler_driver->GetInstructionSet(); @@ -364,29 +433,15 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, class_def_idx, method_idx, access_flags, compiler_driver->GetVerifiedMethod(&dex_file, method_idx)); - std::string method_name = PrettyMethod(method_idx, dex_file); + ArenaPool pool; + ArenaAllocator arena(&pool); + HGraph* graph = new (&arena) HGraph(&arena); // For testing purposes, we put a special marker on method names that should be compiled // with this compiler. This makes sure we're not regressing. 
bool shouldCompile = method_name.find("$opt$") != std::string::npos; bool shouldOptimize = method_name.find("$opt$reg$") != std::string::npos; - ArenaPool pool; - ArenaAllocator arena(&pool); - HGraphBuilder builder(&arena, - &dex_compilation_unit, - &dex_compilation_unit, - &dex_file, - compiler_driver, - &compilation_stats_); - - VLOG(compiler) << "Building " << PrettyMethod(method_idx, dex_file); - HGraph* graph = builder.BuildGraph(*code_item); - if (graph == nullptr) { - CHECK(!shouldCompile) << "Could not build graph in optimizing compiler"; - return nullptr; - } - std::unique_ptr<CodeGenerator> codegen( CodeGenerator::Create(graph, instruction_set, @@ -398,29 +453,53 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, return nullptr; } - HGraphVisualizer visualizer( - visualizer_output_.get(), graph, kStringFilter, *codegen.get(), method_name.c_str()); - visualizer.DumpGraph("builder"); + PassInfoPrinter pass_info(graph, + method_name.c_str(), + *codegen.get(), + visualizer_output_.get(), + GetCompilerDriver()->GetDumpPasses(), + !GetCompilerDriver()->GetDumpCfgFileName().empty()); + + HGraphBuilder builder(graph, + &dex_compilation_unit, + &dex_compilation_unit, + &dex_file, + compiler_driver, + &compilation_stats_); + + VLOG(compiler) << "Building " << method_name; + + pass_info.BeforePass(kBuilderPassName); + if (!builder.BuildGraph(*code_item)) { + CHECK(!shouldCompile) << "Could not build graph in optimizing compiler"; + return nullptr; + } + pass_info.AfterPass(kBuilderPassName); bool can_optimize = CanOptimize(*code_item); bool can_allocate_registers = RegisterAllocator::CanAllocateRegistersFor(*graph, instruction_set); - CompiledMethod* result = nullptr; if (run_optimizations_ && can_optimize && can_allocate_registers) { - VLOG(compiler) << "Optimizing " << PrettyMethod(method_idx, dex_file); + VLOG(compiler) << "Optimizing " << method_name; + + pass_info.BeforePass(kSsaBuilderPassName); if (!graph->TryBuildingSsa()) { - LOG(INFO) << "Skipping compilation of " - << PrettyMethod(method_idx, dex_file) - << ": it contains a non natural loop"; // We could not transform the graph to SSA, bailout. 
+ LOG(INFO) << "Skipping compilation of " << method_name << ": it contains a non natural loop"; compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA); - } else { - result = CompileOptimized(graph, codegen.get(), compiler_driver, dex_compilation_unit, visualizer); + return nullptr; } + pass_info.AfterPass(kSsaBuilderPassName); + + return CompileOptimized(graph, + codegen.get(), + compiler_driver, + dex_compilation_unit, + &pass_info); } else if (shouldOptimize && RegisterAllocator::Supports(instruction_set)) { LOG(FATAL) << "Could not allocate registers in optimizing compiler"; UNREACHABLE(); } else { - VLOG(compiler) << "Compile baseline " << PrettyMethod(method_idx, dex_file); + VLOG(compiler) << "Compile baseline " << method_name; if (!run_optimizations_) { compilation_stats_.RecordStat(MethodCompilationStat::kNotOptimizedDisabled); @@ -430,9 +509,8 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, compilation_stats_.RecordStat(MethodCompilationStat::kNotOptimizedRegisterAllocator); } - result = CompileBaseline(codegen.get(), compiler_driver, dex_compilation_unit); + return CompileBaseline(codegen.get(), compiler_driver, dex_compilation_unit); } - return result; } Compiler* CreateOptimizingCompiler(CompilerDriver* driver) { diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index 29d47e1..6b23692 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -76,11 +76,12 @@ void RemoveSuspendChecks(HGraph* graph) { inline HGraph* CreateCFG(ArenaAllocator* allocator, const uint16_t* data, Primitive::Type return_type = Primitive::kPrimInt) { - HGraphBuilder builder(allocator, return_type); + HGraph* graph = new (allocator) HGraph(allocator); + HGraphBuilder builder(graph, return_type); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); - HGraph* graph = builder.BuildGraph(*item); - return graph; + bool graph_built = builder.BuildGraph(*item); + return graph_built ? graph : nullptr; } // Naive string diff data type. 
diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc index da6b294..9cf8235 100644 --- a/compiler/optimizing/pretty_printer_test.cc +++ b/compiler/optimizing/pretty_printer_test.cc @@ -30,10 +30,11 @@ namespace art { static void TestCode(const uint16_t* data, const char* expected) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraphBuilder builder(&allocator); + HGraph* graph = new (&allocator) HGraph(&allocator); + HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); - HGraph* graph = builder.BuildGraph(*item); - ASSERT_NE(graph, nullptr); + bool graph_built = builder.BuildGraph(*item); + ASSERT_TRUE(graph_built); StringPrettyPrinter printer(graph); printer.VisitInsertionOrder(); ASSERT_STREQ(expected, printer.str().c_str()); @@ -100,17 +101,16 @@ TEST(PrettyPrinterTest, CFG2) { TEST(PrettyPrinterTest, CFG3) { const char* expected = "BasicBlock 0, succ: 1\n" - " 5: SuspendCheck\n" - " 6: Goto 1\n" + " 4: SuspendCheck\n" + " 5: Goto 1\n" "BasicBlock 1, pred: 0, succ: 3\n" " 0: Goto 3\n" "BasicBlock 2, pred: 3, succ: 4\n" " 1: ReturnVoid\n" "BasicBlock 3, pred: 1, succ: 2\n" - " 2: SuspendCheck\n" - " 3: Goto 2\n" + " 2: Goto 2\n" "BasicBlock 4, pred: 2\n" - " 4: Exit\n"; + " 3: Exit\n"; const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x200, @@ -160,15 +160,14 @@ TEST(PrettyPrinterTest, CFG4) { TEST(PrettyPrinterTest, CFG5) { const char* expected = "BasicBlock 0, succ: 1\n" - " 4: SuspendCheck\n" - " 5: Goto 1\n" + " 3: SuspendCheck\n" + " 4: Goto 1\n" "BasicBlock 1, pred: 0, 2, succ: 3\n" " 0: ReturnVoid\n" "BasicBlock 2, succ: 1\n" - " 1: SuspendCheck\n" - " 2: Goto 1\n" + " 1: Goto 1\n" "BasicBlock 3, pred: 1\n" - " 3: Exit\n"; + " 2: Exit\n"; const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( Instruction::RETURN_VOID, diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 6f8f688..3809720 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -485,6 +485,9 @@ bool RegisterAllocator::ValidateIntervals(const GrowableArray<LiveInterval*>& in BitVector* liveness_of_register = liveness_of_values.Get(current->GetRegister()); for (size_t j = it.CurrentRange()->GetStart(); j < it.CurrentRange()->GetEnd(); ++j) { if (liveness_of_register->IsBitSet(j)) { + if (current->IsUsingInputRegister() && current->CanUseInputRegister()) { + continue; + } if (log_fatal_on_failure) { std::ostringstream message; message << "Register conflict at " << j << " "; @@ -639,6 +642,29 @@ void RegisterAllocator::LinearScan() { } } +static void FreeIfNotCoverAt(LiveInterval* interval, size_t position, size_t* free_until) { + DCHECK(!interval->IsHighInterval()); + // Note that the same instruction may occur multiple times in the input list, + // so `free_until` may have changed already. + if (interval->IsDeadAt(position)) { + // Set the register to be free. Note that inactive intervals might later + // update this. + free_until[interval->GetRegister()] = kMaxLifetimePosition; + if (interval->HasHighInterval()) { + DCHECK(interval->GetHighInterval()->IsDeadAt(position)); + free_until[interval->GetHighInterval()->GetRegister()] = kMaxLifetimePosition; + } + } else if (!interval->Covers(position)) { + // The interval becomes inactive at `defined_by`. We make its register + // available only until the next use strictly after `defined_by`. 
+ free_until[interval->GetRegister()] = interval->FirstUseAfter(position); + if (interval->HasHighInterval()) { + DCHECK(!interval->GetHighInterval()->Covers(position)); + free_until[interval->GetHighInterval()->GetRegister()] = free_until[interval->GetRegister()]; + } + } +} + // Find a free register. If multiple are found, pick the register that // is free the longest. bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { @@ -656,6 +682,32 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { free_until[interval->GetRegister()] = 0; } + // An interval that starts an instruction (that is, it is not split), may + // re-use the registers used by the inputs of that instruction, based on the + // location summary. + HInstruction* defined_by = current->GetDefinedBy(); + if (defined_by != nullptr && !current->IsSplit()) { + LocationSummary* locations = defined_by->GetLocations(); + if (!locations->OutputCanOverlapWithInputs() && locations->Out().IsUnallocated()) { + for (HInputIterator it(defined_by); !it.Done(); it.Advance()) { + // Take the last interval of the input. It is the location of that interval + // that will be used at `defined_by`. + LiveInterval* interval = it.Current()->GetLiveInterval()->GetLastSibling(); + // Note that interval may have not been processed yet. + // TODO: Handle non-split intervals last in the work list. + if (interval->HasRegister() && interval->SameRegisterKind(*current)) { + // The input must be live until the end of `defined_by`, to comply to + // the linear scan algorithm. So we use `defined_by`'s end lifetime + // position to check whether the input is dead or is inactive after + // `defined_by`. + DCHECK(interval->Covers(defined_by->GetLifetimePosition())); + size_t position = defined_by->GetLifetimePosition() + 1; + FreeIfNotCoverAt(interval, position, free_until); + } + } + } + } + // For each inactive interval, set its register to be free until // the next intersection with `current`. for (size_t i = 0, e = inactive_.Size(); i < e; ++i) { @@ -1459,9 +1511,11 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, } void RegisterAllocator::Resolve() { - codegen_->ComputeFrameSize( - spill_slots_.Size(), maximum_number_of_live_core_registers_, - maximum_number_of_live_fp_registers_, reserved_out_slots_); + codegen_->InitializeCodeGeneration(spill_slots_.Size(), + maximum_number_of_live_core_registers_, + maximum_number_of_live_fp_registers_, + reserved_out_slots_, + liveness_.GetLinearOrder()); // Adjust the Out Location of instructions. // TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration. 
@@ -1495,7 +1549,7 @@ void RegisterAllocator::Resolve() { DCHECK(locations->InAt(0).Equals(source)); } } - locations->SetOut(source); + locations->UpdateOut(source); } else { DCHECK(source.Equals(location)); } diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index cb5010a..0cc00c0 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -37,9 +37,10 @@ namespace art { static bool Check(const uint16_t* data) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraphBuilder builder(&allocator); + HGraph* graph = new (&allocator) HGraph(&allocator); + HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); - HGraph* graph = builder.BuildGraph(*item); + builder.BuildGraph(*item); graph->TryBuildingSsa(); x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); @@ -249,9 +250,10 @@ TEST(RegisterAllocatorTest, Loop2) { } static HGraph* BuildSSAGraph(const uint16_t* data, ArenaAllocator* allocator) { - HGraphBuilder builder(allocator); + HGraph* graph = new (allocator) HGraph(allocator); + HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); - HGraph* graph = builder.BuildGraph(*item); + builder.BuildGraph(*item); graph->TryBuildingSsa(); return graph; } @@ -526,7 +528,7 @@ TEST(RegisterAllocatorTest, PhiHint) { // Set the phi to a specific register, and check that the inputs get allocated // the same register. - phi->GetLocations()->SetOut(Location::RegisterLocation(2)); + phi->GetLocations()->UpdateOut(Location::RegisterLocation(2)); RegisterAllocator register_allocator(&allocator, &codegen, liveness); register_allocator.AllocateRegisters(); @@ -543,7 +545,7 @@ TEST(RegisterAllocatorTest, PhiHint) { // Set input1 to a specific register, and check that the phi and other input get allocated // the same register. - input1->GetLocations()->SetOut(Location::RegisterLocation(2)); + input1->GetLocations()->UpdateOut(Location::RegisterLocation(2)); RegisterAllocator register_allocator(&allocator, &codegen, liveness); register_allocator.AllocateRegisters(); @@ -560,7 +562,7 @@ TEST(RegisterAllocatorTest, PhiHint) { // Set input2 to a specific register, and check that the phi and other input get allocated // the same register. - input2->GetLocations()->SetOut(Location::RegisterLocation(2)); + input2->GetLocations()->UpdateOut(Location::RegisterLocation(2)); RegisterAllocator register_allocator(&allocator, &codegen, liveness); register_allocator.AllocateRegisters(); @@ -685,7 +687,7 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) { liveness.Analyze(); // check that both adds get the same register. - // Don't use SetOutput because output is already allocated. + // Don't use UpdateOutput because output is already allocated. 
first_add->InputAt(0)->GetLocations()->output_ = Location::RegisterLocation(2); ASSERT_EQ(first_add->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput); ASSERT_EQ(second_add->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput); diff --git a/compiler/optimizing/side_effects_analysis.cc b/compiler/optimizing/side_effects_analysis.cc index 96e1c8f..ea1ca5a 100644 --- a/compiler/optimizing/side_effects_analysis.cc +++ b/compiler/optimizing/side_effects_analysis.cc @@ -19,6 +19,11 @@ namespace art { void SideEffectsAnalysis::Run() { + // Inlining might have created more blocks, so we need to increase the size + // if needed. + block_effects_.SetSize(graph_->GetBlocks().Size()); + loop_effects_.SetSize(graph_->GetBlocks().Size()); + if (kIsDebugBuild) { for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index b0d3853..0e68a61 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_SSA_LIVENESS_ANALYSIS_H_ #include "nodes.h" +#include <iostream> namespace art { @@ -181,12 +182,21 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { void AddUse(HInstruction* instruction, size_t input_index, bool is_environment) { // Set the use within the instruction. - size_t position = instruction->GetLifetimePosition(); - if (instruction->GetLocations()->InputOverlapsWithOutputOrTemp(input_index, is_environment)) { - // If it overlaps, we need to make sure the user will not try to allocate a temp - // or its output to the same register. - ++position; + size_t position = instruction->GetLifetimePosition() + 1; + LocationSummary* locations = instruction->GetLocations(); + if (!is_environment) { + if (locations->IsFixedInput(input_index) || locations->OutputUsesSameAs(input_index)) { + // For fixed inputs and output same as input, the register allocator + // requires to have inputs die at the instruction, so that input moves use the + // location of the input just before that instruction (and not potential moves due + // to splitting). + position = instruction->GetLifetimePosition(); + } } + + DCHECK(position == instruction->GetLifetimePosition() + || position == instruction->GetLifetimePosition() + 1); + if ((first_use_ != nullptr) && (first_use_->GetUser() == instruction) && (first_use_->GetPosition() < position)) { @@ -301,6 +311,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { LiveInterval* GetParent() const { return parent_; } LiveRange* GetFirstRange() const { return first_range_; } + LiveRange* GetLastRange() const { return last_range_; } int GetRegister() const { return register_; } void SetRegister(int reg) { register_ = reg; } @@ -403,6 +414,23 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { return FirstRegisterUseAfter(GetStart()); } + size_t FirstUseAfter(size_t position) const { + if (is_temp_) { + return position == GetStart() ? 
position : kNoLifetime; + } + + UsePosition* use = first_use_; + size_t end = GetEnd(); + while (use != nullptr && use->GetPosition() <= end) { + size_t use_position = use->GetPosition(); + if (use_position > position) { + return use_position; + } + use = use->GetNext(); + } + return kNoLifetime; + } + UsePosition* GetFirstUse() const { return first_use_; } @@ -511,6 +539,13 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { } LiveInterval* GetNextSibling() const { return next_sibling_; } + LiveInterval* GetLastSibling() { + LiveInterval* result = this; + while (result->next_sibling_ != nullptr) { + result = result->next_sibling_; + } + return result; + } // Returns the first register hint that is at least free before // the value contained in `free_until`. If none is found, returns @@ -541,6 +576,9 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { // Returns whether `other` and `this` share the same kind of register. bool SameRegisterKind(Location other) const; + bool SameRegisterKind(const LiveInterval& other) const { + return IsFloatingPoint() == other.IsFloatingPoint(); + } bool HasHighInterval() const { return IsLowInterval(); @@ -594,6 +632,60 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { } } + // Returns whether an interval, when it is non-split, is using + // the same register of one of its input. + bool IsUsingInputRegister() const { + if (defined_by_ != nullptr && !IsSplit()) { + for (HInputIterator it(defined_by_); !it.Done(); it.Advance()) { + LiveInterval* interval = it.Current()->GetLiveInterval(); + + // Find the interval that covers `defined_by`_. + while (interval != nullptr && !interval->Covers(defined_by_->GetLifetimePosition())) { + interval = interval->GetNextSibling(); + } + + // Check if both intervals have the same register of the same kind. + if (interval != nullptr + && interval->SameRegisterKind(*this) + && interval->GetRegister() == GetRegister()) { + return true; + } + } + } + return false; + } + + // Returns whether an interval, when it is non-split, can safely use + // the same register of one of its input. Note that this method requires + // IsUsingInputRegister() to be true. + bool CanUseInputRegister() const { + DCHECK(IsUsingInputRegister()); + if (defined_by_ != nullptr && !IsSplit()) { + LocationSummary* locations = defined_by_->GetLocations(); + if (locations->OutputCanOverlapWithInputs()) { + return false; + } + for (HInputIterator it(defined_by_); !it.Done(); it.Advance()) { + LiveInterval* interval = it.Current()->GetLiveInterval(); + + // Find the interval that covers `defined_by`_. + while (interval != nullptr && !interval->Covers(defined_by_->GetLifetimePosition())) { + interval = interval->GetNextSibling(); + } + + if (interval != nullptr + && interval->SameRegisterKind(*this) + && interval->GetRegister() == GetRegister()) { + // We found the input that has the same register. Check if it is live after + // `defined_by`_. 
+ return !interval->Covers(defined_by_->GetLifetimePosition() + 1); + } + } + } + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); + } + private: LiveInterval(ArenaAllocator* allocator, Primitive::Type type, diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc index 6b6bf05..7e90b37 100644 --- a/compiler/optimizing/ssa_test.cc +++ b/compiler/optimizing/ssa_test.cc @@ -78,10 +78,11 @@ static void ReNumberInstructions(HGraph* graph) { static void TestCode(const uint16_t* data, const char* expected) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraphBuilder builder(&allocator); + HGraph* graph = new (&allocator) HGraph(&allocator); + HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); - HGraph* graph = builder.BuildGraph(*item); - ASSERT_NE(graph, nullptr); + bool graph_built = builder.BuildGraph(*item); + ASSERT_TRUE(graph_built); graph->BuildDominatorTree(); // Suspend checks implementation may change in the future, and this test relies diff --git a/compiler/optimizing/suspend_check_test.cc b/compiler/optimizing/suspend_check_test.cc index 2e48ee8..a5a0eb2 100644 --- a/compiler/optimizing/suspend_check_test.cc +++ b/compiler/optimizing/suspend_check_test.cc @@ -30,10 +30,11 @@ namespace art { static void TestCode(const uint16_t* data) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraphBuilder builder(&allocator); + HGraph* graph = new (&allocator) HGraph(&allocator); + HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); - HGraph* graph = builder.BuildGraph(*item); - ASSERT_NE(graph, nullptr); + bool graph_built = builder.BuildGraph(*item); + ASSERT_TRUE(graph_built); HBasicBlock* first_block = graph->GetEntryBlock()->GetSuccessors().Get(0); HInstruction* first_instruction = first_block->GetFirstInstruction(); diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 3eccd3f..5383c28 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -25,8 +25,8 @@ namespace art { namespace arm { -bool Thumb2Assembler::ShifterOperandCanHold(Register rd, - Register rn, +bool Thumb2Assembler::ShifterOperandCanHold(Register rd ATTRIBUTE_UNUSED, + Register rn ATTRIBUTE_UNUSED, Opcode opcode, uint32_t immediate, ShifterOperand* shifter_op) { @@ -37,13 +37,6 @@ bool Thumb2Assembler::ShifterOperandCanHold(Register rd, switch (opcode) { case ADD: case SUB: - if (rn == SP) { - if (rd == SP) { - return immediate < (1 << 9); // 9 bits allowed. - } else { - return immediate < (1 << 12); // 12 bits. - } - } if (immediate < (1 << 12)) { // Less than (or equal to) 12 bits can always be done. return true; } @@ -713,7 +706,7 @@ bool Thumb2Assembler::Is32BitDataProcessing(Condition cond ATTRIBUTE_UNUSED, } bool can_contain_high_register = (opcode == MOV) - || ((opcode == ADD) && (rn == rd)); + || ((opcode == ADD) && (rn == rd) && !set_cc); if (IsHighRegister(rd) || IsHighRegister(rn)) { if (!can_contain_high_register) { @@ -927,41 +920,71 @@ void Thumb2Assembler::Emit16BitDataProcessing(Condition cond, if (so.IsImmediate()) { use_immediate = true; immediate = so.GetImmediate(); + } else { + // Adjust rn and rd: only two registers will be emitted. 
+ switch (opcode) { + case AND: + case ORR: + case EOR: + case RSB: + case ADC: + case SBC: + case BIC: { + if (rn == rd) { + rn = so.GetRegister(); + } else { + CHECK_EQ(rd, so.GetRegister()); + } + break; + } + case CMP: + case CMN: { + CHECK_EQ(rd, 0); + rd = rn; + rn = so.GetRegister(); + break; + } + case TST: + case TEQ: + case MVN: { + CHECK_EQ(rn, 0); + rn = so.GetRegister(); + break; + } + default: + break; + } } switch (opcode) { case AND: thumb_opcode = 0U /* 0b0000 */; break; + case ORR: thumb_opcode = 12U /* 0b1100 */; break; case EOR: thumb_opcode = 1U /* 0b0001 */; break; - case SUB: break; case RSB: thumb_opcode = 9U /* 0b1001 */; break; - case ADD: break; case ADC: thumb_opcode = 5U /* 0b0101 */; break; case SBC: thumb_opcode = 6U /* 0b0110 */; break; - case RSC: break; - case TST: thumb_opcode = 8U /* 0b1000 */; rn = so.GetRegister(); break; - case TEQ: break; - case CMP: + case BIC: thumb_opcode = 14U /* 0b1110 */; break; + case TST: thumb_opcode = 8U /* 0b1000 */; CHECK(!use_immediate); break; + case MVN: thumb_opcode = 15U /* 0b1111 */; CHECK(!use_immediate); break; + case CMP: { if (use_immediate) { // T2 encoding. - dp_opcode = 0; - opcode_shift = 11; - thumb_opcode = 5U /* 0b101 */; - rd_shift = 8; - rn_shift = 8; + dp_opcode = 0; + opcode_shift = 11; + thumb_opcode = 5U /* 0b101 */; + rd_shift = 8; + rn_shift = 8; } else { thumb_opcode = 10U /* 0b1010 */; - rd = rn; - rn = so.GetRegister(); } break; + } case CMN: { + CHECK(!use_immediate); thumb_opcode = 11U /* 0b1011 */; - rd = rn; - rn = so.GetRegister(); break; } - case ORR: thumb_opcode = 12U /* 0b1100 */; break; case MOV: dp_opcode = 0; if (use_immediate) { @@ -984,9 +1007,11 @@ void Thumb2Assembler::Emit16BitDataProcessing(Condition cond, } } break; - case BIC: thumb_opcode = 14U /* 0b1110 */; break; - case MVN: thumb_opcode = 15U /* 0b1111 */; rn = so.GetRegister(); break; + + case TEQ: + case RSC: default: + LOG(FATAL) << "Invalid thumb1 opcode " << opcode; break; } } @@ -1009,7 +1034,7 @@ void Thumb2Assembler::Emit16BitDataProcessing(Condition cond, // ADD and SUB are complex enough to warrant their own emitter. 
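In rough terms, the operand adjustment above exists because the 16-bit Thumb data-processing encodings carry only two low-register fields, so a three-operand request is representable only when the destination aliases one of the sources. A hedged sketch of that constraint; the helper name and the simplifications are mine, not the assembler's API.

// Illustration only: approximate encodability test for the 16-bit ALU forms
// rewritten above (AND, ORR, EOR, RSB, ADC, SBC, BIC). High registers and
// genuinely three-address forms need the 32-bit encodings instead.
static bool FitsThumb16TwoRegisterForm(arm::Register rd,
                                       arm::Register rn,
                                       arm::Register rm) {
  const bool all_low = rd < arm::R8 && rn < arm::R8 && rm < arm::R8;
  return all_low && (rd == rn || rd == rm);
}

Relatedly, the Emit16BitAddSub change just below accepts the short high-register-capable ADD encoding only when set_cc is false, since that encoding never updates the flags.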
void Thumb2Assembler::Emit16BitAddSub(Condition cond ATTRIBUTE_UNUSED, Opcode opcode, - bool set_cc ATTRIBUTE_UNUSED, + bool set_cc, Register rn, Register rd, const ShifterOperand& so) { @@ -1031,7 +1056,7 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond ATTRIBUTE_UNUSED, case ADD: if (so.IsRegister()) { Register rm = so.GetRegister(); - if (rn == rd) { + if (rn == rd && !set_cc) { // Can use T2 encoding (allows 4 bit registers) dp_opcode = 1U /* 0b01 */; opcode_shift = 10; diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc index 425ccd7..e571e72 100644 --- a/compiler/utils/arm/assembler_thumb2_test.cc +++ b/compiler/utils/arm/assembler_thumb2_test.cc @@ -30,11 +30,15 @@ class AssemblerThumb2Test : public AssemblerTest<arm::Thumb2Assembler, } std::string GetAssemblerParameters() OVERRIDE { - return " -mthumb -mfpu=neon"; + return " -march=armv7-a -mcpu=cortex-a15 -mfpu=neon -mthumb"; + } + + const char* GetAssemblyHeader() OVERRIDE { + return kThumb2AssemblyHeader; } std::string GetDisassembleParameters() OVERRIDE { - return " -D -bbinary -marm --no-show-raw-insn"; + return " -D -bbinary -marm --disassembler-options=force-thumb --no-show-raw-insn"; } void SetUpHelpers() OVERRIDE { @@ -76,6 +80,8 @@ class AssemblerThumb2Test : public AssemblerTest<arm::Thumb2Assembler, private: std::vector<arm::Register*> registers_; + + static constexpr const char* kThumb2AssemblyHeader = ".syntax unified\n.thumb\n"; }; @@ -192,4 +198,21 @@ TEST_F(AssemblerThumb2Test, strexd) { DriverStr(expected, "strexd"); } +TEST_F(AssemblerThumb2Test, eor) { +#define __ GetAssembler()-> + __ eor(arm::R1, arm::R1, arm::ShifterOperand(arm::R0)); + __ eor(arm::R1, arm::R0, arm::ShifterOperand(arm::R1)); + __ eor(arm::R1, arm::R8, arm::ShifterOperand(arm::R0)); + __ eor(arm::R8, arm::R1, arm::ShifterOperand(arm::R0)); + __ eor(arm::R1, arm::R0, arm::ShifterOperand(arm::R8)); + + const char* expected = + "eors r1, r0\n" + "eor r1, r0, r1\n" + "eor r1, r8, r0\n" + "eor r8, r1, r0\n" + "eor r1, r0, r8\n"; + DriverStr(expected, "abs"); +} + } // namespace art diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc index e3a9580..a171e59 100644 --- a/compiler/utils/assembler_thumb_test.cc +++ b/compiler/utils/assembler_thumb_test.cc @@ -309,13 +309,13 @@ TEST(Thumb2AssemblerTest, DataProcessingRegister) { // 16 bit variants. 
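The new eor test above (assembler_thumb2_test.cc) pins down when the assembler may pick the short encoding. My reading of the expected strings, added here as an annotation rather than as test code:

// eor(R1, R1, R0)  ->  "eors r1, r0"       16-bit form: rd == rn, both registers
//                                          low; the 16-bit encoding always sets flags.
// eor(R1, R0, R1)  ->  "eor r1, r0, r1"    rd != rn, so the three-address 32-bit
//                                          encoding is required even though rd == rm.
// eor with R8      ->  32-bit as well: high registers are not available to the
//                                          16-bit data-processing forms.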
__ add(R0, R1, ShifterOperand()); __ sub(R0, R1, ShifterOperand()); - __ and_(R0, R1, ShifterOperand()); - __ orr(R0, R1, ShifterOperand()); - __ eor(R0, R1, ShifterOperand()); - __ bic(R0, R1, ShifterOperand()); - __ adc(R0, R1, ShifterOperand()); - __ sbc(R0, R1, ShifterOperand()); - __ rsb(R0, R1, ShifterOperand()); + __ and_(R0, R0, ShifterOperand(R1)); + __ orr(R0, R0, ShifterOperand(R1)); + __ eor(R0, R0, ShifterOperand(R1)); + __ bic(R0, R0, ShifterOperand(R1)); + __ adc(R0, R0, ShifterOperand(R1)); + __ sbc(R0, R0, ShifterOperand(R1)); + __ rsb(R0, R0, ShifterOperand(R1)); __ tst(R0, ShifterOperand(R1)); __ teq(R0, ShifterOperand(R1)); diff --git a/disassembler/disassembler_arm64.cc b/disassembler/disassembler_arm64.cc index bd3bebf..4ff44b4 100644 --- a/disassembler/disassembler_arm64.cc +++ b/disassembler/disassembler_arm64.cc @@ -18,7 +18,7 @@ #include <inttypes.h> -#include <ostream> +#include <sstream> #include "base/logging.h" #include "base/stringprintf.h" @@ -27,22 +27,23 @@ namespace art { namespace arm64 { +// This enumeration should mirror the declarations in +// runtime/arch/arm64/registers_arm64.h. We do not include that file to +// avoid a dependency on libart. +enum { + TR = 18, + ETR = 21, + IP0 = 16, + IP1 = 17, + FP = 29, + LR = 30 +}; + void CustomDisassembler::AppendRegisterNameToOutput( const vixl::Instruction* instr, const vixl::CPURegister& reg) { USE(instr); if (reg.IsRegister()) { - // This enumeration should mirror the declarations in - // runtime/arch/arm64/registers_arm64.h. We do not include that file to - // avoid a dependency on libart. - enum { - TR = 18, - ETR = 21, - IP0 = 16, - IP1 = 17, - FP = 29, - LR = 30 - }; switch (reg.code()) { case IP0: AppendToOutput(reg.Is64Bits() ? "ip0" : "wip0"); return; case IP1: AppendToOutput(reg.Is64Bits() ? "ip1" : "wip1"); return; @@ -66,16 +67,7 @@ void CustomDisassembler::VisitLoadLiteral(const vixl::Instruction* instr) { return; } - char* buffer = buffer_; - char* buffer_end = buffer_ + buffer_size_; - - // Find the end position in the buffer. - while ((*buffer != 0) && (buffer < buffer_end)) { - ++buffer; - } - void* data_address = instr->LiteralAddress<void*>(); - ptrdiff_t buf_size_remaining = buffer_end - buffer; vixl::Instr op = instr->Mask(vixl::LoadLiteralMask); switch (op) { @@ -84,14 +76,14 @@ void CustomDisassembler::VisitLoadLiteral(const vixl::Instruction* instr) { case vixl::LDRSW_x_lit: { int64_t data = op == vixl::LDR_x_lit ? *reinterpret_cast<int64_t*>(data_address) : *reinterpret_cast<int32_t*>(data_address); - snprintf(buffer, buf_size_remaining, " (0x%" PRIx64 " / %" PRId64 ")", data, data); + AppendToOutput(" (0x%" PRIx64 " / %" PRId64 ")", data, data); break; } case vixl::LDR_s_lit: case vixl::LDR_d_lit: { double data = (op == vixl::LDR_s_lit) ? 
*reinterpret_cast<float*>(data_address) : *reinterpret_cast<double*>(data_address); - snprintf(buffer, buf_size_remaining, " (%g)", data); + AppendToOutput(" (%g)", data); break; } default: @@ -99,6 +91,17 @@ void CustomDisassembler::VisitLoadLiteral(const vixl::Instruction* instr) { } } +void CustomDisassembler::VisitLoadStoreUnsignedOffset(const vixl::Instruction* instr) { + Disassembler::VisitLoadStoreUnsignedOffset(instr); + + if (instr->Rn() == TR) { + int64_t offset = instr->ImmLSUnsigned() << instr->SizeLS(); + std::ostringstream tmp_stream; + Thread::DumpThreadOffset<8>(tmp_stream, static_cast<uint32_t>(offset)); + AppendToOutput(" (%s)", tmp_stream.str().c_str()); + } +} + size_t DisassemblerArm64::Dump(std::ostream& os, const uint8_t* begin) { const vixl::Instruction* instr = reinterpret_cast<const vixl::Instruction*>(begin); decoder.Decode(instr); diff --git a/disassembler/disassembler_arm64.h b/disassembler/disassembler_arm64.h index a370b8d..57f11c8 100644 --- a/disassembler/disassembler_arm64.h +++ b/disassembler/disassembler_arm64.h @@ -34,11 +34,14 @@ class CustomDisassembler FINAL : public vixl::Disassembler { vixl::Disassembler(), read_literals_(read_literals) {} // Use register aliases in the disassembly. - virtual void AppendRegisterNameToOutput(const vixl::Instruction* instr, - const vixl::CPURegister& reg) OVERRIDE; + void AppendRegisterNameToOutput(const vixl::Instruction* instr, + const vixl::CPURegister& reg) OVERRIDE; // Improve the disassembly of literal load instructions. - virtual void VisitLoadLiteral(const vixl::Instruction* instr) OVERRIDE; + void VisitLoadLiteral(const vixl::Instruction* instr) OVERRIDE; + + // Improve the disassembly of thread offset. + void VisitLoadStoreUnsignedOffset(const vixl::Instruction* instr) OVERRIDE; private: // Indicate if the disassembler should read data loaded from literal pools. diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc index 1a768c8..203488d 100644 --- a/disassembler/disassembler_x86.cc +++ b/disassembler/disassembler_x86.cc @@ -119,12 +119,6 @@ static void DumpBaseReg(std::ostream& os, uint8_t rex, uint8_t reg) { DumpAddrReg(os, rex, reg_num); } -static void DumpIndexReg(std::ostream& os, uint8_t rex, uint8_t reg) { - bool rex_x = (rex & REX_X) != 0; - uint8_t reg_num = rex_x ? (reg + 8) : reg; - DumpAddrReg(os, rex, reg_num); -} - static void DumpOpcodeReg(std::ostream& os, uint8_t rex, uint8_t reg, bool byte_operand, uint8_t size_override) { bool rex_b = (rex & REX_B) != 0; @@ -184,18 +178,30 @@ std::string DisassemblerX86::DumpAddress(uint8_t mod, uint8_t rm, uint8_t rex64, uint8_t index = (sib >> 3) & 7; uint8_t base = sib & 7; address << "["; + + // REX.x is bit 3 of index. + if ((rex64 & REX_X) != 0) { + index += 8; + } + + // Mod = 0 && base = 5 (ebp): no base (ignores REX.b). + bool has_base = false; if (base != 5 || mod != 0) { + has_base = true; DumpBaseReg(address, rex64, base); - if (index != 4) { - address << " + "; - } } + + // Index = 4 (esp/rsp) is disallowed. 
if (index != 4) { - DumpIndexReg(address, rex64, index); + if (has_base) { + address << " + "; + } + DumpAddrReg(address, rex64, index); if (scale != 0) { address << StringPrintf(" * %d", 1 << scale); } } + if (mod == 0) { if (base == 5) { if (index != 4) { diff --git a/runtime/Android.mk b/runtime/Android.mk index 907d884..4714610 100644 --- a/runtime/Android.mk +++ b/runtime/Android.mk @@ -301,6 +301,7 @@ LIBART_ENUM_OPERATOR_OUT_HEADER_FILES := \ gc/collector/gc_type.h \ gc/allocator_type.h \ gc/collector_type.h \ + gc/space/region_space.h \ gc/space/space.h \ gc/heap.h \ instrumentation.h \ diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S index 3430eb5..60e692b 100644 --- a/runtime/arch/mips64/quick_entrypoints_mips64.S +++ b/runtime/arch/mips64/quick_entrypoints_mips64.S @@ -756,9 +756,48 @@ END \name GENERATE_ALL_ALLOC_ENTRYPOINTS UNIMPLEMENTED art_quick_test_suspend -UNIMPLEMENTED art_quick_proxy_invoke_handler + + /* + * Called by managed code that is attempting to call a method on a proxy class. On entry + * r0 holds the proxy method; r1, r2 and r3 may contain arguments. + */ + .extern artQuickProxyInvokeHandler +ENTRY art_quick_proxy_invoke_handler + SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME + sd $a0, 0($sp) # place proxy method at bottom of frame + move $a2, rSELF # pass Thread::Current + jal artQuickProxyInvokeHandler # (Method* proxy method, receiver, Thread*, SP) + move $a3, $sp # pass $sp + ld $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_ + daddiu $sp, $sp, REFS_AND_ARGS_MINUS_REFS_SIZE # skip a0-a7 and f12-f19 + RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME + bne $t0, $zero, 1f + dmtc1 $v0, $f0 # place return value to FP return value + jalr $zero, $ra + dmtc1 $v1, $f1 # place return value to FP return value +1: + DELIVER_PENDING_EXCEPTION +END art_quick_proxy_invoke_handler + UNIMPLEMENTED art_quick_imt_conflict_trampoline -UNIMPLEMENTED art_quick_resolution_trampoline + + .extern artQuickResolutionTrampoline +ENTRY art_quick_resolution_trampoline + SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME + move $a2, rSELF # pass Thread::Current + jal artQuickResolutionTrampoline # (Method* called, receiver, Thread*, SP) + move $a3, $sp # pass $sp + beq $v0, $zero, 1f + lwu $a0, 0($sp) # load resolved method in $a0 + # artQuickResolutionTrampoline puts resolved method in *SP + RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME + move $t9, $v0 # code pointer must be in $t9 to generate the global pointer + jalr $zero, $t9 # tail call to method + nop +1: + RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME + DELIVER_PENDING_EXCEPTION +END art_quick_resolution_trampoline .extern artQuickGenericJniTrampoline .extern artQuickGenericJniEndTrampoline diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index fd3a1cf..beacd49 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -467,7 +467,8 @@ DEFINE_FUNCTION art_quick_invoke_stub // Now check ebx SKIP_OVER_FLOATS esi, edi, al, .Lgpr_setup_finished - // Must be first word of a long, or an integer. + // Must be first word of a long, or an integer. First word of long doesn't + // go into EBX, but can be loaded there anyways, as it is harmless. movl (%edi), %ebx jmp .Lgpr_setup_finished .LfirstLong: @@ -569,7 +570,8 @@ DEFINE_FUNCTION art_quick_invoke_static_stub // Is there anything for ebx? SKIP_OVER_FLOATS esi, edi, al, .Lgpr_setup_finished2 - // First word of long or integer. Load into EBX. 
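For reference on the disassembler_x86.cc hunk above: the SIB byte packs scale, index and base, and the REX prefix widens index and base to four bits. A standalone sketch of the field extraction, assuming nothing beyond the x86-64 encoding itself; it is not the disassembler's API.

#include <cstdint>

// Bits 7-6: scale; bits 5-3: index; bits 2-0: base. REX.X extends the index
// and REX.B extends the base. An extended index of 4 (esp/rsp) means "no index
// register"; with mod == 0, a raw base of 5 means "no base, disp32 follows",
// and REX.B is ignored for that test.
struct SibFields {
  uint8_t scale;  // Effective multiplier is 1 << scale.
  uint8_t index;
  uint8_t base;
};

inline SibFields DecodeSib(uint8_t sib, bool rex_x, bool rex_b) {
  SibFields f;
  f.scale = static_cast<uint8_t>((sib >> 6) & 3);
  f.index = static_cast<uint8_t>(((sib >> 3) & 7) | (rex_x ? 8 : 0));
  f.base = static_cast<uint8_t>((sib & 7) | (rex_b ? 8 : 0));
  return f;
}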
+ // Must be first word of a long, or an integer. First word of long doesn't + // go into EBX, but can be loaded there anyways, as it is harmless. movl (%edi), %ebx jmp .Lgpr_setup_finished2 .LSecondLong2: @@ -585,7 +587,8 @@ DEFINE_FUNCTION art_quick_invoke_static_stub // Anything for EBX? SKIP_OVER_FLOATS esi, edi, al, .Lgpr_setup_finished2 - // First word of long or integer. Load into EBX. + // Must be first word of a long, or an integer. First word of long doesn't + // go into EBX, but can be loaded there anyways, as it is harmless. movl (%edi), %ebx jmp .Lgpr_setup_finished2 // Nothing left to load. diff --git a/runtime/base/bit_vector.cc b/runtime/base/bit_vector.cc index 4390180..c3e24a7 100644 --- a/runtime/base/bit_vector.cc +++ b/runtime/base/bit_vector.cc @@ -276,6 +276,10 @@ void BitVector::Copy(const BitVector *src) { } } +#if defined(__clang__) && defined(__ARM_64BIT_STATE) +// b/19180814 When POPCOUNT is inlined, boot up failed on arm64 devices. +__attribute__((optnone)) +#endif uint32_t BitVector::NumSetBits(const uint32_t* storage, uint32_t end) { uint32_t word_end = WordIndex(end); uint32_t partial_word_bits = end & 0x1f; diff --git a/runtime/debugger.cc b/runtime/debugger.cc index d89ad5e..a0e978b 100644 --- a/runtime/debugger.cc +++ b/runtime/debugger.cc @@ -4382,6 +4382,10 @@ class HeapChunkContext { LOG(ERROR) << "Invalid class for managed heap object: " << o << " " << c; return HPSG_STATE(SOLIDITY_HARD, KIND_UNKNOWN); } + if (c->GetClass() == nullptr) { + LOG(ERROR) << "Null class of class " << c << " for object " << o; + return HPSG_STATE(SOLIDITY_HARD, KIND_UNKNOWN); + } if (c->IsClassClass()) { return HPSG_STATE(SOLIDITY_HARD, KIND_CLASS_OBJECT); } diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc index a67ebca..98f1684 100644 --- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc @@ -59,6 +59,7 @@ class QuickArgumentVisitor { // | S0 | // | | 4x2 bytes padding // | Method* | <- sp + static constexpr bool kSplitPairAcrossRegisterAndStack = kArm32QuickCodeUseSoftFloat; static constexpr bool kAlignPairRegister = !kArm32QuickCodeUseSoftFloat; static constexpr bool kQuickSoftFloatAbi = kArm32QuickCodeUseSoftFloat; static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = !kArm32QuickCodeUseSoftFloat; @@ -95,6 +96,7 @@ class QuickArgumentVisitor { // | D0 | // | | padding // | Method* | <- sp + static constexpr bool kSplitPairAcrossRegisterAndStack = false; static constexpr bool kAlignPairRegister = false; static constexpr bool kQuickSoftFloatAbi = false; // This is a hard float ABI. static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false; @@ -125,6 +127,7 @@ class QuickArgumentVisitor { // | A2 | arg2 // | A1 | arg1 // | A0/Method* | <- sp + static constexpr bool kSplitPairAcrossRegisterAndStack = true; static constexpr bool kAlignPairRegister = false; static constexpr bool kQuickSoftFloatAbi = true; // This is a soft float ABI. static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false; @@ -203,6 +206,7 @@ class QuickArgumentVisitor { // | XMM1 | float arg 2 // | XMM0 | float arg 1 // | EAX/Method* | <- sp + static constexpr bool kSplitPairAcrossRegisterAndStack = false; static constexpr bool kAlignPairRegister = false; static constexpr bool kQuickSoftFloatAbi = false; // This is a hard float ABI. 
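Stepping back to the BitVector::NumSetBits hunk above (the one marked optnone for clang on arm64 because of b/19180814): the routine is a masked population count over the storage words. A hedged, standalone equivalent assuming the usual semantics of counting the bits set at positions below end; it is not the runtime's implementation.

#include <cstdint>

// Count the bits set in `storage` at positions [0, end).
inline uint32_t CountSetBits(const uint32_t* storage, uint32_t end) {
  uint32_t count = 0;
  const uint32_t word_end = end / 32;
  for (uint32_t i = 0; i < word_end; ++i) {
    count += static_cast<uint32_t>(__builtin_popcount(storage[i]));
  }
  const uint32_t partial_bits = end & 0x1f;
  if (partial_bits != 0) {
    const uint32_t mask = (1u << partial_bits) - 1u;
    count += static_cast<uint32_t>(__builtin_popcount(storage[word_end] & mask));
  }
  return count;
}

Per the comment in the hunk, inlining POPCOUNT here broke boot on arm64 devices, so that configuration now compiles the function without optimization; the computed value is unchanged.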
static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false; @@ -243,6 +247,7 @@ class QuickArgumentVisitor { // | XMM0 | float arg 1 // | Padding | // | RDI/Method* | <- sp + static constexpr bool kSplitPairAcrossRegisterAndStack = false; static constexpr bool kAlignPairRegister = false; static constexpr bool kQuickSoftFloatAbi = false; // This is a hard float ABI. static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false; @@ -452,6 +457,11 @@ class QuickArgumentVisitor { } is_split_long_or_double_ = (GetBytesPerGprSpillLocation(kRuntimeISA) == 4) && ((gpr_index_ + 1) == kNumQuickGprArgs); + if (!kSplitPairAcrossRegisterAndStack && is_split_long_or_double_) { + // We don't want to split this. Pass over this register. + gpr_index_++; + is_split_long_or_double_ = false; + } Visit(); if (kBytesStackArgLocation == 4) { stack_index_+= 2; diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index 754e217..734c935 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -298,7 +298,11 @@ class EmptyCheckpoint : public Closure { Thread* self = Thread::Current(); CHECK(thread == self || thread->IsSuspended() || thread->GetState() == kWaitingPerformingGc) << thread->GetState() << " thread " << thread << " self " << self; - concurrent_copying_->GetBarrier().Pass(self); + // If thread is a running mutator, then act on behalf of the garbage collector. + // See the code in ThreadList::RunCheckpoint. + if (thread->GetState() == kRunnable) { + concurrent_copying_->GetBarrier().Pass(self); + } } private: @@ -431,6 +435,11 @@ void ConcurrentCopying::IssueEmptyCheckpoint() { ThreadList* thread_list = Runtime::Current()->GetThreadList(); gc_barrier_->Init(self, 0); size_t barrier_count = thread_list->RunCheckpoint(&check_point); + // If there are no threads to wait for, which implies that all the checkpoint functions have finished, + // then there is no need to release the mutator lock. + if (barrier_count == 0) { + return; + } // Release locks then wait for all mutator threads to pass the barrier.
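The EmptyCheckpoint change above (and the matching hunks in mark_sweep.cc and heap.cc below) relies on a revised checkpoint contract: only closures that run on still-runnable threads call Barrier::Pass(), and ThreadList::RunCheckpoint (changed later in this patch) returns just the number of such threads, so a zero return means every closure already ran synchronously on the requesting thread. A hedged sketch of the resulting caller pattern, reusing names from the surrounding code rather than introducing new API:

// Illustrative pattern only, not a new helper.
size_t barrier_count = thread_list->RunCheckpoint(&check_point);
if (barrier_count == 0) {
  return;  // Nothing runs the closure concurrently; no unlock/wait cycle is needed.
}
// Otherwise drop the mutator lock and wait for exactly barrier_count Pass() calls.
Locks::mutator_lock_->SharedUnlock(self);
gc_barrier_->Increment(self, barrier_count);
Locks::mutator_lock_->SharedLock(self);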
Locks::mutator_lock_->SharedUnlock(self); { @@ -810,10 +819,10 @@ class ConcurrentCopyingClearBlackPtrsVisitor { void operator()(mirror::Object* obj) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) { DCHECK(obj != nullptr); - CHECK(collector_->heap_->GetMarkBitmap()->Test(obj)) << obj; - CHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::BlackPtr()) << obj; + DCHECK(collector_->heap_->GetMarkBitmap()->Test(obj)) << obj; + DCHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::BlackPtr()) << obj; obj->SetReadBarrierPointer(ReadBarrier::WhitePtr()); - CHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::WhitePtr()) << obj; + DCHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::WhitePtr()) << obj; } private: @@ -955,10 +964,10 @@ class ConcurrentCopyingComputeUnevacFromSpaceLiveRatioVisitor { void operator()(mirror::Object* ref) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) { DCHECK(ref != nullptr); - CHECK(collector_->region_space_bitmap_->Test(ref)) << ref; - CHECK(collector_->region_space_->IsInUnevacFromSpace(ref)) << ref; + DCHECK(collector_->region_space_bitmap_->Test(ref)) << ref; + DCHECK(collector_->region_space_->IsInUnevacFromSpace(ref)) << ref; if (kUseBakerReadBarrier) { - CHECK(ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr()) << ref; + DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::BlackPtr()) << ref; // Clear the black ptr. ref->SetReadBarrierPointer(ReadBarrier::WhitePtr()); } @@ -1380,17 +1389,18 @@ mirror::Object* ConcurrentCopying::Copy(mirror::Object* from_ref) { mirror::Object* ConcurrentCopying::IsMarked(mirror::Object* from_ref) { DCHECK(from_ref != nullptr); - if (region_space_->IsInToSpace(from_ref)) { + space::RegionSpace::RegionType rtype = region_space_->GetRegionType(from_ref); + if (rtype == space::RegionSpace::RegionType::kRegionTypeToSpace) { // It's already marked. return from_ref; } mirror::Object* to_ref; - if (region_space_->IsInFromSpace(from_ref)) { + if (rtype == space::RegionSpace::RegionType::kRegionTypeFromSpace) { to_ref = GetFwdPtr(from_ref); DCHECK(to_ref == nullptr || region_space_->IsInToSpace(to_ref) || heap_->non_moving_space_->HasAddress(to_ref)) << "from_ref=" << from_ref << " to_ref=" << to_ref; - } else if (region_space_->IsInUnevacFromSpace(from_ref)) { + } else if (rtype == space::RegionSpace::RegionType::kRegionTypeUnevacFromSpace) { if (region_space_bitmap_->Test(from_ref)) { to_ref = from_ref; } else { @@ -1455,12 +1465,13 @@ mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref) { } DCHECK(from_ref != nullptr); DCHECK(heap_->collector_type_ == kCollectorTypeCC); - if (region_space_->IsInToSpace(from_ref)) { + space::RegionSpace::RegionType rtype = region_space_->GetRegionType(from_ref); + if (rtype == space::RegionSpace::RegionType::kRegionTypeToSpace) { // It's already marked. 
return from_ref; } mirror::Object* to_ref; - if (region_space_->IsInFromSpace(from_ref)) { + if (rtype == space::RegionSpace::RegionType::kRegionTypeFromSpace) { to_ref = GetFwdPtr(from_ref); if (kUseBakerReadBarrier) { DCHECK(to_ref != ReadBarrier::GrayPtr()) << "from_ref=" << from_ref << " to_ref=" << to_ref; @@ -1471,7 +1482,7 @@ mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref) { } DCHECK(region_space_->IsInToSpace(to_ref) || heap_->non_moving_space_->HasAddress(to_ref)) << "from_ref=" << from_ref << " to_ref=" << to_ref; - } else if (region_space_->IsInUnevacFromSpace(from_ref)) { + } else if (rtype == space::RegionSpace::RegionType::kRegionTypeUnevacFromSpace) { // This may or may not succeed, which is ok. if (kUseBakerReadBarrier) { from_ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr()); diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc index d7a9292..cd63d26 100644 --- a/runtime/gc/collector/mark_sweep.cc +++ b/runtime/gc/collector/mark_sweep.cc @@ -989,7 +989,11 @@ class CheckpointMarkThreadRoots : public Closure { mark_sweep_->GetHeap()->RevokeRosAllocThreadLocalBuffers(thread); ATRACE_END(); } - mark_sweep_->GetBarrier().Pass(self); + // If thread is a running mutator, then act on behalf of the garbage collector. + // See the code in ThreadList::RunCheckpoint. + if (thread->GetState() == kRunnable) { + mark_sweep_->GetBarrier().Pass(self); + } } private: @@ -1006,7 +1010,11 @@ void MarkSweep::MarkRootsCheckpoint(Thread* self, // run through the barrier including self. size_t barrier_count = thread_list->RunCheckpoint(&check_point); // Release locks then wait for all mutator threads to pass the barrier. - // TODO: optimize to not release locks when there are no threads to wait for. + // If there are no threads to wait for, which implies that all the checkpoint functions have finished, + // then there is no need to release locks. + if (barrier_count == 0) { + return; + } Locks::heap_bitmap_lock_->ExclusiveUnlock(self); Locks::mutator_lock_->SharedUnlock(self); { diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index dc42510..419d555 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -1061,7 +1061,11 @@ class TrimIndirectReferenceTableClosure : public Closure { ATRACE_BEGIN("Trimming reference table"); thread->GetJniEnv()->locals.Trim(); ATRACE_END(); - barrier_->Pass(Thread::Current()); + // If thread is a running mutator, then act on behalf of the trim thread. + // See the code in ThreadList::RunCheckpoint.
+ if (thread->GetState() == kRunnable) { + barrier_->Pass(Thread::Current()); + } } private: @@ -1079,7 +1083,9 @@ void Heap::TrimIndirectReferenceTables(Thread* self) { TrimIndirectReferenceTableClosure closure(&barrier); ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun); size_t barrier_count = Runtime::Current()->GetThreadList()->RunCheckpoint(&closure); - barrier.Increment(self, barrier_count); + if (barrier_count != 0) { + barrier.Increment(self, barrier_count); + } ATRACE_END(); } diff --git a/runtime/gc/space/region_space-inl.h b/runtime/gc/space/region_space-inl.h index fd00739..a4ed718 100644 --- a/runtime/gc/space/region_space-inl.h +++ b/runtime/gc/space/region_space-inl.h @@ -104,7 +104,7 @@ inline mirror::Object* RegionSpace::AllocNonvirtual(size_t num_bytes, size_t* by inline mirror::Object* RegionSpace::Region::Alloc(size_t num_bytes, size_t* bytes_allocated, size_t* usable_size) { - DCHECK_EQ(state_, static_cast<uint8_t>(kRegionToSpace)); + DCHECK(IsAllocated() && IsInToSpace()); DCHECK(IsAligned<kAlignment>(num_bytes)); Atomic<uint8_t*>* atomic_top = reinterpret_cast<Atomic<uint8_t*>*>(&top_); uint8_t* old_top; @@ -132,7 +132,7 @@ inline size_t RegionSpace::AllocationSizeNonvirtual(mirror::Object* obj, size_t* size_t num_bytes = obj->SizeOf(); if (usable_size != nullptr) { if (LIKELY(num_bytes <= kRegionSize)) { - DCHECK(RefToRegion(obj)->IsNormal()); + DCHECK(RefToRegion(obj)->IsAllocated()); *usable_size = RoundUp(num_bytes, kAlignment); } else { DCHECK(RefToRegion(obj)->IsLarge()); @@ -142,7 +142,7 @@ inline size_t RegionSpace::AllocationSizeNonvirtual(mirror::Object* obj, size_t* return num_bytes; } -template<RegionSpace::SubSpaceType kSubSpaceType> +template<RegionSpace::RegionType kRegionType> uint64_t RegionSpace::GetBytesAllocatedInternal() { uint64_t bytes = 0; MutexLock mu(Thread::Current(), region_lock_); @@ -151,33 +151,33 @@ uint64_t RegionSpace::GetBytesAllocatedInternal() { if (r->IsFree()) { continue; } - switch (kSubSpaceType) { - case kAllSpaces: + switch (kRegionType) { + case RegionType::kRegionTypeAll: bytes += r->BytesAllocated(); break; - case kFromSpace: + case RegionType::kRegionTypeFromSpace: if (r->IsInFromSpace()) { bytes += r->BytesAllocated(); } break; - case kUnevacFromSpace: + case RegionType::kRegionTypeUnevacFromSpace: if (r->IsInUnevacFromSpace()) { bytes += r->BytesAllocated(); } break; - case kToSpace: + case RegionType::kRegionTypeToSpace: if (r->IsInToSpace()) { bytes += r->BytesAllocated(); } break; default: - LOG(FATAL) << "Unexpected space type : " << static_cast<int>(kSubSpaceType); + LOG(FATAL) << "Unexpected space type : " << kRegionType; } } return bytes; } -template<RegionSpace::SubSpaceType kSubSpaceType> +template<RegionSpace::RegionType kRegionType> uint64_t RegionSpace::GetObjectsAllocatedInternal() { uint64_t bytes = 0; MutexLock mu(Thread::Current(), region_lock_); @@ -186,27 +186,27 @@ uint64_t RegionSpace::GetObjectsAllocatedInternal() { if (r->IsFree()) { continue; } - switch (kSubSpaceType) { - case kAllSpaces: + switch (kRegionType) { + case RegionType::kRegionTypeAll: bytes += r->ObjectsAllocated(); break; - case kFromSpace: + case RegionType::kRegionTypeFromSpace: if (r->IsInFromSpace()) { bytes += r->ObjectsAllocated(); } break; - case kUnevacFromSpace: + case RegionType::kRegionTypeUnevacFromSpace: if (r->IsInUnevacFromSpace()) { bytes += r->ObjectsAllocated(); } break; - case kToSpace: + case RegionType::kRegionTypeToSpace: if (r->IsInToSpace()) { bytes += r->ObjectsAllocated(); } break; default: - 
LOG(FATAL) << "Unexpected space type : " << static_cast<int>(kSubSpaceType); + LOG(FATAL) << "Unexpected space type : " << kRegionType; } } return bytes; diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc index 2ecb79e..2c556d9 100644 --- a/runtime/gc/space/region_space.cc +++ b/runtime/gc/space/region_space.cc @@ -71,7 +71,7 @@ RegionSpace::RegionSpace(const std::string& name, MemMap* mem_map) } full_region_ = Region(); DCHECK(!full_region_.IsFree()); - DCHECK(full_region_.IsNormal()); + DCHECK(full_region_.IsAllocated()); current_region_ = &full_region_; evac_region_ = nullptr; size_t ignored; @@ -115,7 +115,7 @@ size_t RegionSpace::ToSpaceSize() { } inline bool RegionSpace::Region::ShouldBeEvacuated() { - DCHECK(state_ == kRegionToSpace || state_ == kRegionLargeToSpace); + DCHECK((IsAllocated() || IsLarge()) && IsInToSpace()); // if the region was allocated after the start of the // previous GC or the live ratio is below threshold, evacuate // it. @@ -126,13 +126,13 @@ inline bool RegionSpace::Region::ShouldBeEvacuated() { bool is_live_percent_valid = live_bytes_ != static_cast<size_t>(-1); if (is_live_percent_valid) { uint live_percent = GetLivePercent(); - if (state_ == kRegionToSpace) { + if (IsAllocated()) { // Side node: live_percent == 0 does not necessarily mean // there's no live objects due to rounding (there may be a // few). result = live_percent < kEvaculateLivePercentThreshold; } else { - DCHECK(state_ == kRegionLargeToSpace); + DCHECK(IsLarge()); result = live_percent == 0U; } } else { @@ -155,11 +155,14 @@ void RegionSpace::SetFromSpace(accounting::ReadBarrierTable* rb_table, bool forc bool prev_large_evacuated = false; for (size_t i = 0; i < num_regions_; ++i) { Region* r = ®ions_[i]; - RegionState state = static_cast<RegionState>(r->state_); + RegionState state = r->State(); + RegionType type = r->Type(); if (!r->IsFree()) { DCHECK(r->IsInToSpace()); if (LIKELY(num_expected_large_tails == 0U)) { - DCHECK(state == kRegionToSpace || state == kRegionLargeToSpace); + DCHECK((state == RegionState::kRegionStateAllocated || + state == RegionState::kRegionStateLarge) && + type == RegionType::kRegionTypeToSpace); bool should_evacuate = force_evacuate_all || r->ShouldBeEvacuated(); if (should_evacuate) { r->SetAsFromSpace(); @@ -168,13 +171,15 @@ void RegionSpace::SetFromSpace(accounting::ReadBarrierTable* rb_table, bool forc r->SetAsUnevacFromSpace(); DCHECK(r->IsInUnevacFromSpace()); } - if (UNLIKELY(state == kRegionLargeToSpace)) { + if (UNLIKELY(state == RegionState::kRegionStateLarge && + type == RegionType::kRegionTypeToSpace)) { prev_large_evacuated = should_evacuate; num_expected_large_tails = RoundUp(r->BytesAllocated(), kRegionSize) / kRegionSize - 1; DCHECK_GT(num_expected_large_tails, 0U); } } else { - DCHECK(state == kRegionLargeTailToSpace); + DCHECK(state == RegionState::kRegionStateLargeTail && + type == RegionType::kRegionTypeToSpace); if (prev_large_evacuated) { r->SetAsFromSpace(); DCHECK(r->IsInFromSpace()); @@ -361,7 +366,7 @@ void RegionSpace::RevokeThreadLocalBuffersLocked(Thread* thread) { if (tlab_start != nullptr) { DCHECK(IsAligned<kRegionSize>(tlab_start)); Region* r = RefToRegionLocked(reinterpret_cast<mirror::Object*>(tlab_start)); - DCHECK(r->IsNormal()); + DCHECK(r->IsAllocated()); DCHECK_EQ(thread->GetThreadLocalBytesAllocated(), kRegionSize); r->RecordThreadLocalAllocations(thread->GetThreadLocalObjectsAllocated(), thread->GetThreadLocalBytesAllocated()); @@ -402,7 +407,8 @@ void 
RegionSpace::AssertAllThreadLocalBuffersAreRevoked() { void RegionSpace::Region::Dump(std::ostream& os) const { os << "Region[" << idx_ << "]=" << reinterpret_cast<void*>(begin_) << "-" << reinterpret_cast<void*>(top_) << "-" << reinterpret_cast<void*>(end_) - << " state=" << static_cast<uint>(state_) << " objects_allocated=" << objects_allocated_ + << " state=" << static_cast<uint>(state_) << " type=" << static_cast<uint>(type_) + << " objects_allocated=" << objects_allocated_ << " alloc_time=" << alloc_time_ << " live_bytes=" << live_bytes_ << " is_newly_allocated=" << is_newly_allocated_ << " is_a_tlab=" << is_a_tlab_ << " thread=" << thread_ << "\n"; } diff --git a/runtime/gc/space/region_space.h b/runtime/gc/space/region_space.h index b4a043f..4160547 100644 --- a/runtime/gc/space/region_space.h +++ b/runtime/gc/space/region_space.h @@ -17,9 +17,10 @@ #ifndef ART_RUNTIME_GC_SPACE_REGION_SPACE_H_ #define ART_RUNTIME_GC_SPACE_REGION_SPACE_H_ +#include "gc/accounting/read_barrier_table.h" #include "object_callbacks.h" #include "space.h" -#include "gc/accounting/read_barrier_table.h" +#include "thread.h" namespace art { namespace gc { @@ -94,32 +95,40 @@ class RegionSpace FINAL : public ContinuousMemMapAllocSpace { void AssertAllThreadLocalBuffersAreRevoked() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, Locks::thread_list_lock_); - enum SubSpaceType { - kAllSpaces, // All spaces. - kFromSpace, // From-space. To be evacuated. - kUnevacFromSpace, // Unevacuated from-space. Not to be evacuated. - kToSpace, // To-space. + enum class RegionType : uint8_t { + kRegionTypeAll, // All types. + kRegionTypeFromSpace, // From-space. To be evacuated. + kRegionTypeUnevacFromSpace, // Unevacuated from-space. Not to be evacuated. + kRegionTypeToSpace, // To-space. + kRegionTypeNone, // None. + }; + + enum class RegionState : uint8_t { + kRegionStateFree, // Free region. + kRegionStateAllocated, // Allocated region. + kRegionStateLarge, // Large allocated (allocation larger than the region size). + kRegionStateLargeTail, // Large tail (non-first regions of a large allocation). 
}; - template<SubSpaceType kSubSpaceType> uint64_t GetBytesAllocatedInternal(); - template<SubSpaceType kSubSpaceType> uint64_t GetObjectsAllocatedInternal(); + template<RegionType kRegionType> uint64_t GetBytesAllocatedInternal(); + template<RegionType kRegionType> uint64_t GetObjectsAllocatedInternal(); uint64_t GetBytesAllocated() { - return GetBytesAllocatedInternal<kAllSpaces>(); + return GetBytesAllocatedInternal<RegionType::kRegionTypeAll>(); } uint64_t GetObjectsAllocated() { - return GetObjectsAllocatedInternal<kAllSpaces>(); + return GetObjectsAllocatedInternal<RegionType::kRegionTypeAll>(); } uint64_t GetBytesAllocatedInFromSpace() { - return GetBytesAllocatedInternal<kFromSpace>(); + return GetBytesAllocatedInternal<RegionType::kRegionTypeFromSpace>(); } uint64_t GetObjectsAllocatedInFromSpace() { - return GetObjectsAllocatedInternal<kFromSpace>(); + return GetObjectsAllocatedInternal<RegionType::kRegionTypeFromSpace>(); } uint64_t GetBytesAllocatedInUnevacFromSpace() { - return GetBytesAllocatedInternal<kUnevacFromSpace>(); + return GetBytesAllocatedInternal<RegionType::kRegionTypeUnevacFromSpace>(); } uint64_t GetObjectsAllocatedInUnevacFromSpace() { - return GetObjectsAllocatedInternal<kUnevacFromSpace>(); + return GetObjectsAllocatedInternal<RegionType::kRegionTypeUnevacFromSpace>(); } bool CanMoveObjects() const OVERRIDE { @@ -181,6 +190,14 @@ class RegionSpace FINAL : public ContinuousMemMapAllocSpace { return false; } + RegionType GetRegionType(mirror::Object* ref) { + if (HasAddress(ref)) { + Region* r = RefToRegionUnlocked(ref); + return r->Type(); + } + return RegionType::kRegionTypeNone; + } + void SetFromSpace(accounting::ReadBarrierTable* rb_table, bool force_evacuate_all) LOCKS_EXCLUDED(region_lock_); @@ -190,7 +207,7 @@ class RegionSpace FINAL : public ContinuousMemMapAllocSpace { void ClearFromSpace(); void AddLiveBytes(mirror::Object* ref, size_t alloc_size) { - Region* reg = RefToRegion(ref); + Region* reg = RefToRegionUnlocked(ref); reg->AddLiveBytes(alloc_size); } @@ -209,38 +226,36 @@ class RegionSpace FINAL : public ContinuousMemMapAllocSpace { template<bool kToSpaceOnly> void WalkInternal(ObjectCallback* callback, void* arg) NO_THREAD_SAFETY_ANALYSIS; - enum RegionState { - kRegionFree, // Free region. - kRegionToSpace, // To-space region. - kRegionFromSpace, // From-space region. To be evacuated. - kRegionUnevacFromSpace, // Unevacuated from-space region. Not to be evacuated. - kRegionLargeToSpace, // Large (allocation larger than the region size) to-space. - kRegionLargeFromSpace, // Large from-space. To be evacuated. - kRegionLargeUnevacFromSpace, // Large unevacuated from-space. - kRegionLargeTailToSpace, // Large tail (non-first regions of a large allocation). - kRegionLargeTailFromSpace, // Large tail from-space. - kRegionLargeTailUnevacFromSpace, // Large tail unevacuated from-space. 
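To make the refactoring above easier to follow: the removed enum conflated what a region holds with which collection space it currently belongs to, and the new RegionState/RegionType pair tracks those two properties separately. Reconstructed from the hunks in this file (a summary of my own, not text from the patch), the old combined values map onto (state, type) pairs roughly as follows:

// kRegionFree            -> (kRegionStateFree,      kRegionTypeNone)
// kRegionToSpace         -> (kRegionStateAllocated, kRegionTypeToSpace)
// kRegionFromSpace       -> (kRegionStateAllocated, kRegionTypeFromSpace)
// kRegionUnevacFromSpace -> (kRegionStateAllocated, kRegionTypeUnevacFromSpace)
// kRegionLarge{To,From,UnevacFrom}Space     -> (kRegionStateLarge,     the corresponding type)
// kRegionLargeTail{To,From,UnevacFrom}Space -> (kRegionStateLargeTail, the corresponding type)

This split is also what lets ConcurrentCopying::IsMarked() and Mark() above fetch GetRegionType(ref) once and branch on it, instead of resolving the region up to three times through IsInToSpace(), IsInFromSpace() and IsInUnevacFromSpace().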
- }; - class Region { public: Region() : idx_(static_cast<size_t>(-1)), - begin_(nullptr), top_(nullptr), end_(nullptr), state_(kRegionToSpace), + begin_(nullptr), top_(nullptr), end_(nullptr), + state_(RegionState::kRegionStateAllocated), type_(RegionType::kRegionTypeToSpace), objects_allocated_(0), alloc_time_(0), live_bytes_(static_cast<size_t>(-1)), is_newly_allocated_(false), is_a_tlab_(false), thread_(nullptr) {} Region(size_t idx, uint8_t* begin, uint8_t* end) - : idx_(idx), begin_(begin), top_(begin), end_(end), state_(kRegionFree), + : idx_(idx), begin_(begin), top_(begin), end_(end), + state_(RegionState::kRegionStateFree), type_(RegionType::kRegionTypeNone), objects_allocated_(0), alloc_time_(0), live_bytes_(static_cast<size_t>(-1)), is_newly_allocated_(false), is_a_tlab_(false), thread_(nullptr) { DCHECK_LT(begin, end); DCHECK_EQ(static_cast<size_t>(end - begin), kRegionSize); } + RegionState State() const { + return state_; + } + + RegionType Type() const { + return type_; + } + void Clear() { top_ = begin_; - state_ = kRegionFree; + state_ = RegionState::kRegionStateFree; + type_ = RegionType::kRegionTypeNone; objects_allocated_ = 0; alloc_time_ = 0; live_bytes_ = static_cast<size_t>(-1); @@ -257,8 +272,9 @@ class RegionSpace FINAL : public ContinuousMemMapAllocSpace { size_t* usable_size); bool IsFree() const { - bool is_free = state_ == kRegionFree; + bool is_free = state_ == RegionState::kRegionStateFree; if (is_free) { + DCHECK(IsInNoSpace()); DCHECK_EQ(begin_, top_); DCHECK_EQ(objects_allocated_, 0U); } @@ -268,19 +284,22 @@ class RegionSpace FINAL : public ContinuousMemMapAllocSpace { // Given a free region, declare it non-free (allocated). void Unfree(uint32_t alloc_time) { DCHECK(IsFree()); - state_ = kRegionToSpace; + state_ = RegionState::kRegionStateAllocated; + type_ = RegionType::kRegionTypeToSpace; alloc_time_ = alloc_time; } void UnfreeLarge(uint32_t alloc_time) { DCHECK(IsFree()); - state_ = kRegionLargeToSpace; + state_ = RegionState::kRegionStateLarge; + type_ = RegionType::kRegionTypeToSpace; alloc_time_ = alloc_time; } void UnfreeLargeTail(uint32_t alloc_time) { DCHECK(IsFree()); - state_ = kRegionLargeTailToSpace; + state_ = RegionState::kRegionStateLargeTail; + type_ = RegionType::kRegionTypeToSpace; alloc_time_ = alloc_time; } @@ -288,25 +307,23 @@ class RegionSpace FINAL : public ContinuousMemMapAllocSpace { is_newly_allocated_ = true; } - // Non-large, non-large-tail. - bool IsNormal() const { - return state_ == kRegionToSpace || state_ == kRegionFromSpace || - state_ == kRegionUnevacFromSpace; + // Non-large, non-large-tail allocated. + bool IsAllocated() const { + return state_ == RegionState::kRegionStateAllocated; } + // Large allocated. bool IsLarge() const { - bool is_large = state_ == kRegionLargeToSpace || state_ == kRegionLargeFromSpace || - state_ == kRegionLargeUnevacFromSpace; + bool is_large = state_ == RegionState::kRegionStateLarge; if (is_large) { DCHECK_LT(begin_ + 1 * MB, top_); } return is_large; } + // Large-tail allocated. 
bool IsLargeTail() const { - bool is_large_tail = state_ == kRegionLargeTailToSpace || - state_ == kRegionLargeTailFromSpace || - state_ == kRegionLargeTailUnevacFromSpace; + bool is_large_tail = state_ == RegionState::kRegionStateLargeTail; if (is_large_tail) { DCHECK_EQ(begin_, top_); } @@ -318,71 +335,36 @@ class RegionSpace FINAL : public ContinuousMemMapAllocSpace { } bool IsInFromSpace() const { - return state_ == kRegionFromSpace || state_ == kRegionLargeFromSpace || - state_ == kRegionLargeTailFromSpace; + return type_ == RegionType::kRegionTypeFromSpace; } bool IsInToSpace() const { - return state_ == kRegionToSpace || state_ == kRegionLargeToSpace || - state_ == kRegionLargeTailToSpace; + return type_ == RegionType::kRegionTypeToSpace; } bool IsInUnevacFromSpace() const { - return state_ == kRegionUnevacFromSpace || state_ == kRegionLargeUnevacFromSpace || - state_ == kRegionLargeTailUnevacFromSpace; + return type_ == RegionType::kRegionTypeUnevacFromSpace; + } + + bool IsInNoSpace() const { + return type_ == RegionType::kRegionTypeNone; } void SetAsFromSpace() { - switch (state_) { - case kRegionToSpace: - state_ = kRegionFromSpace; - break; - case kRegionLargeToSpace: - state_ = kRegionLargeFromSpace; - break; - case kRegionLargeTailToSpace: - state_ = kRegionLargeTailFromSpace; - break; - default: - LOG(FATAL) << "Unexpected region state : " << static_cast<uint>(state_) - << " idx=" << idx_; - } + DCHECK(!IsFree() && IsInToSpace()); + type_ = RegionType::kRegionTypeFromSpace; live_bytes_ = static_cast<size_t>(-1); } void SetAsUnevacFromSpace() { - switch (state_) { - case kRegionToSpace: - state_ = kRegionUnevacFromSpace; - break; - case kRegionLargeToSpace: - state_ = kRegionLargeUnevacFromSpace; - break; - case kRegionLargeTailToSpace: - state_ = kRegionLargeTailUnevacFromSpace; - break; - default: - LOG(FATAL) << "Unexpected region state : " << static_cast<uint>(state_) - << " idx=" << idx_; - } + DCHECK(!IsFree() && IsInToSpace()); + type_ = RegionType::kRegionTypeUnevacFromSpace; live_bytes_ = 0U; } void SetUnevacFromSpaceAsToSpace() { - switch (state_) { - case kRegionUnevacFromSpace: - state_ = kRegionToSpace; - break; - case kRegionLargeUnevacFromSpace: - state_ = kRegionLargeToSpace; - break; - case kRegionLargeTailUnevacFromSpace: - state_ = kRegionLargeTailToSpace; - break; - default: - LOG(FATAL) << "Unexpected region state : " << static_cast<uint>(state_) - << " idx=" << idx_; - } + DCHECK(!IsFree() && IsInUnevacFromSpace()); + type_ = RegionType::kRegionTypeToSpace; } ALWAYS_INLINE bool ShouldBeEvacuated(); @@ -419,7 +401,7 @@ class RegionSpace FINAL : public ContinuousMemMapAllocSpace { DCHECK_EQ(begin_, top_); return 0; } else { - DCHECK(IsNormal()) << static_cast<uint>(state_); + DCHECK(IsAllocated()) << static_cast<uint>(state_); DCHECK_LE(begin_, top_); size_t bytes = static_cast<size_t>(top_ - begin_); DCHECK_LE(bytes, kRegionSize); @@ -437,7 +419,7 @@ class RegionSpace FINAL : public ContinuousMemMapAllocSpace { DCHECK_EQ(objects_allocated_, 0U); return 0; } else { - DCHECK(IsNormal()) << static_cast<uint>(state_); + DCHECK(IsAllocated()) << static_cast<uint>(state_); return objects_allocated_; } } @@ -465,7 +447,7 @@ class RegionSpace FINAL : public ContinuousMemMapAllocSpace { void Dump(std::ostream& os) const; void RecordThreadLocalAllocations(size_t num_objects, size_t num_bytes) { - DCHECK(IsNormal()); + DCHECK(IsAllocated()); DCHECK_EQ(objects_allocated_, 0U); DCHECK_EQ(top_, end_); objects_allocated_ = num_objects; @@ -479,7 +461,8 @@ class 
RegionSpace FINAL : public ContinuousMemMapAllocSpace { // Can't use Atomic<uint8_t*> as Atomic's copy operator is implicitly deleted. uint8_t* top_; // The current position of the allocation. uint8_t* end_; // The end address of the region. - uint8_t state_; // The region state (see RegionState). + RegionState state_; // The region state (see RegionState). + RegionType type_; // The region type (see RegionType). uint64_t objects_allocated_; // The number of objects allocated. uint32_t alloc_time_; // The allocation time of the region. size_t live_bytes_; // The live bytes. Used to compute the live percent. @@ -534,6 +517,9 @@ class RegionSpace FINAL : public ContinuousMemMapAllocSpace { DISALLOW_COPY_AND_ASSIGN(RegionSpace); }; +std::ostream& operator<<(std::ostream& os, const RegionSpace::RegionState& value); +std::ostream& operator<<(std::ostream& os, const RegionSpace::RegionType& value); + } // namespace space } // namespace gc } // namespace art diff --git a/runtime/oat.h b/runtime/oat.h index 3e28606..7faf33b 100644 --- a/runtime/oat.h +++ b/runtime/oat.h @@ -32,7 +32,7 @@ class InstructionSetFeatures; class PACKED(4) OatHeader { public: static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' }; - static constexpr uint8_t kOatVersion[] = { '0', '5', '4', '\0' }; + static constexpr uint8_t kOatVersion[] = { '0', '5', '5', '\0' }; static constexpr const char* kImageLocationKey = "image-location"; static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline"; diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc index 58e5b9d..05a0bff 100644 --- a/runtime/thread_list.cc +++ b/runtime/thread_list.cc @@ -44,6 +44,11 @@ namespace art { static constexpr uint64_t kLongThreadSuspendThreshold = MsToNs(5); +static constexpr uint64_t kThreadSuspendTimeoutMs = 30 * 1000; // 30s. +// Use 0 since we want to yield to prevent blocking for an unpredictable amount of time. +static constexpr useconds_t kThreadSuspendInitialSleepUs = 0; +static constexpr useconds_t kThreadSuspendMaxYieldUs = 3000; +static constexpr useconds_t kThreadSuspendMaxSleepUs = 5000; ThreadList::ThreadList() : suspend_all_count_(0), debug_suspend_all_count_(0), @@ -174,7 +179,9 @@ class DumpCheckpoint FINAL : public Closure { MutexLock mu(self, *Locks::logging_lock_); *os_ << local_os.str(); } - barrier_.Pass(self); + if (thread->GetState() == kRunnable) { + barrier_.Pass(self); + } } void WaitForThreadsToRunThroughCheckpoint(size_t threads_running_checkpoint) { @@ -202,7 +209,9 @@ void ThreadList::Dump(std::ostream& os) { } DumpCheckpoint checkpoint(&os); size_t threads_running_checkpoint = RunCheckpoint(&checkpoint); - checkpoint.WaitForThreadsToRunThroughCheckpoint(threads_running_checkpoint); + if (threads_running_checkpoint != 0) { + checkpoint.WaitForThreadsToRunThroughCheckpoint(threads_running_checkpoint); + } } void ThreadList::AssertThreadsAreSuspended(Thread* self, Thread* ignore1, Thread* ignore2) { @@ -233,22 +242,13 @@ static void UnsafeLogFatalForThreadSuspendAllTimeout() { #endif // Unlike suspending all threads where we can wait to acquire the mutator_lock_, suspending an -// individual thread requires polling. delay_us is the requested sleep and total_delay_us -// accumulates the total time spent sleeping for timeouts. The first sleep is just a yield, -// subsequently sleeps increase delay_us from 1ms to 500ms by doubling. 
-static void ThreadSuspendSleep(useconds_t* delay_us, useconds_t* total_delay_us) { - useconds_t new_delay_us = (*delay_us) * 2; - CHECK_GE(new_delay_us, *delay_us); - if (new_delay_us < 500000) { // Don't allow sleeping to be more than 0.5s. - *delay_us = new_delay_us; - } - if (*delay_us == 0) { +// individual thread requires polling. delay_us is the requested sleep wait. If delay_us is 0 then +// we use sched_yield instead of calling usleep. +static void ThreadSuspendSleep(useconds_t delay_us) { + if (delay_us == 0) { sched_yield(); - // Default to 1 milliseconds (note that this gets multiplied by 2 before the first sleep). - *delay_us = 500; } else { - usleep(*delay_us); - *total_delay_us += *delay_us; + usleep(delay_us); } } @@ -297,16 +297,23 @@ size_t ThreadList::RunCheckpoint(Closure* checkpoint_function) { // Run the checkpoint on the suspended threads. for (const auto& thread : suspended_count_modified_threads) { if (!thread->IsSuspended()) { - // Wait until the thread is suspended. - useconds_t total_delay_us = 0; + if (ATRACE_ENABLED()) { + std::ostringstream oss; + thread->ShortDump(oss); + ATRACE_BEGIN((std::string("Waiting for suspension of thread ") + oss.str()).c_str()); + } + // Busy wait until the thread is suspended. + const uint64_t start_time = NanoTime(); do { - useconds_t delay_us = 100; - ThreadSuspendSleep(&delay_us, &total_delay_us); + ThreadSuspendSleep(kThreadSuspendInitialSleepUs); } while (!thread->IsSuspended()); + const uint64_t total_delay = NanoTime() - start_time; // Shouldn't need to wait for longer than 1000 microseconds. - constexpr useconds_t kLongWaitThresholdUS = 1000; - if (UNLIKELY(total_delay_us > kLongWaitThresholdUS)) { - LOG(WARNING) << "Waited " << total_delay_us << " us for thread suspend!"; + constexpr uint64_t kLongWaitThreshold = MsToNs(1); + ATRACE_END(); + if (UNLIKELY(total_delay > kLongWaitThreshold)) { + LOG(WARNING) << "Long wait of " << PrettyDuration(total_delay) << " for " + << *thread << " suspension!"; } } // We know for sure that the thread is suspended at this point. @@ -324,8 +331,7 @@ size_t ThreadList::RunCheckpoint(Closure* checkpoint_function) { Thread::resume_cond_->Broadcast(self); } - // Add one for self. - return count + suspended_count_modified_threads.size() + 1; + return count; } // Request that a checkpoint function be run on all active (non-suspended) @@ -480,7 +486,7 @@ void ThreadList::SuspendAll() { // Block on the mutator lock until all Runnable threads release their share of access. #if HAVE_TIMED_RWLOCK // Timeout if we wait more than 30 seconds. - if (!Locks::mutator_lock_->ExclusiveLockWithTimeout(self, 30 * 1000, 0)) { + if (!Locks::mutator_lock_->ExclusiveLockWithTimeout(self, kThreadSuspendTimeoutMs, 0)) { UnsafeLogFatalForThreadSuspendAllTimeout(); } #else @@ -609,11 +615,10 @@ static void ThreadSuspendByPeerWarning(Thread* self, LogSeverity severity, const Thread* ThreadList::SuspendThreadByPeer(jobject peer, bool request_suspension, bool debug_suspension, bool* timed_out) { - static const useconds_t kTimeoutUs = 30 * 1000000; // 30s. 
- useconds_t total_delay_us = 0; - useconds_t delay_us = 0; + const uint64_t start_time = NanoTime(); + useconds_t sleep_us = kThreadSuspendInitialSleepUs; *timed_out = false; - Thread* self = Thread::Current(); + Thread* const self = Thread::Current(); Thread* suspended_thread = nullptr; VLOG(threads) << "SuspendThreadByPeer starting"; while (true) { @@ -680,7 +685,8 @@ Thread* ThreadList::SuspendThreadByPeer(jobject peer, bool request_suspension, } return thread; } - if (total_delay_us >= kTimeoutUs) { + const uint64_t total_delay = NanoTime() - start_time; + if (total_delay >= MsToNs(kThreadSuspendTimeoutMs)) { ThreadSuspendByPeerWarning(self, FATAL, "Thread suspension timed out", peer); if (suspended_thread != nullptr) { CHECK_EQ(suspended_thread, thread); @@ -688,12 +694,20 @@ Thread* ThreadList::SuspendThreadByPeer(jobject peer, bool request_suspension, } *timed_out = true; return nullptr; + } else if (sleep_us == 0 && + total_delay > static_cast<uint64_t>(kThreadSuspendMaxYieldUs) * 1000) { + // We have spun for kThreadSuspendMaxYieldUs time, switch to sleeps to prevent + // excessive CPU usage. + sleep_us = kThreadSuspendMaxYieldUs / 2; } } // Release locks and come out of runnable state. } - VLOG(threads) << "SuspendThreadByPeer sleeping to allow thread chance to suspend"; - ThreadSuspendSleep(&delay_us, &total_delay_us); + VLOG(threads) << "SuspendThreadByPeer waiting to allow thread chance to suspend"; + ThreadSuspendSleep(sleep_us); + // This may stay at 0 if sleep_us == 0, but this is WAI since we want to avoid using usleep at + // all if possible. This shouldn't be an issue since time to suspend should always be small. + sleep_us = std::min(sleep_us * 2, kThreadSuspendMaxSleepUs); } } @@ -704,12 +718,11 @@ static void ThreadSuspendByThreadIdWarning(LogSeverity severity, const char* mes Thread* ThreadList::SuspendThreadByThreadId(uint32_t thread_id, bool debug_suspension, bool* timed_out) { - static const useconds_t kTimeoutUs = 30 * 1000000; // 30s. - useconds_t total_delay_us = 0; - useconds_t delay_us = 0; + const uint64_t start_time = NanoTime(); + useconds_t sleep_us = kThreadSuspendInitialSleepUs; *timed_out = false; Thread* suspended_thread = nullptr; - Thread* self = Thread::Current(); + Thread* const self = Thread::Current(); CHECK_NE(thread_id, kInvalidThreadId); VLOG(threads) << "SuspendThreadByThreadId starting"; while (true) { @@ -771,19 +784,26 @@ Thread* ThreadList::SuspendThreadByThreadId(uint32_t thread_id, bool debug_suspe VLOG(threads) << "SuspendThreadByThreadId thread suspended: " << *thread; return thread; } - if (total_delay_us >= kTimeoutUs) { + const uint64_t total_delay = NanoTime() - start_time; + if (total_delay >= MsToNs(kThreadSuspendTimeoutMs)) { ThreadSuspendByThreadIdWarning(WARNING, "Thread suspension timed out", thread_id); if (suspended_thread != nullptr) { thread->ModifySuspendCount(soa.Self(), -1, debug_suspension); } *timed_out = true; return nullptr; + } else if (sleep_us == 0 && + total_delay > static_cast<uint64_t>(kThreadSuspendMaxYieldUs) * 1000) { + // We have spun for kThreadSuspendMaxYieldUs time, switch to sleeps to prevent + // excessive CPU usage. + sleep_us = kThreadSuspendMaxYieldUs / 2; } } // Release locks and come out of runnable state. 
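The suspension loops above share a two-phase backoff. A condensed, hedged sketch of the policy; the ThreadIsSuspended and ElapsedNs helpers are hypothetical stand-ins for the surrounding logic, and the constants are the ones declared at the top of thread_list.cc:

useconds_t sleep_us = kThreadSuspendInitialSleepUs;  // 0, i.e. start by yielding.
while (!ThreadIsSuspended()) {
  const uint64_t total_delay = ElapsedNs();
  if (total_delay >= MsToNs(kThreadSuspendTimeoutMs)) {
    return nullptr;  // Give up and report the timeout to the caller.
  }
  if (sleep_us == 0 && total_delay > static_cast<uint64_t>(kThreadSuspendMaxYieldUs) * 1000) {
    sleep_us = kThreadSuspendMaxYieldUs / 2;  // Spun long enough; switch to sleeping.
  }
  ThreadSuspendSleep(sleep_us);  // sched_yield() when 0, usleep(sleep_us) otherwise.
  sleep_us = std::min(sleep_us * 2, kThreadSuspendMaxSleepUs);  // Exponential backoff, capped.
}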
} - VLOG(threads) << "SuspendThreadByThreadId sleeping to allow thread chance to suspend"; - ThreadSuspendSleep(&delay_us, &total_delay_us); + VLOG(threads) << "SuspendThreadByThreadId waiting to allow thread chance to suspend"; + ThreadSuspendSleep(sleep_us); + sleep_us = std::min(sleep_us * 2, kThreadSuspendMaxSleepUs); } } diff --git a/test/074-gc-thrash/src/Main.java b/test/074-gc-thrash/src/Main.java index 32fbf2d..238e73a 100644 --- a/test/074-gc-thrash/src/Main.java +++ b/test/074-gc-thrash/src/Main.java @@ -292,8 +292,8 @@ class Deep extends Thread { break; } - strong[depth] = funStr; weak[depth] = new WeakReference(funStr); + strong[depth] = funStr; if (depth+1 < MAX_DEPTH) dive(depth+1, iteration+1); else diff --git a/test/099-vmdebug/src/Main.java b/test/099-vmdebug/src/Main.java index 7f24b1b..4d781c3 100644 --- a/test/099-vmdebug/src/Main.java +++ b/test/099-vmdebug/src/Main.java @@ -28,14 +28,22 @@ public class Main { testMethodTracing(); } - private static void testMethodTracing() throws Exception { - File tempFile; + private static File createTempFile() throws Exception { try { - tempFile = File.createTempFile("test", ".trace"); + return File.createTempFile("test", ".trace"); } catch (IOException e) { - System.setProperty("java.io.tmpdir", "/sdcard"); - tempFile = File.createTempFile("test", ".trace"); + System.setProperty("java.io.tmpdir", "/data/local/tmp"); + try { + return File.createTempFile("test", ".trace"); + } catch (IOException e2) { + System.setProperty("java.io.tmpdir", "/sdcard"); + return File.createTempFile("test", ".trace"); + } } + } + + private static void testMethodTracing() throws Exception { + File tempFile = createTempFile(); tempFile.deleteOnExit(); String tempFileName = tempFile.getPath(); diff --git a/test/116-nodex2oat/run b/test/116-nodex2oat/run index 72488f0..9e5c7dd 100755 --- a/test/116-nodex2oat/run +++ b/test/116-nodex2oat/run @@ -16,6 +16,14 @@ flags="${@}" +# This test is supposed to test without oat files, so doesn't work for prebuild. Make sure that +# flag isn't set, or complain. +# Note: prebuild is the default. +if [[ "${flags}" == *--prebuild* || "${flags}" != *--no-prebuild* ]] ; then + echo "Test 116-nodex2oat cannot run in prebuild mode." + exit 1 +fi + # Make sure we can run without an oat file, echo "Run -Xnodex2oat" ${RUN} ${flags} --runtime-option -Xnodex2oat diff --git a/test/117-nopatchoat/run b/test/117-nopatchoat/run index a7c96a0..c749c74 100755 --- a/test/117-nopatchoat/run +++ b/test/117-nopatchoat/run @@ -16,10 +16,23 @@ # ensure flags includes prebuild and relocate. It doesn't make sense unless we # have a oat file we want to relocate. -# TODO Unfortunately we have no way to force prebuild on for both host and target (or skip if not on). -flags="${@/--relocate/}" -flags="${flags/--no-relocate/}" -flags="${flags} --relocate" +flags="$@" + +# This test is supposed to test with oat files. Make sure that the no-prebuild flag isn't set, +# or complain. +# Note: prebuild is the default. +if [[ "${flags}" == *--no-prebuild* ]] ; then + echo "Test 117-nopatchoat is not intended to run in no-prebuild mode." + exit 1 +fi + +# This test is supposed to test relocation. Make sure that the no-relocate flag isn't set, +# or complain. +# Note: relocate is the default. +if [[ "${flags}" == *--no-relocate* ]] ; then + echo "Test 117-nopatchoat is not intended to run in no-relocate mode." 
+ exit 1 +fi # Make sure we can run without relocation echo "Run without dex2oat/patchoat" diff --git a/test/118-noimage-dex2oat/run b/test/118-noimage-dex2oat/run index 92a4ec2..2037797 100644 --- a/test/118-noimage-dex2oat/run +++ b/test/118-noimage-dex2oat/run @@ -14,17 +14,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Force relocation otherwise we will just use the already created core.oat/art pair. -flags="${@/--no-relocate/--relocate}" +flags="$@" + +# This test is supposed to test without oat files, so doesn't work for prebuild. Make sure that +# flag isn't set, or complain. +# Note: prebuild is the default. +if [[ "${flags}" == *--prebuild* || "${flags}" != *--no-prebuild* ]] ; then + echo "Test 118-noimage-dex2oat cannot run in prebuild mode." + exit 1 +fi -# Use the non-prebuild script. -RUN="${RUN/push-and-run-prebuilt-test-jar/push-and-run-test-jar}" +# Force relocation otherwise we will just use the already created core.oat/art pair. +# Note: relocate is the default. +if [[ "${flags}" == *--no-relocate* ]] ; then + echo "Test 118-noimage-dex2oat is not intended to run in no-relocate mode." + exit 1 +fi -if [ $(basename $RUN) == 'host-run-test-jar' ]; then +if [[ $@ == *--host* ]]; then framework="${ANDROID_HOST_OUT}/framework" bpath_suffix="-hostdex" - # Remove prebuild from the flags, this test is for testing not having oat files. - flags="${flags/--prebuild/}" else framework="/system/framework" bpath_suffix="" diff --git a/test/119-noimage-patchoat/run b/test/119-noimage-patchoat/run index 745b0c9..c409cbb 100644 --- a/test/119-noimage-patchoat/run +++ b/test/119-noimage-patchoat/run @@ -14,10 +14,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +flags="$@" + # Force relocation otherwise we will just use the already created core.oat/art pair. -flags="${@/--no-relocate/--relocate}" +# Note: relocate is the default. +if [[ "${flags}" == *--no-relocate* ]] ; then + echo "Test 119-noimage-patchoat is not intended to run in no-relocate mode." + exit 1 +fi -if [ $(basename $RUN) == 'host-run-test-jar' ]; then +if [[ $@ == *--host* ]]; then false_bin="/bin/false" else false_bin="/system/bin/false" diff --git a/test/446-checker-inliner2/expected.txt b/test/446-checker-inliner2/expected.txt new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/test/446-checker-inliner2/expected.txt diff --git a/test/446-checker-inliner2/info.txt b/test/446-checker-inliner2/info.txt new file mode 100644 index 0000000..66a3270 --- /dev/null +++ b/test/446-checker-inliner2/info.txt @@ -0,0 +1 @@ +Tests inlining in the optimizing compiler. diff --git a/test/446-checker-inliner2/src/Main.java b/test/446-checker-inliner2/src/Main.java new file mode 100644 index 0000000..ecf071e --- /dev/null +++ b/test/446-checker-inliner2/src/Main.java @@ -0,0 +1,72 @@ +/* +* Copyright (C) 2014 The Android Open Source Project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
diff --git a/test/116-nodex2oat/run b/test/116-nodex2oat/run
index 72488f0..9e5c7dd 100755
--- a/test/116-nodex2oat/run
+++ b/test/116-nodex2oat/run
@@ -16,6 +16,14 @@
 
 flags="${@}"
 
+# This test is supposed to test without oat files, so it doesn't work with prebuild. Make sure that
+# flag isn't set, or complain.
+# Note: prebuild is the default.
+if [[ "${flags}" == *--prebuild* || "${flags}" != *--no-prebuild* ]] ; then
+  echo "Test 116-nodex2oat cannot run in prebuild mode."
+  exit 1
+fi
+
 # Make sure we can run without an oat file,
 echo "Run -Xnodex2oat"
 ${RUN} ${flags} --runtime-option -Xnodex2oat
diff --git a/test/117-nopatchoat/run b/test/117-nopatchoat/run
index a7c96a0..c749c74 100755
--- a/test/117-nopatchoat/run
+++ b/test/117-nopatchoat/run
@@ -16,10 +16,23 @@
 
 # ensure flags includes prebuild and relocate. It doesn't make sense unless we
 # have a oat file we want to relocate.
-# TODO Unfortunately we have no way to force prebuild on for both host and target (or skip if not on).
-flags="${@/--relocate/}"
-flags="${flags/--no-relocate/}"
-flags="${flags} --relocate"
+flags="$@"
+
+# This test is supposed to test with oat files. Make sure that the no-prebuild flag isn't set,
+# or complain.
+# Note: prebuild is the default.
+if [[ "${flags}" == *--no-prebuild* ]] ; then
+  echo "Test 117-nopatchoat is not intended to run in no-prebuild mode."
+  exit 1
+fi
+
+# This test is supposed to test relocation. Make sure that the no-relocate flag isn't set,
+# or complain.
+# Note: relocate is the default.
+if [[ "${flags}" == *--no-relocate* ]] ; then
+  echo "Test 117-nopatchoat is not intended to run in no-relocate mode."
+  exit 1
+fi
 
 # Make sure we can run without relocation
 echo "Run without dex2oat/patchoat"
diff --git a/test/118-noimage-dex2oat/run b/test/118-noimage-dex2oat/run
index 92a4ec2..2037797 100644
--- a/test/118-noimage-dex2oat/run
+++ b/test/118-noimage-dex2oat/run
@@ -14,17 +14,26 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Force relocation otherwise we will just use the already created core.oat/art pair.
-flags="${@/--no-relocate/--relocate}"
+flags="$@"
+
+# This test is supposed to test without oat files, so it doesn't work with prebuild. Make sure that
+# flag isn't set, or complain.
+# Note: prebuild is the default.
+if [[ "${flags}" == *--prebuild* || "${flags}" != *--no-prebuild* ]] ; then
+  echo "Test 118-noimage-dex2oat cannot run in prebuild mode."
+  exit 1
+fi
 
-# Use the non-prebuild script.
-RUN="${RUN/push-and-run-prebuilt-test-jar/push-and-run-test-jar}"
+# Force relocation otherwise we will just use the already created core.oat/art pair.
+# Note: relocate is the default.
+if [[ "${flags}" == *--no-relocate* ]] ; then
+  echo "Test 118-noimage-dex2oat is not intended to run in no-relocate mode."
+  exit 1
+fi
 
-if [ $(basename $RUN) == 'host-run-test-jar' ]; then
+if [[ $@ == *--host* ]]; then
   framework="${ANDROID_HOST_OUT}/framework"
   bpath_suffix="-hostdex"
-  # Remove prebuild from the flags, this test is for testing not having oat files.
-  flags="${flags/--prebuild/}"
 else
   framework="/system/framework"
   bpath_suffix=""
diff --git a/test/119-noimage-patchoat/run b/test/119-noimage-patchoat/run
index 745b0c9..c409cbb 100644
--- a/test/119-noimage-patchoat/run
+++ b/test/119-noimage-patchoat/run
@@ -14,10 +14,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+flags="$@"
+
 # Force relocation otherwise we will just use the already created core.oat/art pair.
-flags="${@/--no-relocate/--relocate}"
+# Note: relocate is the default.
+if [[ "${flags}" == *--no-relocate* ]] ; then
+  echo "Test 119-noimage-patchoat is not intended to run in no-relocate mode."
+  exit 1
+fi
 
-if [ $(basename $RUN) == 'host-run-test-jar' ]; then
+if [[ $@ == *--host* ]]; then
   false_bin="/bin/false"
 else
   false_bin="/system/bin/false"
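The four run-script hunks above follow one pattern: inspect the flags and fail fast with a message when the requested mode contradicts what the test exercises. The 116/118 check is the subtle case: because prebuild is the default, those tests are rejected unless --no-prebuild is passed explicitly. A small Java sketch of that predicate; the flag strings come from the scripts, while the class and the list-based matching are illustrative.

import java.util.Arrays;
import java.util.List;

// Illustrative sketch of the 116/118 guard: the test only makes sense without
// prebuilt oat files, so it requires an explicit --no-prebuild and rejects
// --prebuild (prebuild being the default when neither flag is given).
public class PrebuildGuardSketch {
    static boolean canRunWithoutPrebuild(List<String> flags) {
        return !flags.contains("--prebuild") && flags.contains("--no-prebuild");
    }

    public static void main(String[] args) {
        System.out.println(canRunWithoutPrebuild(Arrays.asList("--no-prebuild")));  // true
        System.out.println(canRunWithoutPrebuild(Arrays.asList("--relocate")));     // false: prebuild is the default
        System.out.println(canRunWithoutPrebuild(Arrays.asList("--prebuild")));     // false
    }
}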
diff --git a/test/446-checker-inliner2/expected.txt b/test/446-checker-inliner2/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/446-checker-inliner2/expected.txt
diff --git a/test/446-checker-inliner2/info.txt b/test/446-checker-inliner2/info.txt
new file mode 100644
index 0000000..66a3270
--- /dev/null
+++ b/test/446-checker-inliner2/info.txt
@@ -0,0 +1 @@
+Tests inlining in the optimizing compiler.
diff --git a/test/446-checker-inliner2/src/Main.java b/test/446-checker-inliner2/src/Main.java
new file mode 100644
index 0000000..ecf071e
--- /dev/null
+++ b/test/446-checker-inliner2/src/Main.java
@@ -0,0 +1,72 @@
+/*
+* Copyright (C) 2014 The Android Open Source Project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+public class Main {
+
+  // CHECK-START: int Main.inlineInstanceCall(Main) inliner (before)
+  // CHECK-DAG: [[Invoke:i\d+]] InvokeStaticOrDirect
+  // CHECK-DAG: Return [ [[Invoke]] ]
+
+  // CHECK-START: int Main.inlineInstanceCall(Main) inliner (after)
+  // CHECK-NOT: InvokeStaticOrDirect
+
+  // CHECK-START: int Main.inlineInstanceCall(Main) inliner (after)
+  // CHECK-DAG: [[Field:i\d+]] InstanceFieldGet
+  // CHECK-DAG: Return [ [[Field]] ]
+
+  public static int inlineInstanceCall(Main m) {
+    return m.foo();
+  }
+
+  private int foo() {
+    return field;
+  }
+
+  int field = 42;
+
+  // CHECK-START: int Main.inlineNestedCall() inliner (before)
+  // CHECK-DAG: [[Invoke:i\d+]] InvokeStaticOrDirect
+  // CHECK-DAG: Return [ [[Invoke]] ]
+
+  // CHECK-START: int Main.inlineNestedCall() inliner (after)
+  // CHECK-NOT: InvokeStaticOrDirect
+
+  // CHECK-START: int Main.inlineNestedCall() inliner (after)
+  // CHECK-DAG: [[Const38:i\d+]] IntConstant 38
+  // CHECK-DAG: Return [ [[Const38]] ]
+
+  public static int inlineNestedCall() {
+    return nestedCall();
+  }
+
+  public static int nestedCall() {
+    return bar();
+  }
+
+  public static int bar() {
+    return 38;
+  }
+
+  public static void main(String[] args) {
+    if (inlineInstanceCall(new Main()) != 42) {
+      throw new Error("Expected 42");
+    }
+
+    if (inlineNestedCall() != 38) {
+      throw new Error("Expected 38");
+    }
+  }
+}
diff --git a/test/447-checker-inliner3/expected.txt b/test/447-checker-inliner3/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/447-checker-inliner3/expected.txt
diff --git a/test/447-checker-inliner3/info.txt b/test/447-checker-inliner3/info.txt
new file mode 100644
index 0000000..66a3270
--- /dev/null
+++ b/test/447-checker-inliner3/info.txt
@@ -0,0 +1 @@
+Tests inlining in the optimizing compiler.
diff --git a/test/447-checker-inliner3/src/Main.java b/test/447-checker-inliner3/src/Main.java
new file mode 100644
index 0000000..db4b236
--- /dev/null
+++ b/test/447-checker-inliner3/src/Main.java
@@ -0,0 +1,77 @@
+/*
+* Copyright (C) 2014 The Android Open Source Project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+public class Main {
+
+  // CHECK-START: int Main.inlineIfThenElse() inliner (before)
+  // CHECK-DAG: [[Invoke:i\d+]] InvokeStaticOrDirect
+  // CHECK-DAG: Return [ [[Invoke]] ]
+
+  // CHECK-START: int Main.inlineIfThenElse() inliner (after)
+  // CHECK-NOT: InvokeStaticOrDirect
+
+  public static int inlineIfThenElse() {
+    return foo(true);
+  }
+
+  private static int foo(boolean value) {
+    if (value) {
+      return 1;
+    } else {
+      return 0;
+    }
+  }
+
+  // CHECK-START: int Main.inlineInLoop() inliner (before)
+  // CHECK-DAG: InvokeStaticOrDirect
+
+  // CHECK-START: int Main.inlineInLoop() inliner (after)
+  // CHECK-NOT: InvokeStaticOrDirect
+
+  public static int inlineInLoop() {
+    int result = 0;
+    for (int i = 0; i < 32; ++i) {
+      result += foo(i % 2 == 0);
+    }
+    return result;
+  }
+
+  // CHECK-START: int Main.inlineInLoopHeader() inliner (before)
+  // CHECK-DAG: InvokeStaticOrDirect
+
+  // CHECK-START: int Main.inlineInLoopHeader() inliner (after)
+  // CHECK-NOT: InvokeStaticOrDirect
+
+  public static int inlineInLoopHeader() {
+    int result = 0;
+    for (int i = 0; i < foo(i % 2 == 0); ++i) {
+      result += 42;
+    }
+    return result;
+  }
+
+  public static void main(String[] args) {
+    if (inlineIfThenElse() != 1) {
+      throw new Error("Expected 1");
+    }
+    if (inlineInLoop() != 16) {
+      throw new Error("Expected 16");
+    }
+    if (inlineInLoopHeader() != 42) {
+      throw new Error("Expected 42");
+    }
+  }
+}
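As a cross-check on the constants asserted in 447's main() (including the corrected "Expected 42" message): inlineInLoop() adds foo(i % 2 == 0) over i in [0, 32), contributing 1 for each of the 16 even values of i, and inlineInLoopHeader() runs its body exactly once before the loop bound foo(false) drops to 0, giving 42. The snippet below re-computes both values; the loop logic is copied from the test and only the class name is invented.

// Standalone re-computation of the values asserted by test 447's main():
// inlineInLoop() returns 16 and inlineInLoopHeader() returns 42.
public class InlinerExpectationsSketch {
    static int foo(boolean value) {
        return value ? 1 : 0;
    }

    static int inlineInLoop() {
        int result = 0;
        for (int i = 0; i < 32; ++i) {
            result += foo(i % 2 == 0);  // adds 1 only when i is even
        }
        return result;
    }

    static int inlineInLoopHeader() {
        int result = 0;
        for (int i = 0; i < foo(i % 2 == 0); ++i) {  // bound is 1 for i == 0, then 0
            result += 42;
        }
        return result;
    }

    public static void main(String[] args) {
        System.out.println(inlineInLoop());        // prints 16
        System.out.println(inlineInLoopHeader());  // prints 42
    }
}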
+
+.class public LMultipleReturns;
+
+.super Ljava/lang/Object;
+
+.method public static caller()I
+  .registers 1
+  invoke-static {}, LMultipleReturns;->$opt$CalleeReturnVoid()V
+  invoke-static {}, LMultipleReturns;->$opt$CalleeReturnInt()I
+  move-result v0
+  return v0
+.end method
+
+.method public static $opt$CalleeReturnVoid()V
+  .registers 2
+  const/4 v0, 0x0
+  const/4 v1, 0x1
+  if-eq v1, v0, :else
+  return-void
+  :else
+  return-void
+.end method
+
+.method public static $opt$CalleeReturnInt()I
+  .registers 2
+  const/4 v0, 0x0
+  const/4 v1, 0x1
+  if-eq v1, v0, :else
+  return v0
+  :else
+  return v1
+.end method
diff --git a/test/448-multiple-returns/src/Main.java b/test/448-multiple-returns/src/Main.java
new file mode 100644
index 0000000..4050ed1
--- /dev/null
+++ b/test/448-multiple-returns/src/Main.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("MultipleReturns");
+    Method m = c.getMethod("caller");
+    int result = (Integer)m.invoke(null);
+    if (result != 0) {
+      throw new Error("Expected 0, got " + result);
+    }
+  }
+}
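The smali file above exists because, as its info.txt notes, DX does not generate this multiple-return shape from Java source. For readers less used to smali, the methods correspond roughly to the Java below; this is a reading aid only, and compiling it would not reproduce the bytecode the test needs. Note that caller() ends up returning 0, matching the check in the accompanying Main.java.

// Rough Java rendering of MultipleReturns.smali, for readability only.
// The real test keeps this in smali because DX would not emit two distinct
// return instructions for an equivalent Java method.
public class MultipleReturnsSketch {
    static int caller() {
        calleeReturnVoid();
        return calleeReturnInt();
    }

    // v0 = 0 and v1 = 1 are never equal, so the branch always falls through,
    // but both paths simply return.
    static void calleeReturnVoid() {
        int v0 = 0;
        int v1 = 1;
        if (v1 == v0) {
            return;  // the :else label in the smali
        }
        return;      // fall-through path
    }

    // The fall-through path returns v0 (0); the :else path would return v1 (1).
    static int calleeReturnInt() {
        int v0 = 0;
        int v1 = 1;
        if (v1 == v0) {
            return v1;  // the :else label in the smali
        }
        return v0;      // fall-through path, taken here
    }

    public static void main(String[] args) {
        System.out.println(caller());  // prints 0, as the test's Main.java expects
    }
}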
diff --git a/test/802-deoptimization/src/CatchHandlerOnEntryHelper.java b/test/802-deoptimization/src/CatchHandlerOnEntryHelper.java
index a88d31b..9c41abf 100644
--- a/test/802-deoptimization/src/CatchHandlerOnEntryHelper.java
+++ b/test/802-deoptimization/src/CatchHandlerOnEntryHelper.java
@@ -21,10 +21,10 @@ public class CatchHandlerOnEntryHelper {
 
   public static void throwExceptionDuringDeopt(int i) {
     if (i == 0) {
-      DeoptimizationController.startDeoptomization();
+      DeoptimizationController.startDeoptimization();
       throw new RuntimeException("Test exception");
     } else {
-      DeoptimizationController.stopDeoptomization();
+      DeoptimizationController.stopDeoptimization();
     }
   }
 }
diff --git a/test/802-deoptimization/src/DeoptimizationController.java b/test/802-deoptimization/src/DeoptimizationController.java
index c031c07..c926669 100644
--- a/test/802-deoptimization/src/DeoptimizationController.java
+++ b/test/802-deoptimization/src/DeoptimizationController.java
@@ -22,15 +22,23 @@ import java.lang.reflect.Method;
  * Controls deoptimization using dalvik.system.VMDebug class.
  */
 public class DeoptimizationController {
-  public static void startDeoptomization() {
+  private static File createTempFile() throws Exception {
     try {
-      File tempFile;
+      return File.createTempFile("test", ".trace");
+    } catch (IOException e) {
+      System.setProperty("java.io.tmpdir", "/data/local/tmp");
       try {
-        tempFile = File.createTempFile("test", ".trace");
-      } catch (IOException e) {
+        return File.createTempFile("test", ".trace");
+      } catch (IOException e2) {
         System.setProperty("java.io.tmpdir", "/sdcard");
-        tempFile = File.createTempFile("test", ".trace");
+        return File.createTempFile("test", ".trace");
       }
+    }
+  }
+
+  public static void startDeoptimization() {
+    try {
+      File tempFile = createTempFile();
       tempFile.deleteOnExit();
       String tempFileName = tempFile.getPath();
 
@@ -43,7 +51,7 @@ public class DeoptimizationController {
     }
   }
 
-  public static void stopDeoptomization() {
+  public static void stopDeoptimization() {
     try {
       VMDebug.stopMethodTracing();
       if (VMDebug.getMethodTracingMode() != 0) {
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 2057cb9..a8f2001 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -180,7 +180,8 @@ TEST_ART_TIMING_SENSITIVE_RUN_TESTS :=
 
 # Note 116-nodex2oat is not broken per-se it just doesn't (and isn't meant to) work with --prebuild.
 TEST_ART_BROKEN_PREBUILD_RUN_TESTS := \
-  116-nodex2oat
+  116-nodex2oat \
+  118-noimage-dex2oat
 
 ifneq (,$(filter prebuild,$(PREBUILD_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),prebuild, \
@@ -204,7 +205,9 @@ TEST_ART_BROKEN_NO_PREBUILD_TESTS :=
 # Note 117-nopatchoat is not broken per-se it just doesn't work (and isn't meant to) without
 # --prebuild --relocate
 TEST_ART_BROKEN_NO_RELOCATE_TESTS := \
-  117-nopatchoat
+  117-nopatchoat \
+  118-noimage-dex2oat \
+  119-noimage-patchoat
 
 ifneq (,$(filter no-relocate,$(RELOCATE_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index 92b1e82..907218a 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -309,6 +309,9 @@ dalvikvm_cmdline="$INVOKE_WITH $GDB $ANDROID_ROOT/bin/$DALVIKVM \
   $DALVIKVM_BOOT_OPT \
   -cp $DEX_LOCATION/$TEST_NAME.jar$SECONDARY_DEX $MAIN"
 
+# Remove whitespace.
+dex2oat_cmdline=$(echo $dex2oat_cmdline)
+dalvikvm_cmdline=$(echo $dalvikvm_cmdline)
 
 if [ "$HOST" = "n" ]; then
   adb root > /dev/null |