101 files changed, 2365 insertions, 1953 deletions
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk index 5beb959..d1724cc 100644 --- a/build/Android.common_build.mk +++ b/build/Android.common_build.mk @@ -83,19 +83,10 @@ ART_TARGET_CLANG := $(USE_CLANG_PLATFORM_BUILD) else ART_TARGET_CLANG := false endif - -ifeq ($(TARGET_ARCH)|$(ART_TARGET_CLANG),mips|true) - # b/18807290, Clang generated mips assembly code for array.cc - # cannot be compiled by gas. - # b/18789639, Clang assembler cannot compile inlined assembly code in - # valgrind_malloc_space-inl.h:192:5: error: used $at without ".set noat" - $(warning Clang is disabled for the mips target) -endif ART_TARGET_CLANG_arm := ART_TARGET_CLANG_arm64 := -# TODO: Enable clang mips when b/18807290 and b/18789639 are fixed. -ART_TARGET_CLANG_mips := false -ART_TARGET_CLANG_mips64 := false +ART_TARGET_CLANG_mips := +ART_TARGET_CLANG_mips64 := ART_TARGET_CLANG_x86 := ART_TARGET_CLANG_x86_64 := diff --git a/compiler/dex/mir_method_info.cc b/compiler/dex/mir_method_info.cc index 5654604..94be1fd 100644 --- a/compiler/dex/mir_method_info.cc +++ b/compiler/dex/mir_method_info.cc @@ -169,7 +169,8 @@ void MirMethodLoweringInfo::Resolve(CompilerDriver* compiler_driver, ~(kFlagFastPath | kFlagIsIntrinsic | kFlagIsSpecial | kFlagClassIsInitialized | (kInvokeTypeMask << kBitSharpTypeBegin)); it->flags_ = other_flags | - (fast_path_flags != 0 ? kFlagFastPath : 0u) | + // String init path is a special always-fast path. + (fast_path_flags != 0 || string_init ? kFlagFastPath : 0u) | ((is_intrinsic_or_special & kInlineIntrinsic) != 0 ? kFlagIsIntrinsic : 0u) | ((is_intrinsic_or_special & kInlineSpecial) != 0 ? kFlagIsSpecial : 0u) | (static_cast<uint16_t>(invoke_type) << kBitSharpTypeBegin) | diff --git a/compiler/dwarf/dwarf_test.h b/compiler/dwarf/dwarf_test.h index 99b8e79..230ebe3 100644 --- a/compiler/dwarf/dwarf_test.h +++ b/compiler/dwarf/dwarf_test.h @@ -57,44 +57,41 @@ class DwarfTest : public CommonRuntimeTest { // Pretty-print the generated DWARF data using objdump. template<typename ElfTypes> - std::vector<std::string> Objdump(bool is64bit, const char* args) { + std::vector<std::string> Objdump(const char* args) { // Write simple elf file with just the DWARF sections. + InstructionSet isa = (sizeof(typename ElfTypes::Addr) == 8) ? kX86_64 : kX86; class NoCode : public CodeOutput { - virtual void SetCodeOffset(size_t) { } - virtual bool Write(OutputStream*) { return true; } - } code; - ScratchFile file; - InstructionSet isa = is64bit ? 
kX86_64 : kX86; - ElfBuilder<ElfTypes> builder( - &code, file.GetFile(), isa, 0, 0, 0, 0, 0, 0, false, false); - typedef ElfRawSectionBuilder<ElfTypes> Section; - Section debug_info(".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0); - Section debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, nullptr, 0, 1, 0); - Section debug_str(".debug_str", SHT_PROGBITS, 0, nullptr, 0, 1, 0); - Section debug_line(".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0); - Section eh_frame(".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0); + bool Write(OutputStream*) OVERRIDE { return true; } // NOLINT + } no_code; + ElfBuilder<ElfTypes> builder(isa, 0, &no_code, 0, &no_code, 0); + typedef typename ElfBuilder<ElfTypes>::RawSection RawSection; + RawSection debug_info(".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0); + RawSection debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, nullptr, 0, 1, 0); + RawSection debug_str(".debug_str", SHT_PROGBITS, 0, nullptr, 0, 1, 0); + RawSection debug_line(".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0); + RawSection eh_frame(".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0); if (!debug_info_data_.empty()) { debug_info.SetBuffer(debug_info_data_); - builder.RegisterRawSection(&debug_info); + builder.RegisterSection(&debug_info); } if (!debug_abbrev_data_.empty()) { debug_abbrev.SetBuffer(debug_abbrev_data_); - builder.RegisterRawSection(&debug_abbrev); + builder.RegisterSection(&debug_abbrev); } if (!debug_str_data_.empty()) { debug_str.SetBuffer(debug_str_data_); - builder.RegisterRawSection(&debug_str); + builder.RegisterSection(&debug_str); } if (!debug_line_data_.empty()) { debug_line.SetBuffer(debug_line_data_); - builder.RegisterRawSection(&debug_line); + builder.RegisterSection(&debug_line); } if (!eh_frame_data_.empty()) { eh_frame.SetBuffer(eh_frame_data_); - builder.RegisterRawSection(&eh_frame); + builder.RegisterSection(&eh_frame); } - builder.Init(); - builder.Write(); + ScratchFile file; + builder.Write(file.GetFile()); // Read the elf file back using objdump. std::vector<std::string> lines; @@ -123,9 +120,9 @@ class DwarfTest : public CommonRuntimeTest { std::vector<std::string> Objdump(bool is64bit, const char* args) { if (is64bit) { - return Objdump<ElfTypes64>(is64bit, args); + return Objdump<ElfTypes64>(args); } else { - return Objdump<ElfTypes32>(is64bit, args); + return Objdump<ElfTypes32>(args); } } diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h index 32c8cce..63d3a0d 100644 --- a/compiler/elf_builder.h +++ b/compiler/elf_builder.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014 The Android Open Source Project + * Copyright (C) 2015 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,584 +17,498 @@ #ifndef ART_COMPILER_ELF_BUILDER_H_ #define ART_COMPILER_ELF_BUILDER_H_ +#include <vector> + #include "arch/instruction_set.h" -#include "base/stl_util.h" -#include "base/value_object.h" +#include "base/unix_file/fd_file.h" #include "buffered_output_stream.h" #include "elf_utils.h" #include "file_output_stream.h" namespace art { -template <typename ElfTypes> -class ElfSectionBuilder : public ValueObject { +class CodeOutput { public: - using Elf_Word = typename ElfTypes::Word; - using Elf_Shdr = typename ElfTypes::Shdr; - - ElfSectionBuilder(const std::string& sec_name, Elf_Word type, Elf_Word flags, - const ElfSectionBuilder<ElfTypes> *link, Elf_Word info, - Elf_Word align, Elf_Word entsize) - : section_index_(0), name_(sec_name), link_(link) { - memset(§ion_, 0, sizeof(section_)); - section_.sh_type = type; - section_.sh_flags = flags; - section_.sh_info = info; - section_.sh_addralign = align; - section_.sh_entsize = entsize; - } - ElfSectionBuilder(const ElfSectionBuilder&) = default; - - ~ElfSectionBuilder() {} - - Elf_Word GetLink() const { - return (link_ != nullptr) ? link_->section_index_ : 0; - } - - const Elf_Shdr* GetSection() const { - return §ion_; - } - - Elf_Shdr* GetSection() { - return §ion_; - } - - Elf_Word GetSectionIndex() const { - return section_index_; - } - - void SetSectionIndex(Elf_Word section_index) { - section_index_ = section_index; - } - - const std::string& GetName() const { - return name_; - } - - private: - Elf_Shdr section_; - Elf_Word section_index_; - const std::string name_; - const ElfSectionBuilder* const link_; + virtual bool Write(OutputStream* out) = 0; + virtual ~CodeOutput() {} }; +// Writes ELF file. +// The main complication is that the sections often want to reference +// each other. We solve this by writing the ELF file in two stages: +// * Sections are asked about their size, and overall layout is calculated. +// * Sections do the actual writes which may use offsets of other sections. template <typename ElfTypes> -class ElfDynamicBuilder FINAL : public ElfSectionBuilder<ElfTypes> { +class ElfBuilder FINAL { public: + using Elf_Addr = typename ElfTypes::Addr; + using Elf_Off = typename ElfTypes::Off; using Elf_Word = typename ElfTypes::Word; using Elf_Sword = typename ElfTypes::Sword; + using Elf_Ehdr = typename ElfTypes::Ehdr; using Elf_Shdr = typename ElfTypes::Shdr; + using Elf_Sym = typename ElfTypes::Sym; + using Elf_Phdr = typename ElfTypes::Phdr; using Elf_Dyn = typename ElfTypes::Dyn; - void AddDynamicTag(Elf_Sword tag, Elf_Word d_un) { - if (tag == DT_NULL) { - return; + // Base class of all sections. + class Section { + public: + Section(const std::string& name, Elf_Word type, Elf_Word flags, + const Section* link, Elf_Word info, Elf_Word align, Elf_Word entsize) + : header_(), section_index_(0), name_(name), link_(link) { + header_.sh_type = type; + header_.sh_flags = flags; + header_.sh_info = info; + header_.sh_addralign = align; + header_.sh_entsize = entsize; } - dynamics_.push_back({nullptr, tag, d_un}); - } + virtual ~Section() {} - void AddDynamicTag(Elf_Sword tag, Elf_Word d_un, - const ElfSectionBuilder<ElfTypes>* section) { - if (tag == DT_NULL) { - return; - } - dynamics_.push_back({section, tag, d_un}); - } + // Returns the size of the content of this section. It is used to + // calculate file offsets of all sections before doing any writes. 
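[Editor's note] The comment above captures the central design change of this rewrite: every Section first reports its size, the builder computes the complete file layout, and only then does any writing happen, so cross-section references resolve against final offsets. A minimal standalone sketch of that two-stage protocol, using hypothetical names (LayoutItem, BlobItem) rather than the actual ART classes:

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

// Stage 1: every item reports its size so offsets can be assigned.
// Stage 2: items write themselves, free to consult any assigned offset.
class LayoutItem {
 public:
  virtual ~LayoutItem() {}
  virtual size_t GetSize() const = 0;     // Must match what Write() emits.
  virtual bool Write(FILE* f) const = 0;  // Called only after layout is done.
  size_t offset = 0;                      // Assigned between the two stages.
};

class BlobItem : public LayoutItem {
 public:
  explicit BlobItem(std::vector<uint8_t> data) : data_(std::move(data)) {}
  size_t GetSize() const override { return data_.size(); }
  bool Write(FILE* f) const override {
    return fwrite(data_.data(), 1, data_.size(), f) == data_.size();
  }
 private:
  std::vector<uint8_t> data_;
};

bool WriteAll(std::vector<LayoutItem*>& items, FILE* f) {
  size_t offset = 0;
  for (LayoutItem* item : items) {  // Stage 1: compute the layout.
    item->offset = offset;
    offset += item->GetSize();
  }
  for (LayoutItem* item : items) {  // Stage 2: do the actual writes.
    if (!item->Write(f)) {
      return false;
    }
  }
  return true;
}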
+ virtual Elf_Word GetSize() const = 0; - ElfDynamicBuilder(const std::string& sec_name, - ElfSectionBuilder<ElfTypes> *link) - : ElfSectionBuilder<ElfTypes>(sec_name, SHT_DYNAMIC, SHF_ALLOC | SHF_ALLOC, - link, 0, kPageSize, sizeof(Elf_Dyn)) {} - ~ElfDynamicBuilder() {} - - Elf_Word GetSize() const { - // Add 1 for the DT_NULL, 1 for DT_STRSZ, and 1 for DT_SONAME. All of - // these must be added when we actually put the file together because - // their values are very dependent on state. - return dynamics_.size() + 3; - } + // Write the content of this section to the given file. + // This must write exactly the number of bytes returned by GetSize(). + // Offsets of all sections are known when this method is called. + virtual bool Write(File* elf_file) = 0; - // Create the actual dynamic vector. strsz should be the size of the .dynstr - // table and soname_off should be the offset of the soname in .dynstr. - // Since niether can be found prior to final layout we will wait until here - // to add them. - std::vector<Elf_Dyn> GetDynamics(Elf_Word strsz, Elf_Word soname) const { - std::vector<Elf_Dyn> ret; - for (auto it = dynamics_.cbegin(); it != dynamics_.cend(); ++it) { - if (it->section_ != nullptr) { - // We are adding an address relative to a section. - ret.push_back( - {it->tag_, {it->off_ + it->section_->GetSection()->sh_addr}}); - } else { - ret.push_back({it->tag_, {it->off_}}); - } + Elf_Word GetLink() const { + return (link_ != nullptr) ? link_->GetSectionIndex() : 0; } - ret.push_back({DT_STRSZ, {strsz}}); - ret.push_back({DT_SONAME, {soname}}); - ret.push_back({DT_NULL, {0}}); - return ret; - } - - private: - struct ElfDynamicState { - const ElfSectionBuilder<ElfTypes>* section_; - Elf_Sword tag_; - Elf_Word off_; - }; - std::vector<ElfDynamicState> dynamics_; -}; - -template <typename ElfTypes> -class ElfRawSectionBuilder FINAL : public ElfSectionBuilder<ElfTypes> { - public: - using Elf_Word = typename ElfTypes::Word; - - ElfRawSectionBuilder(const std::string& sec_name, Elf_Word type, Elf_Word flags, - const ElfSectionBuilder<ElfTypes>* link, Elf_Word info, - Elf_Word align, Elf_Word entsize) - : ElfSectionBuilder<ElfTypes>(sec_name, type, flags, link, info, align, entsize) { - } - ElfRawSectionBuilder(const ElfRawSectionBuilder&) = default; - - ~ElfRawSectionBuilder() {} - - std::vector<uint8_t>* GetBuffer() { - return &buf_; - } - - void SetBuffer(const std::vector<uint8_t>& buf) { - buf_ = buf; - } - - private: - std::vector<uint8_t> buf_; -}; - -template <typename ElfTypes> -class ElfOatSectionBuilder FINAL : public ElfSectionBuilder<ElfTypes> { - public: - using Elf_Word = typename ElfTypes::Word; - - ElfOatSectionBuilder(const std::string& sec_name, Elf_Word size, Elf_Word offset, - Elf_Word type, Elf_Word flags) - : ElfSectionBuilder<ElfTypes>(sec_name, type, flags, nullptr, 0, kPageSize, 0), - offset_(offset), size_(size) { - } - - ~ElfOatSectionBuilder() {} - - Elf_Word GetOffset() const { - return offset_; - } - Elf_Word GetSize() const { - return size_; - } + const Elf_Shdr* GetHeader() const { + return &header_; + } - private: - // Offset of the content within the file. - Elf_Word offset_; - // Size of the content within the file. 
- Elf_Word size_; -}; + Elf_Shdr* GetHeader() { + return &header_; + } -static inline constexpr uint8_t MakeStInfo(uint8_t binding, uint8_t type) { - return ((binding) << 4) + ((type) & 0xf); -} + Elf_Word GetSectionIndex() const { + DCHECK_NE(section_index_, 0u); + return section_index_; + } -// from bionic -static inline unsigned elfhash(const char *_name) { - const unsigned char *name = (const unsigned char *) _name; - unsigned h = 0, g; + void SetSectionIndex(Elf_Word section_index) { + section_index_ = section_index; + } - while (*name) { - h = (h << 4) + *name++; - g = h & 0xf0000000; - h ^= g; - h ^= g >> 24; - } - return h; -} + const std::string& GetName() const { + return name_; + } -template <typename ElfTypes> -class ElfSymtabBuilder FINAL : public ElfSectionBuilder<ElfTypes> { - public: - using Elf_Addr = typename ElfTypes::Addr; - using Elf_Word = typename ElfTypes::Word; - using Elf_Sym = typename ElfTypes::Sym; + private: + Elf_Shdr header_; + Elf_Word section_index_; + const std::string name_; + const Section* const link_; - // Add a symbol with given name to this symtab. The symbol refers to - // 'relative_addr' within the given section and has the given attributes. - void AddSymbol(const std::string& name, - const ElfSectionBuilder<ElfTypes>* section, - Elf_Addr addr, - bool is_relative, - Elf_Word size, - uint8_t binding, - uint8_t type, - uint8_t other = 0) { - CHECK(section); - ElfSymtabBuilder::ElfSymbolState state {name, section, addr, size, is_relative, - MakeStInfo(binding, type), other, 0}; - symbols_.push_back(state); - } + DISALLOW_COPY_AND_ASSIGN(Section); + }; - ElfSymtabBuilder(const std::string& sec_name, Elf_Word type, - const std::string& str_name, Elf_Word str_type, bool alloc) - : ElfSectionBuilder<ElfTypes>(sec_name, type, ((alloc) ? SHF_ALLOC : 0U), - &strtab_, 0, sizeof(Elf_Word), - sizeof(Elf_Sym)), str_name_(str_name), - str_type_(str_type), - strtab_(str_name, - str_type, - ((alloc) ? SHF_ALLOC : 0U), - nullptr, 0, 1, 1) { - } + // Writer of .dynamic section. + class DynamicSection FINAL : public Section { + public: + void AddDynamicTag(Elf_Sword tag, Elf_Word value, const Section* section) { + DCHECK_NE(tag, static_cast<Elf_Sword>(DT_NULL)); + dynamics_.push_back({tag, value, section}); + } - ~ElfSymtabBuilder() {} + DynamicSection(const std::string& name, Section* link) + : Section(name, SHT_DYNAMIC, SHF_ALLOC, + link, 0, kPageSize, sizeof(Elf_Dyn)) {} - std::vector<Elf_Word> GenerateHashContents() const { - // Here is how The ELF hash table works. - // There are 3 arrays to worry about. - // * The symbol table where the symbol information is. - // * The bucket array which is an array of indexes into the symtab and chain. - // * The chain array which is also an array of indexes into the symtab and chain. - // - // Lets say the state is something like this. 
- // +--------+ +--------+ +-----------+ - // | symtab | | bucket | | chain | - // | null | | 1 | | STN_UNDEF | - // | <sym1> | | 4 | | 2 | - // | <sym2> | | | | 5 | - // | <sym3> | | | | STN_UNDEF | - // | <sym4> | | | | 3 | - // | <sym5> | | | | STN_UNDEF | - // +--------+ +--------+ +-----------+ - // - // The lookup process (in python psudocode) is - // - // def GetSym(name): - // # NB STN_UNDEF == 0 - // indx = bucket[elfhash(name) % num_buckets] - // while indx != STN_UNDEF: - // if GetSymbolName(symtab[indx]) == name: - // return symtab[indx] - // indx = chain[indx] - // return SYMBOL_NOT_FOUND - // - // Between bucket and chain arrays every symtab index must be present exactly - // once (except for STN_UNDEF, which must be present 1 + num_bucket times). - - // Select number of buckets. - // This is essentially arbitrary. - Elf_Word nbuckets; - Elf_Word chain_size = GetSize(); - if (symbols_.size() < 8) { - nbuckets = 2; - } else if (symbols_.size() < 32) { - nbuckets = 4; - } else if (symbols_.size() < 256) { - nbuckets = 16; - } else { - // Have about 32 ids per bucket. - nbuckets = RoundUp(symbols_.size()/32, 2); + Elf_Word GetSize() const OVERRIDE { + return (dynamics_.size() + 1 /* DT_NULL */) * sizeof(Elf_Dyn); } - std::vector<Elf_Word> hash; - hash.push_back(nbuckets); - hash.push_back(chain_size); - uint32_t bucket_offset = hash.size(); - uint32_t chain_offset = bucket_offset + nbuckets; - hash.resize(hash.size() + nbuckets + chain_size, 0); - - Elf_Word* buckets = hash.data() + bucket_offset; - Elf_Word* chain = hash.data() + chain_offset; - - // Set up the actual hash table. - for (Elf_Word i = 0; i < symbols_.size(); i++) { - // Add 1 since we need to have the null symbol that is not in the symbols - // list. - Elf_Word index = i + 1; - Elf_Word hash_val = static_cast<Elf_Word>(elfhash(symbols_[i].name_.c_str())) % nbuckets; - if (buckets[hash_val] == 0) { - buckets[hash_val] = index; - } else { - hash_val = buckets[hash_val]; - CHECK_LT(hash_val, chain_size); - while (chain[hash_val] != 0) { - hash_val = chain[hash_val]; - CHECK_LT(hash_val, chain_size); + + bool Write(File* elf_file) OVERRIDE { + std::vector<Elf_Dyn> buffer; + buffer.reserve(dynamics_.size() + 1u); + for (const ElfDynamicState& it : dynamics_) { + if (it.section_ != nullptr) { + // We are adding an address relative to a section. + buffer.push_back( + {it.tag_, {it.value_ + it.section_->GetHeader()->sh_addr}}); + } else { + buffer.push_back({it.tag_, {it.value_}}); } - chain[hash_val] = index; - // Check for loops. Works because if this is non-empty then there must be - // another cell which already contains the same symbol index as this one, - // which means some symbol has more then one name, which isn't allowed. - CHECK_EQ(chain[index], static_cast<Elf_Word>(0)); } + buffer.push_back({DT_NULL, {0}}); + return WriteArray(elf_file, buffer.data(), buffer.size()); } - return hash; - } + private: + struct ElfDynamicState { + Elf_Sword tag_; + Elf_Word value_; + const Section* section_; + }; + std::vector<ElfDynamicState> dynamics_; + }; - std::string GenerateStrtab() { - std::string tab; - tab += '\0'; - for (auto it = symbols_.begin(); it != symbols_.end(); ++it) { - it->name_idx_ = tab.size(); - tab += it->name_; - tab += '\0'; + using PatchFn = void (*)(const std::vector<uintptr_t>& patch_locations, + Elf_Addr buffer_address, + Elf_Addr base_address, + std::vector<uint8_t>* buffer); + + // Section with content based on simple memory buffer. + // The buffer can be optionally patched before writing. 
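[Editor's note] No concrete PatchFn appears in this excerpt, so the following is only a plausible sketch of what RawSection's optional patching could look like. It assumes the patch locations are byte offsets of 32-bit values emitted relative to the base section's eventual load address (Elf_Addr is uint32_t for 32-bit ELF; a 64-bit variant would use uint64_t), and the usual <cstring>/<vector> includes:

// Hypothetical patch function for RawSection (not part of this change).
// Rewrites each 32-bit value at the given byte offsets by adding the final
// address of the base section (usually .text), turning section-relative
// values into absolute addresses.
static void PatchAbsolute32(const std::vector<uintptr_t>& patch_locations,
                            uint32_t buffer_address ATTRIBUTE_UNUSED,
                            uint32_t base_address,
                            std::vector<uint8_t>* buffer) {
  for (uintptr_t location : patch_locations) {
    uint32_t value;
    memcpy(&value, buffer->data() + location, sizeof(value));
    value += base_address;
    memcpy(buffer->data() + location, &value, sizeof(value));
  }
}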
+ class RawSection FINAL : public Section { + public: + RawSection(const std::string& name, Elf_Word type, Elf_Word flags, + const Section* link, Elf_Word info, Elf_Word align, Elf_Word entsize, + PatchFn patch = nullptr, const Section* patch_base_section = nullptr) + : Section(name, type, flags, link, info, align, entsize), + patched_(false), patch_(patch), patch_base_section_(patch_base_section) { + } + + Elf_Word GetSize() const OVERRIDE { + return buffer_.size(); + } + + bool Write(File* elf_file) OVERRIDE { + if (!patch_locations_.empty()) { + DCHECK(!patched_); // Do not patch twice. + DCHECK(patch_ != nullptr); + DCHECK(patch_base_section_ != nullptr); + patch_(patch_locations_, + this->GetHeader()->sh_addr, + patch_base_section_->GetHeader()->sh_addr, + &buffer_); + patched_ = true; + } + return WriteArray(elf_file, buffer_.data(), buffer_.size()); } - strtab_.GetSection()->sh_size = tab.size(); - return tab; - } - std::vector<Elf_Sym> GenerateSymtab() { - std::vector<Elf_Sym> ret; - Elf_Sym undef_sym; - memset(&undef_sym, 0, sizeof(undef_sym)); - undef_sym.st_shndx = SHN_UNDEF; - ret.push_back(undef_sym); - - for (auto it = symbols_.cbegin(); it != symbols_.cend(); ++it) { - Elf_Sym sym; - memset(&sym, 0, sizeof(sym)); - sym.st_name = it->name_idx_; - if (it->is_relative_) { - sym.st_value = it->addr_ + it->section_->GetSection()->sh_offset; - } else { - sym.st_value = it->addr_; - } - sym.st_size = it->size_; - sym.st_other = it->other_; - sym.st_shndx = it->section_->GetSectionIndex(); - sym.st_info = it->info_; + bool IsEmpty() const { + return buffer_.size() == 0; + } - ret.push_back(sym); + std::vector<uint8_t>* GetBuffer() { + return &buffer_; } - return ret; - } - Elf_Word GetSize() const { - // 1 is for the implicit null symbol. - return symbols_.size() + 1; - } + void SetBuffer(const std::vector<uint8_t>& buffer) { + buffer_ = buffer; + } - ElfSectionBuilder<ElfTypes>* GetStrTab() { - return &strtab_; - } + std::vector<uintptr_t>* GetPatchLocations() { + return &patch_locations_; + } - private: - struct ElfSymbolState { - const std::string name_; - const ElfSectionBuilder<ElfTypes>* section_; - Elf_Addr addr_; - Elf_Word size_; - bool is_relative_; - uint8_t info_; - uint8_t other_; - // Used during Write() to temporarially hold name index in the strtab. - Elf_Word name_idx_; + private: + std::vector<uint8_t> buffer_; + std::vector<uintptr_t> patch_locations_; + bool patched_; + // User-provided function to do the actual patching. + PatchFn patch_; + // The section that we patch against (usually .text). + const Section* patch_base_section_; }; - // Information for the strsym for dynstr sections. - const std::string str_name_; - Elf_Word str_type_; - // The symbols in the same order they will be in the symbol table. - std::vector<ElfSymbolState> symbols_; - ElfSectionBuilder<ElfTypes> strtab_; -}; - -template <typename Elf_Word> -class ElfFilePiece { - public: - virtual ~ElfFilePiece() {} + // Writer of .rodata section or .text section. + // The write is done lazily using the provided CodeOutput. 
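[Editor's note] The only CodeOutput visible in this excerpt is the NoCode stub in dwarf_test.h above. A slightly more realistic sketch that streams a prebuilt byte vector might look as follows, assuming OutputStream exposes the WriteFully(buffer, byte_count) method used elsewhere in ART; VectorCodeOutput is a hypothetical name:

// Illustrative CodeOutput that emits a fixed byte buffer (not part of this
// change). The buffer must be exactly GetSize() bytes long, since the
// section's Write() is required to produce that many bytes.
class VectorCodeOutput FINAL : public CodeOutput {
 public:
  explicit VectorCodeOutput(std::vector<uint8_t> bytes) : bytes_(std::move(bytes)) {}
  bool Write(OutputStream* out) OVERRIDE {
    return out->WriteFully(bytes_.data(), bytes_.size());
  }
 private:
  std::vector<uint8_t> bytes_;
};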
+ class OatSection FINAL : public Section { + public: + OatSection(const std::string& name, Elf_Word type, Elf_Word flags, + const Section* link, Elf_Word info, Elf_Word align, + Elf_Word entsize, Elf_Word size, CodeOutput* code_output) + : Section(name, type, flags, link, info, align, entsize), + size_(size), code_output_(code_output) { + } - virtual bool Write(File* elf_file) { - if (static_cast<off_t>(offset_) != lseek(elf_file->Fd(), offset_, SEEK_SET)) { - PLOG(ERROR) << "Failed to seek to " << GetDescription() << " offset " << offset_ << " for " - << elf_file->GetPath(); - return false; + Elf_Word GetSize() const OVERRIDE { + return size_; } - return DoActualWrite(elf_file); - } + bool Write(File* elf_file) OVERRIDE { + // The BufferedOutputStream class contains the buffer as a field, + // therefore it is too big to allocate on the stack. + std::unique_ptr<BufferedOutputStream> output_stream( + new BufferedOutputStream(new FileOutputStream(elf_file))); + return code_output_->Write(output_stream.get()); + } - static bool Compare(ElfFilePiece* a, ElfFilePiece* b) { - return a->offset_ < b->offset_; - } + private: + Elf_Word size_; + CodeOutput* code_output_; + }; - protected: - explicit ElfFilePiece(Elf_Word offset) : offset_(offset) {} + // Writer of .bss section. + class NoBitsSection FINAL : public Section { + public: + NoBitsSection(const std::string& name, Elf_Word size) + : Section(name, SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0), + size_(size) { + } - Elf_Word GetOffset() const { - return offset_; - } + Elf_Word GetSize() const OVERRIDE { + return size_; + } - virtual const char* GetDescription() const = 0; - virtual bool DoActualWrite(File* elf_file) = 0; + bool Write(File* elf_file ATTRIBUTE_UNUSED) OVERRIDE { + LOG(ERROR) << "This section should not be written to the ELF file"; + return false; + } - private: - const Elf_Word offset_; + private: + Elf_Word size_; + }; - DISALLOW_COPY_AND_ASSIGN(ElfFilePiece); -}; + // Writer of .dynstr, .strtab and .shstrtab sections. + class StrtabSection FINAL : public Section { + public: + StrtabSection(const std::string& name, Elf_Word flags) + : Section(name, SHT_STRTAB, flags, nullptr, 0, 1, 1) { + buffer_.reserve(4 * KB); + // The first entry of strtab must be an empty string. + buffer_ += '\0'; + } -template <typename Elf_Word> -class ElfFileMemoryPiece FINAL : public ElfFilePiece<Elf_Word> { - public: - ElfFileMemoryPiece(const std::string& name, Elf_Word offset, const void* data, Elf_Word size) - : ElfFilePiece<Elf_Word>(offset), dbg_name_(name), data_(data), size_(size) {} + Elf_Word AddName(const std::string& name) { + Elf_Word offset = buffer_.size(); + buffer_ += name; + buffer_ += '\0'; + return offset; + } - protected: - bool DoActualWrite(File* elf_file) OVERRIDE { - DCHECK(data_ != nullptr || size_ == 0U) << dbg_name_ << " " << size_; + Elf_Word GetSize() const OVERRIDE { + return buffer_.size(); + } - if (!elf_file->WriteFully(data_, size_)) { - PLOG(ERROR) << "Failed to write " << dbg_name_ << " for " << elf_file->GetPath(); - return false; + bool Write(File* elf_file) OVERRIDE { + return WriteArray(elf_file, buffer_.data(), buffer_.size()); } - return true; - } + private: + std::string buffer_; + }; - const char* GetDescription() const OVERRIDE { - return dbg_name_.c_str(); - } + class HashSection; - private: - const std::string& dbg_name_; - const void *data_; - Elf_Word size_; -}; + // Writer of .dynsym and .symtab sections.
+ class SymtabSection FINAL : public Section { + public: + // Add a symbol with given name to this symtab. The symbol refers to + // 'relative_addr' within the given section and has the given attributes. + void AddSymbol(const std::string& name, const Section* section, + Elf_Addr addr, bool is_relative, Elf_Word size, + uint8_t binding, uint8_t type, uint8_t other = 0) { + CHECK(section != nullptr); + Elf_Word name_idx = strtab_->AddName(name); + symbols_.push_back({ name, section, addr, size, is_relative, + MakeStInfo(binding, type), other, name_idx }); + } -class CodeOutput { - public: - virtual void SetCodeOffset(size_t offset) = 0; - virtual bool Write(OutputStream* out) = 0; - virtual ~CodeOutput() {} -}; + SymtabSection(const std::string& name, Elf_Word type, Elf_Word flags, + StrtabSection* strtab) + : Section(name, type, flags, strtab, 0, sizeof(Elf_Word), sizeof(Elf_Sym)), + strtab_(strtab) { + } -template <typename Elf_Word> -class ElfFileRodataPiece FINAL : public ElfFilePiece<Elf_Word> { - public: - ElfFileRodataPiece(Elf_Word offset, CodeOutput* output) : ElfFilePiece<Elf_Word>(offset), - output_(output) {} - - protected: - bool DoActualWrite(File* elf_file) OVERRIDE { - output_->SetCodeOffset(this->GetOffset()); - std::unique_ptr<BufferedOutputStream> output_stream( - new BufferedOutputStream(new FileOutputStream(elf_file))); - if (!output_->Write(output_stream.get())) { - PLOG(ERROR) << "Failed to write .rodata and .text for " << elf_file->GetPath(); - return false; + bool IsEmpty() const { + return symbols_.empty(); } - return true; - } + Elf_Word GetSize() const OVERRIDE { + return (1 /* NULL */ + symbols_.size()) * sizeof(Elf_Sym); + } - const char* GetDescription() const OVERRIDE { - return ".rodata"; - } + bool Write(File* elf_file) OVERRIDE { + std::vector<Elf_Sym> buffer; + buffer.reserve(1u + symbols_.size()); + buffer.push_back(Elf_Sym()); // NULL. + for (const ElfSymbolState& it : symbols_) { + Elf_Sym sym = Elf_Sym(); + sym.st_name = it.name_idx_; + if (it.is_relative_) { + sym.st_value = it.addr_ + it.section_->GetHeader()->sh_addr; + } else { + sym.st_value = it.addr_; + } + sym.st_size = it.size_; + sym.st_other = it.other_; + sym.st_shndx = it.section_->GetSectionIndex(); + sym.st_info = it.info_; + buffer.push_back(sym); + } + return WriteArray(elf_file, buffer.data(), buffer.size()); + } - private: - CodeOutput* const output_; + private: + struct ElfSymbolState { + const std::string name_; + const Section* section_; + Elf_Addr addr_; + Elf_Word size_; + bool is_relative_; + uint8_t info_; + uint8_t other_; + Elf_Word name_idx_; // index in the strtab. + }; - DISALLOW_COPY_AND_ASSIGN(ElfFileRodataPiece); -}; + static inline constexpr uint8_t MakeStInfo(uint8_t binding, uint8_t type) { + return ((binding) << 4) + ((type) & 0xf); + } -template <typename Elf_Word> -class ElfFileOatTextPiece FINAL : public ElfFilePiece<Elf_Word> { - public: - ElfFileOatTextPiece(Elf_Word offset, CodeOutput* output) : ElfFilePiece<Elf_Word>(offset), - output_(output) {} - - protected: - bool DoActualWrite(File* elf_file ATTRIBUTE_UNUSED) OVERRIDE { - // All data is written by the ElfFileRodataPiece right now, as the oat writer writes in one - // piece. This is for future flexibility. - UNUSED(output_); - return true; - } + // The symbols in the same order they will be in the symbol table. 
+ std::vector<ElfSymbolState> symbols_; + StrtabSection* strtab_; - const char* GetDescription() const OVERRIDE { - return ".text"; - } + friend class HashSection; + }; - private: - CodeOutput* const output_; + // TODO: Consider removing. + // We use it only for the dynsym section which has only 5 symbols. + // We do not use it for symtab, and we probably do not have to + // since we use those symbols only to print backtraces. + class HashSection FINAL : public Section { + public: + HashSection(const std::string& name, Elf_Word flags, SymtabSection* symtab) + : Section(name, SHT_HASH, flags, symtab, + 0, sizeof(Elf_Word), sizeof(Elf_Word)), + symtab_(symtab) { + } + + Elf_Word GetSize() const OVERRIDE { + Elf_Word nbuckets = GetNumBuckets(); + Elf_Word chain_size = symtab_->symbols_.size() + 1 /* NULL */; + return (2 /* header */ + nbuckets + chain_size) * sizeof(Elf_Word); + } + + bool Write(File* const elf_file) OVERRIDE { + // Here is how the ELF hash table works. + // There are 3 arrays to worry about. + // * The symbol table where the symbol information is. + // * The bucket array which is an array of indexes into the symtab and chain. + // * The chain array which is also an array of indexes into the symtab and chain. + // + // Let's say the state is something like this. + // +--------+ +--------+ +-----------+ + // | symtab | | bucket | | chain | + // | null | | 1 | | STN_UNDEF | + // | <sym1> | | 4 | | 2 | + // | <sym2> | | | | 5 | + // | <sym3> | | | | STN_UNDEF | + // | <sym4> | | | | 3 | + // | <sym5> | | | | STN_UNDEF | + // +--------+ +--------+ +-----------+ + // + // The lookup process (in python pseudocode) is + // + // def GetSym(name): + // # NB STN_UNDEF == 0 + // indx = bucket[elfhash(name) % num_buckets] + // while indx != STN_UNDEF: + // if GetSymbolName(symtab[indx]) == name: + // return symtab[indx] + // indx = chain[indx] + // return SYMBOL_NOT_FOUND + // + // Between bucket and chain arrays every symtab index must be present exactly + // once (except for STN_UNDEF, which must be present 1 + num_bucket times). + const auto& symbols = symtab_->symbols_; + // Select number of buckets. + // This is essentially arbitrary. + Elf_Word nbuckets = GetNumBuckets(); + // 1 is for the implicit NULL symbol. + Elf_Word chain_size = (symbols.size() + 1); + std::vector<Elf_Word> hash; + hash.push_back(nbuckets); + hash.push_back(chain_size); + uint32_t bucket_offset = hash.size(); + uint32_t chain_offset = bucket_offset + nbuckets; + hash.resize(hash.size() + nbuckets + chain_size, 0); + + Elf_Word* buckets = hash.data() + bucket_offset; + Elf_Word* chain = hash.data() + chain_offset; + + // Set up the actual hash table. + for (Elf_Word i = 0; i < symbols.size(); i++) { + // Add 1 since we need to have the null symbol that is not in the symbols + // list. + Elf_Word index = i + 1; + Elf_Word hash_val = static_cast<Elf_Word>(elfhash(symbols[i].name_.c_str())) % nbuckets; + if (buckets[hash_val] == 0) { + buckets[hash_val] = index; + } else { + hash_val = buckets[hash_val]; + CHECK_LT(hash_val, chain_size); + while (chain[hash_val] != 0) { + hash_val = chain[hash_val]; + CHECK_LT(hash_val, chain_size); + } + chain[hash_val] = index; + // Check for loops. Works because if this is non-empty then there must be + // another cell which already contains the same symbol index as this one, + // which means some symbol has more than one name, which isn't allowed.
+ CHECK_EQ(chain[index], static_cast<Elf_Word>(0)); + } + } + return WriteArray(elf_file, hash.data(), hash.size()); + } + + private: + Elf_Word GetNumBuckets() const { + const auto& symbols = symtab_->symbols_; + if (symbols.size() < 8) { + return 2; + } else if (symbols.size() < 32) { + return 4; + } else if (symbols.size() < 256) { + return 16; + } else { + // Have about 32 ids per bucket. + return RoundUp(symbols.size()/32, 2); + } + } - DISALLOW_COPY_AND_ASSIGN(ElfFileOatTextPiece); -}; + // from bionic + static inline unsigned elfhash(const char *_name) { + const unsigned char *name = (const unsigned char *) _name; + unsigned h = 0, g; -template <typename Elf_Word> -static bool WriteOutFile(const std::vector<ElfFilePiece<Elf_Word>*>& pieces, File* elf_file) { - // TODO It would be nice if this checked for overlap. - for (auto it = pieces.begin(); it != pieces.end(); ++it) { - if (!(*it)->Write(elf_file)) { - return false; + while (*name) { + h = (h << 4) + *name++; + g = h & 0xf0000000; + h ^= g; + h ^= g >> 24; + } + return h; } - } - return true; -} -template <typename Elf_Word, typename Elf_Shdr> -static inline constexpr Elf_Word NextOffset(const Elf_Shdr& cur, const Elf_Shdr& prev) { - return RoundUp(prev.sh_size + prev.sh_offset, cur.sh_addralign); -} + SymtabSection* symtab_; -template <typename ElfTypes> -class ElfBuilder FINAL { - public: - using Elf_Addr = typename ElfTypes::Addr; - using Elf_Word = typename ElfTypes::Word; - using Elf_Sword = typename ElfTypes::Sword; - using Elf_Ehdr = typename ElfTypes::Ehdr; - using Elf_Shdr = typename ElfTypes::Shdr; - using Elf_Sym = typename ElfTypes::Sym; - using Elf_Phdr = typename ElfTypes::Phdr; - using Elf_Dyn = typename ElfTypes::Dyn; + DISALLOW_COPY_AND_ASSIGN(HashSection); + }; - ElfBuilder(CodeOutput* oat_writer, - File* elf_file, - InstructionSet isa, - Elf_Word rodata_relative_offset, - Elf_Word rodata_size, - Elf_Word text_relative_offset, - Elf_Word text_size, - Elf_Word bss_relative_offset, - Elf_Word bss_size, - const bool add_symbols, - bool debug = false) - : oat_writer_(oat_writer), - elf_file_(elf_file), - add_symbols_(add_symbols), - debug_logging_(debug), - text_builder_(".text", text_size, text_relative_offset, SHT_PROGBITS, - SHF_ALLOC | SHF_EXECINSTR), - rodata_builder_(".rodata", rodata_size, rodata_relative_offset, SHT_PROGBITS, SHF_ALLOC), - bss_builder_(".bss", bss_size, bss_relative_offset, SHT_NOBITS, SHF_ALLOC), - dynsym_builder_(".dynsym", SHT_DYNSYM, ".dynstr", SHT_STRTAB, true), - symtab_builder_(".symtab", SHT_SYMTAB, ".strtab", SHT_STRTAB, false), - hash_builder_(".hash", SHT_HASH, SHF_ALLOC, &dynsym_builder_, 0, sizeof(Elf_Word), - sizeof(Elf_Word)), - dynamic_builder_(".dynamic", &dynsym_builder_), - shstrtab_builder_(".shstrtab", SHT_STRTAB, 0, nullptr, 0, 1, 1) { - SetupEhdr(); - SetupDynamic(); - SetupRequiredSymbols(); - SetISA(isa); + ElfBuilder(InstructionSet isa, + Elf_Word rodata_size, CodeOutput* rodata_writer, + Elf_Word text_size, CodeOutput* text_writer, + Elf_Word bss_size) + : isa_(isa), + dynstr_(".dynstr", SHF_ALLOC), + dynsym_(".dynsym", SHT_DYNSYM, SHF_ALLOC, &dynstr_), + hash_(".hash", SHF_ALLOC, &dynsym_), + rodata_(".rodata", SHT_PROGBITS, SHF_ALLOC, + nullptr, 0, kPageSize, 0, rodata_size, rodata_writer), + text_(".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, + nullptr, 0, kPageSize, 0, text_size, text_writer), + bss_(".bss", bss_size), + dynamic_(".dynamic", &dynsym_), + strtab_(".strtab", 0), + symtab_(".symtab", SHT_SYMTAB, 0, &strtab_), + shstrtab_(".shstrtab", 0) { } 
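[Editor's note] To make the bucket/chain layout concrete, here is the lookup side of the .hash data that HashSection emits above, transcribed from the in-source pseudocode. It reuses the elfhash routine shown in the hunk; NameOf is a hypothetical callback returning the name of a symbol-table entry, and <cstring> is assumed for strcmp:

// Sketch of a reader for the emitted .hash data (illustrative only).
// hash points at the section contents: [nbucket][nchain][buckets][chains].
uint32_t LookupSymbol(const uint32_t* hash, const char* name,
                      const char* (*NameOf)(uint32_t index)) {
  uint32_t nbucket = hash[0];
  const uint32_t* buckets = hash + 2;
  const uint32_t* chains = buckets + nbucket;
  // STN_UNDEF (== 0) terminates the chain walk.
  for (uint32_t index = buckets[elfhash(name) % nbucket];
       index != 0;
       index = chains[index]) {
    if (strcmp(NameOf(index), name) == 0) {
      return index;  // Index into the symbol table.
    }
  }
  return 0;  // STN_UNDEF: not found.
}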
~ElfBuilder() {} - const ElfOatSectionBuilder<ElfTypes>& GetTextBuilder() const { - return text_builder_; - } - - ElfSymtabBuilder<ElfTypes>* GetSymtabBuilder() { - return &symtab_builder_; - } + OatSection* GetText() { return &text_; } + SymtabSection* GetSymtab() { return &symtab_; } - bool Init() { + bool Write(File* elf_file) { // Since the .text section of an oat file contains relative references to .rodata // and (optionally) .bss, we keep these 2 or 3 sections together. This creates // a non-traditional layout where the .bss section is mapped independently of the @@ -605,11 +519,12 @@ class ElfBuilder FINAL { // | Elf_Ehdr | // +-------------------------+ // | Elf_Phdr PHDR | - // | Elf_Phdr LOAD R | .dynsym .dynstr .hash .eh_frame .eh_frame_hdr .rodata + // | Elf_Phdr LOAD R | .dynsym .dynstr .hash .rodata // | Elf_Phdr LOAD R X | .text // | Elf_Phdr LOAD RW | .bss (Optional) // | Elf_Phdr LOAD RW | .dynamic // | Elf_Phdr DYNAMIC | .dynamic + // | Elf_Phdr LOAD R | .eh_frame .eh_frame_hdr // | Elf_Phdr EH_FRAME R | .eh_frame_hdr // +-------------------------+ // | .dynsym | @@ -621,25 +536,10 @@ class ElfBuilder FINAL { // | Elf_Sym oatbsslastword | (Optional) // +-------------------------+ // | .dynstr | - // | \0 | - // | oatdata\0 | - // | oatexec\0 | - // | oatlastword\0 | - // | boot.oat\0 | + // | names for .dynsym | // +-------------------------+ // | .hash | - // | Elf_Word nbucket = b | - // | Elf_Word nchain = c | - // | Elf_Word bucket[0] | - // | ... | - // | Elf_Word bucket[b - 1] | - // | Elf_Word chain[0] | - // | ... | - // | Elf_Word chain[c - 1] | - // +-------------------------+ - // | .eh_frame | (Optional) - // +-------------------------+ - // | .eh_frame_hdr | (Optional) + // | hashtable for dynsym | // +-------------------------+ // | .rodata | // | oatdata..oatexec-4 | @@ -648,38 +548,23 @@ class ElfBuilder FINAL { // | oatexec..oatlastword | // +-------------------------+ // | .dynamic | - // | Elf_Dyn DT_SONAME | // | Elf_Dyn DT_HASH | + // | Elf_Dyn DT_STRTAB | // | Elf_Dyn DT_SYMTAB | // | Elf_Dyn DT_SYMENT | - // | Elf_Dyn DT_STRTAB | // | Elf_Dyn DT_STRSZ | + // | Elf_Dyn DT_SONAME | // | Elf_Dyn DT_NULL | // +-------------------------+ (Optional) - // | .strtab | (Optional) - // | program symbol names | (Optional) - // +-------------------------+ (Optional) // | .symtab | (Optional) // | program symbols | (Optional) - // +-------------------------+ - // | .shstrtab | - // | \0 | - // | .dynamic\0 | - // | .dynsym\0 | - // | .dynstr\0 | - // | .hash\0 | - // | .rodata\0 | - // | .text\0 | - // | .bss\0 | (Optional) - // | .shstrtab\0 | - // | .symtab\0 | (Optional) - // | .strtab\0 | (Optional) - // | .eh_frame\0 | (Optional) - // | .eh_frame_hdr\0 | (Optional) - // | .debug_info\0 | (Optional) - // | .debug_abbrev\0 | (Optional) - // | .debug_str\0 | (Optional) - // | .debug_line\0 | (Optional) + // +-------------------------+ (Optional) + // | .strtab | (Optional) + // | names for .symtab | (Optional) + // +-------------------------+ (Optional) + // | .eh_frame | (Optional) + // +-------------------------+ (Optional) + // | .eh_frame_hdr | (Optional) // +-------------------------+ (Optional) // | .debug_info | (Optional) // +-------------------------+ (Optional) @@ -688,7 +573,10 @@ class ElfBuilder FINAL { // | .debug_str | (Optional) // +-------------------------+ (Optional) // | .debug_line | (Optional) - // +-------------------------+ (Optional) + // +-------------------------+ + // | .shstrtab | + // | names of sections | + // 
+-------------------------+ // | Elf_Shdr null | // | Elf_Shdr .dynsym | // | Elf_Shdr .dynstr | @@ -697,552 +585,266 @@ class ElfBuilder FINAL { // | Elf_Shdr .text | // | Elf_Shdr .bss | (Optional) // | Elf_Shdr .dynamic | - // | Elf_Shdr .shstrtab | + // | Elf_Shdr .symtab | (Optional) + // | Elf_Shdr .strtab | (Optional) // | Elf_Shdr .eh_frame | (Optional) // | Elf_Shdr .eh_frame_hdr | (Optional) // | Elf_Shdr .debug_info | (Optional) // | Elf_Shdr .debug_abbrev | (Optional) // | Elf_Shdr .debug_str | (Optional) // | Elf_Shdr .debug_line | (Optional) + // | Elf_Shdr .oat_patches | (Optional) + // | Elf_Shdr .shstrtab | // +-------------------------+ - - if (fatal_error_) { - return false; - } - // Step 1. Figure out all the offsets. - - if (debug_logging_) { - LOG(INFO) << "phdr_offset=" << PHDR_OFFSET << std::hex << " " << PHDR_OFFSET; - LOG(INFO) << "phdr_size=" << PHDR_SIZE << std::hex << " " << PHDR_SIZE; - } - - memset(&program_headers_, 0, sizeof(program_headers_)); - program_headers_[PH_PHDR].p_type = PT_PHDR; - program_headers_[PH_PHDR].p_offset = PHDR_OFFSET; - program_headers_[PH_PHDR].p_vaddr = PHDR_OFFSET; - program_headers_[PH_PHDR].p_paddr = PHDR_OFFSET; - program_headers_[PH_PHDR].p_filesz = sizeof(program_headers_); - program_headers_[PH_PHDR].p_memsz = sizeof(program_headers_); - program_headers_[PH_PHDR].p_flags = PF_R; - program_headers_[PH_PHDR].p_align = sizeof(Elf_Word); - - program_headers_[PH_LOAD_R__].p_type = PT_LOAD; - program_headers_[PH_LOAD_R__].p_offset = 0; - program_headers_[PH_LOAD_R__].p_vaddr = 0; - program_headers_[PH_LOAD_R__].p_paddr = 0; - program_headers_[PH_LOAD_R__].p_flags = PF_R; - - program_headers_[PH_LOAD_R_X].p_type = PT_LOAD; - program_headers_[PH_LOAD_R_X].p_flags = PF_R | PF_X; - - program_headers_[PH_LOAD_RW_BSS].p_type = PT_LOAD; - program_headers_[PH_LOAD_RW_BSS].p_flags = PF_R | PF_W; - - program_headers_[PH_LOAD_RW_DYNAMIC].p_type = PT_LOAD; - program_headers_[PH_LOAD_RW_DYNAMIC].p_flags = PF_R | PF_W; - - program_headers_[PH_DYNAMIC].p_type = PT_DYNAMIC; - program_headers_[PH_DYNAMIC].p_flags = PF_R | PF_W; - - program_headers_[PH_EH_FRAME_HDR].p_type = PT_NULL; - program_headers_[PH_EH_FRAME_HDR].p_flags = PF_R; - - // Get the dynstr string. - dynstr_ = dynsym_builder_.GenerateStrtab(); - - // Add the SONAME to the dynstr. - dynstr_soname_offset_ = dynstr_.size(); - std::string file_name(elf_file_->GetPath()); - size_t directory_separator_pos = file_name.rfind('/'); - if (directory_separator_pos != std::string::npos) { - file_name = file_name.substr(directory_separator_pos + 1); - } - dynstr_ += file_name; - dynstr_ += '\0'; - if (debug_logging_) { - LOG(INFO) << "dynstr size (bytes) =" << dynstr_.size() - << std::hex << " " << dynstr_.size(); - LOG(INFO) << "dynsym size (elements)=" << dynsym_builder_.GetSize() - << std::hex << " " << dynsym_builder_.GetSize(); - } - - // Get the section header string table. 
- shstrtab_ += '\0'; - - // Setup sym_undef - memset(&null_hdr_, 0, sizeof(null_hdr_)); - null_hdr_.sh_type = SHT_NULL; - null_hdr_.sh_link = SHN_UNDEF; - section_ptrs_.push_back(&null_hdr_); - - section_index_ = 1; - - // setup .dynsym - section_ptrs_.push_back(dynsym_builder_.GetSection()); - AssignSectionStr(&dynsym_builder_, &shstrtab_); - dynsym_builder_.SetSectionIndex(section_index_); - section_index_++; - - // Setup .dynstr - section_ptrs_.push_back(dynsym_builder_.GetStrTab()->GetSection()); - AssignSectionStr(dynsym_builder_.GetStrTab(), &shstrtab_); - dynsym_builder_.GetStrTab()->SetSectionIndex(section_index_); - section_index_++; - - // Setup .hash - section_ptrs_.push_back(hash_builder_.GetSection()); - AssignSectionStr(&hash_builder_, &shstrtab_); - hash_builder_.SetSectionIndex(section_index_); - section_index_++; - - // Setup .rodata - section_ptrs_.push_back(rodata_builder_.GetSection()); - AssignSectionStr(&rodata_builder_, &shstrtab_); - rodata_builder_.SetSectionIndex(section_index_); - section_index_++; - - // Setup .text - section_ptrs_.push_back(text_builder_.GetSection()); - AssignSectionStr(&text_builder_, &shstrtab_); - text_builder_.SetSectionIndex(section_index_); - section_index_++; - - // Setup .bss - if (bss_builder_.GetSize() != 0u) { - section_ptrs_.push_back(bss_builder_.GetSection()); - AssignSectionStr(&bss_builder_, &shstrtab_); - bss_builder_.SetSectionIndex(section_index_); - section_index_++; - } - - // Setup .dynamic - section_ptrs_.push_back(dynamic_builder_.GetSection()); - AssignSectionStr(&dynamic_builder_, &shstrtab_); - dynamic_builder_.SetSectionIndex(section_index_); - section_index_++; - - // Fill in the hash section. - hash_ = dynsym_builder_.GenerateHashContents(); - - if (debug_logging_) { - LOG(INFO) << ".hash size (bytes)=" << hash_.size() * sizeof(Elf_Word) - << std::hex << " " << hash_.size() * sizeof(Elf_Word); - } - - Elf_Word base_offset = sizeof(Elf_Ehdr) + sizeof(program_headers_); - - // Get the layout in the sections. - // - // Get the layout of the dynsym section. - dynsym_builder_.GetSection()->sh_offset = - RoundUp(base_offset, dynsym_builder_.GetSection()->sh_addralign); - dynsym_builder_.GetSection()->sh_addr = dynsym_builder_.GetSection()->sh_offset; - dynsym_builder_.GetSection()->sh_size = dynsym_builder_.GetSize() * sizeof(Elf_Sym); - dynsym_builder_.GetSection()->sh_link = dynsym_builder_.GetLink(); - - // Get the layout of the dynstr section. - dynsym_builder_.GetStrTab()->GetSection()->sh_offset = - NextOffset<Elf_Word, Elf_Shdr>(*dynsym_builder_.GetStrTab()->GetSection(), - *dynsym_builder_.GetSection()); - dynsym_builder_.GetStrTab()->GetSection()->sh_addr = - dynsym_builder_.GetStrTab()->GetSection()->sh_offset; - dynsym_builder_.GetStrTab()->GetSection()->sh_size = dynstr_.size(); - dynsym_builder_.GetStrTab()->GetSection()->sh_link = dynsym_builder_.GetStrTab()->GetLink(); - - // Get the layout of the hash section - hash_builder_.GetSection()->sh_offset = - NextOffset<Elf_Word, Elf_Shdr>(*hash_builder_.GetSection(), - *dynsym_builder_.GetStrTab()->GetSection()); - hash_builder_.GetSection()->sh_addr = hash_builder_.GetSection()->sh_offset; - hash_builder_.GetSection()->sh_size = hash_.size() * sizeof(Elf_Word); - hash_builder_.GetSection()->sh_link = hash_builder_.GetLink(); - - // Get the layout of the extra sections with SHF_ALLOC flag. - // This will deal with .eh_frame and .eh_frame_hdr. - // .eh_frame contains relative pointers to .text which we - // want to fixup between the calls to Init() and Write(). 
- // Therefore we handle those sections here as opposed to Write(). - // It also has the nice side effect of including .eh_frame - // with the rest of LOAD_R segment. It must come before .rodata - // because .rodata and .text must be next to each other. - Elf_Shdr* prev = hash_builder_.GetSection(); - for (auto* it : other_builders_) { - if ((it->GetSection()->sh_flags & SHF_ALLOC) != 0) { - it->GetSection()->sh_offset = NextOffset<Elf_Word, Elf_Shdr>(*it->GetSection(), *prev); - it->GetSection()->sh_addr = it->GetSection()->sh_offset; - it->GetSection()->sh_size = it->GetBuffer()->size(); - it->GetSection()->sh_link = it->GetLink(); - prev = it->GetSection(); + constexpr bool debug_logging_ = false; + + // Create a list of all sections which we want to write. + // This is the order in which they will be written. + std::vector<Section*> sections; + sections.push_back(&dynsym_); + sections.push_back(&dynstr_); + sections.push_back(&hash_); + sections.push_back(&rodata_); + sections.push_back(&text_); + if (bss_.GetSize() != 0u) { + sections.push_back(&bss_); + } + sections.push_back(&dynamic_); + if (!symtab_.IsEmpty()) { + sections.push_back(&symtab_); + sections.push_back(&strtab_); + } + for (Section* section : other_sections_) { + sections.push_back(section); + } + sections.push_back(&shstrtab_); + for (size_t i = 0; i < sections.size(); i++) { + // The first section index is 1. Index 0 is reserved for NULL. + // Section index is used for relative symbols and for section links. + sections[i]->SetSectionIndex(i + 1); + // Add section name to .shstrtab. + Elf_Word name_offset = shstrtab_.AddName(sections[i]->GetName()); + sections[i]->GetHeader()->sh_name = name_offset; + } + + // The running program does not have access to section headers + // and the loader is not supposed to use them either. + // The dynamic section therefore replicates some of the layout + // information like the address and size of .rodata and .text. + // It also contains other metadata like the SONAME. + // The .dynamic section is found using the PT_DYNAMIC program header. + BuildDynsymSection(); + BuildDynamicSection(elf_file->GetPath()); + + // We do not know the number of headers until the final stages of write. + // It is easiest to just reserve a fixed amount of space for them. + constexpr size_t kMaxProgramHeaders = 8; + constexpr size_t kProgramHeadersOffset = sizeof(Elf_Ehdr); + constexpr size_t kProgramHeadersSize = sizeof(Elf_Phdr) * kMaxProgramHeaders; + + // Layout of all sections - determine the final file offsets and addresses. + // This must be done after we have built all sections and know their size. + Elf_Off file_offset = kProgramHeadersOffset + kProgramHeadersSize; + Elf_Addr load_address = file_offset; + std::vector<Elf_Shdr> section_headers; + section_headers.reserve(1u + sections.size()); + section_headers.push_back(Elf_Shdr()); // NULL at index 0. + for (auto* section : sections) { + Elf_Shdr* header = section->GetHeader(); + Elf_Off alignment = header->sh_addralign > 0 ? header->sh_addralign : 1; + header->sh_size = section->GetSize(); + header->sh_link = section->GetLink(); + // Allocate memory for the section in the file. + if (header->sh_type != SHT_NOBITS) { + header->sh_offset = RoundUp(file_offset, alignment); + file_offset = header->sh_offset + header->sh_size; } - } - // If the sections exist, check that they have been handled.
- const auto* eh_frame = FindRawSection(".eh_frame"); - if (eh_frame != nullptr) { - DCHECK_NE(eh_frame->GetSection()->sh_offset, 0u); - } - const auto* eh_frame_hdr = FindRawSection(".eh_frame_hdr"); - if (eh_frame_hdr != nullptr) { - DCHECK_NE(eh_frame_hdr->GetSection()->sh_offset, 0u); - } - - // Get the layout of the rodata section. - rodata_builder_.GetSection()->sh_offset = - NextOffset<Elf_Word, Elf_Shdr>(*rodata_builder_.GetSection(), *prev); - rodata_builder_.GetSection()->sh_addr = rodata_builder_.GetSection()->sh_offset; - rodata_builder_.GetSection()->sh_size = rodata_builder_.GetSize(); - rodata_builder_.GetSection()->sh_link = rodata_builder_.GetLink(); - - // Get the layout of the text section. - text_builder_.GetSection()->sh_offset = - NextOffset<Elf_Word, Elf_Shdr>(*text_builder_.GetSection(), - *rodata_builder_.GetSection()); - text_builder_.GetSection()->sh_addr = text_builder_.GetSection()->sh_offset; - text_builder_.GetSection()->sh_size = text_builder_.GetSize(); - text_builder_.GetSection()->sh_link = text_builder_.GetLink(); - CHECK_ALIGNED(rodata_builder_.GetSection()->sh_offset + - rodata_builder_.GetSection()->sh_size, kPageSize); - - // Get the layout of the .bss section. - bss_builder_.GetSection()->sh_offset = - NextOffset<Elf_Word, Elf_Shdr>(*bss_builder_.GetSection(), - *text_builder_.GetSection()); - bss_builder_.GetSection()->sh_addr = bss_builder_.GetSection()->sh_offset; - bss_builder_.GetSection()->sh_size = bss_builder_.GetSize(); - bss_builder_.GetSection()->sh_link = bss_builder_.GetLink(); - - // Get the layout of the dynamic section. - CHECK(IsAlignedParam(bss_builder_.GetSection()->sh_offset, - dynamic_builder_.GetSection()->sh_addralign)); - dynamic_builder_.GetSection()->sh_offset = bss_builder_.GetSection()->sh_offset; - dynamic_builder_.GetSection()->sh_addr = - NextOffset<Elf_Word, Elf_Shdr>(*dynamic_builder_.GetSection(), *bss_builder_.GetSection()); - dynamic_builder_.GetSection()->sh_size = dynamic_builder_.GetSize() * sizeof(Elf_Dyn); - dynamic_builder_.GetSection()->sh_link = dynamic_builder_.GetLink(); - - if (debug_logging_) { - LOG(INFO) << "dynsym off=" << dynsym_builder_.GetSection()->sh_offset - << " dynsym size=" << dynsym_builder_.GetSection()->sh_size; - LOG(INFO) << "dynstr off=" << dynsym_builder_.GetStrTab()->GetSection()->sh_offset - << " dynstr size=" << dynsym_builder_.GetStrTab()->GetSection()->sh_size; - LOG(INFO) << "hash off=" << hash_builder_.GetSection()->sh_offset - << " hash size=" << hash_builder_.GetSection()->sh_size; - LOG(INFO) << "rodata off=" << rodata_builder_.GetSection()->sh_offset - << " rodata size=" << rodata_builder_.GetSection()->sh_size; - LOG(INFO) << "text off=" << text_builder_.GetSection()->sh_offset - << " text size=" << text_builder_.GetSection()->sh_size; - LOG(INFO) << "dynamic off=" << dynamic_builder_.GetSection()->sh_offset - << " dynamic size=" << dynamic_builder_.GetSection()->sh_size; - } - - return true; - } - - bool Write() { - std::vector<ElfFilePiece<Elf_Word>*> pieces; - Elf_Shdr* prev = dynamic_builder_.GetSection(); - std::string strtab; - - if (IncludingDebugSymbols()) { - // Setup .symtab - section_ptrs_.push_back(symtab_builder_.GetSection()); - AssignSectionStr(&symtab_builder_, &shstrtab_); - symtab_builder_.SetSectionIndex(section_index_); - section_index_++; - - // Setup .strtab - section_ptrs_.push_back(symtab_builder_.GetStrTab()->GetSection()); - AssignSectionStr(symtab_builder_.GetStrTab(), &shstrtab_); - symtab_builder_.GetStrTab()->SetSectionIndex(section_index_); 
- section_index_++; - - strtab = symtab_builder_.GenerateStrtab(); - if (debug_logging_) { - LOG(INFO) << "strtab size (bytes) =" << strtab.size() - << std::hex << " " << strtab.size(); - LOG(INFO) << "symtab size (elements) =" << symtab_builder_.GetSize() - << std::hex << " " << symtab_builder_.GetSize(); + // Allocate memory for the section during program execution. + if ((header->sh_flags & SHF_ALLOC) != 0) { + header->sh_addr = RoundUp(load_address, alignment); + load_address = header->sh_addr + header->sh_size; } - } - - // Setup all the other sections. - for (auto* builder : other_builders_) { - section_ptrs_.push_back(builder->GetSection()); - AssignSectionStr(builder, &shstrtab_); - builder->SetSectionIndex(section_index_); - section_index_++; - } - - // Setup shstrtab - section_ptrs_.push_back(shstrtab_builder_.GetSection()); - AssignSectionStr(&shstrtab_builder_, &shstrtab_); - shstrtab_builder_.SetSectionIndex(section_index_); - section_index_++; - - if (debug_logging_) { - LOG(INFO) << ".shstrtab size (bytes) =" << shstrtab_.size() - << std::hex << " " << shstrtab_.size(); - LOG(INFO) << "section list size (elements)=" << section_ptrs_.size() - << std::hex << " " << section_ptrs_.size(); - } - - if (IncludingDebugSymbols()) { - // Get the layout of the symtab section. - symtab_builder_.GetSection()->sh_offset = - NextOffset<Elf_Word, Elf_Shdr>(*symtab_builder_.GetSection(), - *dynamic_builder_.GetSection()); - symtab_builder_.GetSection()->sh_addr = 0; - // Add to leave space for the null symbol. - symtab_builder_.GetSection()->sh_size = symtab_builder_.GetSize() * sizeof(Elf_Sym); - symtab_builder_.GetSection()->sh_link = symtab_builder_.GetLink(); - - // Get the layout of the dynstr section. - symtab_builder_.GetStrTab()->GetSection()->sh_offset = - NextOffset<Elf_Word, Elf_Shdr>(*symtab_builder_.GetStrTab()->GetSection(), - *symtab_builder_.GetSection()); - symtab_builder_.GetStrTab()->GetSection()->sh_addr = 0; - symtab_builder_.GetStrTab()->GetSection()->sh_size = strtab.size(); - symtab_builder_.GetStrTab()->GetSection()->sh_link = symtab_builder_.GetStrTab()->GetLink(); - - prev = symtab_builder_.GetStrTab()->GetSection(); if (debug_logging_) { - LOG(INFO) << "symtab off=" << symtab_builder_.GetSection()->sh_offset - << " symtab size=" << symtab_builder_.GetSection()->sh_size; - LOG(INFO) << "strtab off=" << symtab_builder_.GetStrTab()->GetSection()->sh_offset - << " strtab size=" << symtab_builder_.GetStrTab()->GetSection()->sh_size; + LOG(INFO) << "Section " << section->GetName() << ":" << std::hex + << " offset=0x" << header->sh_offset + << " addr=0x" << header->sh_addr + << " size=0x" << header->sh_size; } - } - - // Get the layout of the extra sections without SHF_ALLOC flag. - // (This will deal with the debug sections if they are there) - for (auto* it : other_builders_) { - if ((it->GetSection()->sh_flags & SHF_ALLOC) == 0) { - it->GetSection()->sh_offset = NextOffset<Elf_Word, Elf_Shdr>(*it->GetSection(), *prev); - it->GetSection()->sh_addr = 0; - it->GetSection()->sh_size = it->GetBuffer()->size(); - it->GetSection()->sh_link = it->GetLink(); - - // We postpone adding an ElfFilePiece to keep the order in "pieces." - - prev = it->GetSection(); - if (debug_logging_) { - LOG(INFO) << it->GetName() << " off=" << it->GetSection()->sh_offset - << " size=" << it->GetSection()->sh_size; - } + // Collect section headers into contiguous array for convenience.
+ section_headers.push_back(*header); + } + Elf_Off section_headers_offset = RoundUp(file_offset, sizeof(Elf_Word)); + + // Create program headers now that we know the layout of the whole file. + // Each segment contains one or more sections which are mapped together. + // Not all sections are mapped during the execution of the program. + // PT_LOAD does the mapping. Other PT_* types allow the program to locate + // interesting parts of memory and their addresses overlap with PT_LOAD. + std::vector<Elf_Phdr> program_headers; + program_headers.push_back(MakeProgramHeader(PT_PHDR, PF_R, + kProgramHeadersOffset, kProgramHeadersSize, sizeof(Elf_Word))); + // Create the main LOAD R segment which spans all sections up to .rodata. + const Elf_Shdr* rodata = rodata_.GetHeader(); + program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R, + 0, rodata->sh_offset + rodata->sh_size, rodata->sh_addralign)); + program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_X, text_)); + if (bss_.GetHeader()->sh_size != 0u) { + program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_W, bss_)); + } + program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_W, dynamic_)); + program_headers.push_back(MakeProgramHeader(PT_DYNAMIC, PF_R | PF_W, dynamic_)); + const Section* eh_frame = FindSection(".eh_frame"); + if (eh_frame != nullptr) { + program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R, *eh_frame)); + const Section* eh_frame_hdr = FindSection(".eh_frame_hdr"); + if (eh_frame_hdr != nullptr) { + // Check layout: eh_frame is before eh_frame_hdr and there is no gap. + CHECK_LE(eh_frame->GetHeader()->sh_offset, eh_frame_hdr->GetHeader()->sh_offset); + CHECK_EQ(eh_frame->GetHeader()->sh_offset + eh_frame->GetHeader()->sh_size, + eh_frame_hdr->GetHeader()->sh_offset); + // Extend the PT_LOAD of .eh_frame to include the .eh_frame_hdr as well. + program_headers.back().p_filesz += eh_frame_hdr->GetHeader()->sh_size; + program_headers.back().p_memsz += eh_frame_hdr->GetHeader()->sh_size; + program_headers.push_back(MakeProgramHeader(PT_GNU_EH_FRAME, PF_R, *eh_frame_hdr)); } } - - // Get the layout of the shstrtab section - shstrtab_builder_.GetSection()->sh_offset = - NextOffset<Elf_Word, Elf_Shdr>(*shstrtab_builder_.GetSection(), *prev); - shstrtab_builder_.GetSection()->sh_addr = 0; - shstrtab_builder_.GetSection()->sh_size = shstrtab_.size(); - shstrtab_builder_.GetSection()->sh_link = shstrtab_builder_.GetLink(); - if (debug_logging_) { - LOG(INFO) << "shstrtab off=" << shstrtab_builder_.GetSection()->sh_offset - << " shstrtab size=" << shstrtab_builder_.GetSection()->sh_size; - } - - // The section list comes after come after. - Elf_Word sections_offset = RoundUp( - shstrtab_builder_.GetSection()->sh_offset + shstrtab_builder_.GetSection()->sh_size, - sizeof(Elf_Word)); - - // Setup the actual symbol arrays. - std::vector<Elf_Sym> dynsym = dynsym_builder_.GenerateSymtab(); - CHECK_EQ(dynsym.size() * sizeof(Elf_Sym), dynsym_builder_.GetSection()->sh_size); - std::vector<Elf_Sym> symtab; - if (IncludingDebugSymbols()) { - symtab = symtab_builder_.GenerateSymtab(); - CHECK_EQ(symtab.size() * sizeof(Elf_Sym), symtab_builder_.GetSection()->sh_size); - } - - // Setup the dynamic section. - // This will add the 2 values we cannot know until now time, namely the size - // and the soname_offset. 
- std::vector<Elf_Dyn> dynamic = dynamic_builder_.GetDynamics(dynstr_.size(), - dynstr_soname_offset_); - CHECK_EQ(dynamic.size() * sizeof(Elf_Dyn), dynamic_builder_.GetSection()->sh_size); - - // Finish setup of the program headers now that we know the layout of the - // whole file. - Elf_Word load_r_size = - rodata_builder_.GetSection()->sh_offset + rodata_builder_.GetSection()->sh_size; - program_headers_[PH_LOAD_R__].p_filesz = load_r_size; - program_headers_[PH_LOAD_R__].p_memsz = load_r_size; - program_headers_[PH_LOAD_R__].p_align = rodata_builder_.GetSection()->sh_addralign; - - Elf_Word load_rx_size = text_builder_.GetSection()->sh_size; - program_headers_[PH_LOAD_R_X].p_offset = text_builder_.GetSection()->sh_offset; - program_headers_[PH_LOAD_R_X].p_vaddr = text_builder_.GetSection()->sh_offset; - program_headers_[PH_LOAD_R_X].p_paddr = text_builder_.GetSection()->sh_offset; - program_headers_[PH_LOAD_R_X].p_filesz = load_rx_size; - program_headers_[PH_LOAD_R_X].p_memsz = load_rx_size; - program_headers_[PH_LOAD_R_X].p_align = text_builder_.GetSection()->sh_addralign; - - program_headers_[PH_LOAD_RW_BSS].p_offset = bss_builder_.GetSection()->sh_offset; - program_headers_[PH_LOAD_RW_BSS].p_vaddr = bss_builder_.GetSection()->sh_offset; - program_headers_[PH_LOAD_RW_BSS].p_paddr = bss_builder_.GetSection()->sh_offset; - program_headers_[PH_LOAD_RW_BSS].p_filesz = 0; - program_headers_[PH_LOAD_RW_BSS].p_memsz = bss_builder_.GetSection()->sh_size; - program_headers_[PH_LOAD_RW_BSS].p_align = bss_builder_.GetSection()->sh_addralign; - - program_headers_[PH_LOAD_RW_DYNAMIC].p_offset = dynamic_builder_.GetSection()->sh_offset; - program_headers_[PH_LOAD_RW_DYNAMIC].p_vaddr = dynamic_builder_.GetSection()->sh_addr; - program_headers_[PH_LOAD_RW_DYNAMIC].p_paddr = dynamic_builder_.GetSection()->sh_addr; - program_headers_[PH_LOAD_RW_DYNAMIC].p_filesz = dynamic_builder_.GetSection()->sh_size; - program_headers_[PH_LOAD_RW_DYNAMIC].p_memsz = dynamic_builder_.GetSection()->sh_size; - program_headers_[PH_LOAD_RW_DYNAMIC].p_align = dynamic_builder_.GetSection()->sh_addralign; - - program_headers_[PH_DYNAMIC].p_offset = dynamic_builder_.GetSection()->sh_offset; - program_headers_[PH_DYNAMIC].p_vaddr = dynamic_builder_.GetSection()->sh_addr; - program_headers_[PH_DYNAMIC].p_paddr = dynamic_builder_.GetSection()->sh_addr; - program_headers_[PH_DYNAMIC].p_filesz = dynamic_builder_.GetSection()->sh_size; - program_headers_[PH_DYNAMIC].p_memsz = dynamic_builder_.GetSection()->sh_size; - program_headers_[PH_DYNAMIC].p_align = dynamic_builder_.GetSection()->sh_addralign; - - const auto* eh_frame_hdr = FindRawSection(".eh_frame_hdr"); - if (eh_frame_hdr != nullptr) { - const auto* eh_frame = FindRawSection(".eh_frame"); - // Check layout: - // 1) eh_frame is before eh_frame_hdr. - // 2) There's no gap. 
- CHECK(eh_frame != nullptr); - CHECK_LE(eh_frame->GetSection()->sh_offset, eh_frame_hdr->GetSection()->sh_offset); - CHECK_EQ(eh_frame->GetSection()->sh_offset + eh_frame->GetSection()->sh_size, - eh_frame_hdr->GetSection()->sh_offset); - - program_headers_[PH_EH_FRAME_HDR].p_type = PT_GNU_EH_FRAME; - program_headers_[PH_EH_FRAME_HDR].p_offset = eh_frame_hdr->GetSection()->sh_offset; - program_headers_[PH_EH_FRAME_HDR].p_vaddr = eh_frame_hdr->GetSection()->sh_addr; - program_headers_[PH_EH_FRAME_HDR].p_paddr = eh_frame_hdr->GetSection()->sh_addr; - program_headers_[PH_EH_FRAME_HDR].p_filesz = eh_frame_hdr->GetSection()->sh_size; - program_headers_[PH_EH_FRAME_HDR].p_memsz = eh_frame_hdr->GetSection()->sh_size; - program_headers_[PH_EH_FRAME_HDR].p_align = eh_frame_hdr->GetSection()->sh_addralign; - } - - // Finish setup of the Ehdr values. - elf_header_.e_phoff = PHDR_OFFSET; - elf_header_.e_shoff = sections_offset; - elf_header_.e_phnum = (bss_builder_.GetSection()->sh_size != 0u) ? PH_NUM : PH_NUM - 1; - elf_header_.e_shnum = section_ptrs_.size(); - elf_header_.e_shstrndx = shstrtab_builder_.GetSectionIndex(); - - // Add the rest of the pieces to the list. - pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("Elf Header", 0, &elf_header_, - sizeof(elf_header_))); - if (bss_builder_.GetSection()->sh_size != 0u) { - pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("Program headers", PHDR_OFFSET, - &program_headers_[0], - elf_header_.e_phnum * sizeof(Elf_Phdr))); - } else { - // Skip PH_LOAD_RW_BSS. - Elf_Word part1_size = PH_LOAD_RW_BSS * sizeof(Elf_Phdr); - Elf_Word part2_size = (PH_NUM - PH_LOAD_RW_BSS - 1) * sizeof(Elf_Phdr); - CHECK_EQ(part1_size + part2_size, elf_header_.e_phnum * sizeof(Elf_Phdr)); - pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("Program headers", PHDR_OFFSET, - &program_headers_[0], part1_size)); - pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("Program headers part 2", - PHDR_OFFSET + part1_size, - &program_headers_[PH_LOAD_RW_BSS + 1], - part2_size)); - } - pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".dynamic", - dynamic_builder_.GetSection()->sh_offset, - dynamic.data(), - dynamic_builder_.GetSection()->sh_size)); - pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".dynsym", dynsym_builder_.GetSection()->sh_offset, - dynsym.data(), - dynsym.size() * sizeof(Elf_Sym))); - pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".dynstr", - dynsym_builder_.GetStrTab()->GetSection()->sh_offset, - dynstr_.c_str(), dynstr_.size())); - pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".hash", hash_builder_.GetSection()->sh_offset, - hash_.data(), - hash_.size() * sizeof(Elf_Word))); - pieces.push_back(new ElfFileRodataPiece<Elf_Word>(rodata_builder_.GetSection()->sh_offset, - oat_writer_)); - pieces.push_back(new ElfFileOatTextPiece<Elf_Word>(text_builder_.GetSection()->sh_offset, - oat_writer_)); - if (IncludingDebugSymbols()) { - pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".symtab", - symtab_builder_.GetSection()->sh_offset, - symtab.data(), - symtab.size() * sizeof(Elf_Sym))); - pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".strtab", - symtab_builder_.GetStrTab()->GetSection()->sh_offset, - strtab.c_str(), strtab.size())); + CHECK_LE(program_headers.size(), kMaxProgramHeaders); + + // Create the main ELF header. 
+ Elf_Ehdr elf_header = MakeElfHeader(isa_); + elf_header.e_phoff = kProgramHeadersOffset; + elf_header.e_shoff = section_headers_offset; + elf_header.e_phnum = program_headers.size(); + elf_header.e_shnum = section_headers.size(); + elf_header.e_shstrndx = shstrtab_.GetSectionIndex(); + + // Write all headers and section content to the file. + // Depending on the implementations of Section::Write, this + // might be just memory copies or some more elaborate operations. + if (!WriteArray(elf_file, &elf_header, 1)) { + LOG(INFO) << "Failed to write the ELF header"; + return false; } - pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(".shstrtab", - shstrtab_builder_.GetSection()->sh_offset, - &shstrtab_[0], shstrtab_.size())); - for (uint32_t i = 0; i < section_ptrs_.size(); ++i) { - // Just add all the sections in induvidually since they are all over the - // place on the heap/stack. - Elf_Word cur_off = sections_offset + i * sizeof(Elf_Shdr); - pieces.push_back(new ElfFileMemoryPiece<Elf_Word>("section table piece", cur_off, - section_ptrs_[i], sizeof(Elf_Shdr))); + if (!WriteArray(elf_file, program_headers.data(), program_headers.size())) { + LOG(INFO) << "Failed to write the program headers"; + return false; } - - // Postponed debug info. - for (auto* it : other_builders_) { - pieces.push_back(new ElfFileMemoryPiece<Elf_Word>(it->GetName(), it->GetSection()->sh_offset, - it->GetBuffer()->data(), - it->GetBuffer()->size())); + for (Section* section : sections) { + const Elf_Shdr* header = section->GetHeader(); + if (header->sh_type != SHT_NOBITS) { + if (!SeekTo(elf_file, header->sh_offset) || !section->Write(elf_file)) { + LOG(INFO) << "Failed to write section " << section->GetName(); + return false; + } + Elf_Word current_offset = lseek(elf_file->Fd(), 0, SEEK_CUR); + CHECK_EQ(current_offset, header->sh_offset + header->sh_size) + << "The number of bytes written does not match GetSize()"; + } } - - if (!WriteOutFile(pieces)) { - LOG(ERROR) << "Unable to write to file " << elf_file_->GetPath(); - - STLDeleteElements(&pieces); // Have to manually clean pieces. + if (!SeekTo(elf_file, section_headers_offset) || + !WriteArray(elf_file, section_headers.data(), section_headers.size())) { + LOG(INFO) << "Failed to write the section headers"; return false; } - - STLDeleteElements(&pieces); // Have to manually clean pieces. return true; } - // Adds the given raw section to the builder. It does not take ownership. - void RegisterRawSection(ElfRawSectionBuilder<ElfTypes>* bld) { - other_builders_.push_back(bld); + // Adds the given section to the builder. It does not take ownership. 
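
The write loop above seeks to each section's sh_offset, lets the section emit itself, then cross-checks the resulting file position against sh_offset + sh_size. The same write-then-verify pattern with plain POSIX I/O; the scratch path and hard-coded offset stand in for ART's File/OutputStream machinery, and like the SeekTo helper below, it only ever seeks forwards:

    #include <fcntl.h>
    #include <unistd.h>
    #include <cassert>
    #include <cstdint>

    int main() {
      int fd = open("/tmp/elfbuilder_demo.bin", O_CREAT | O_TRUNC | O_RDWR, 0644);
      assert(fd >= 0);

      const uint64_t sh_offset = 0x40;   // hypothetical section offset
      const char payload[] = "section bytes";
      const uint64_t sh_size = sizeof(payload);

      // SeekTo: never backwards, as the DCHECK in the helper enforces.
      assert(lseek(fd, 0, SEEK_CUR) <= (off_t)sh_offset);
      assert(lseek(fd, sh_offset, SEEK_SET) == (off_t)sh_offset);

      // Stand-in for Section::Write.
      assert(write(fd, payload, sh_size) == (ssize_t)sh_size);

      // The builder's post-condition: bytes written must match the header.
      assert(lseek(fd, 0, SEEK_CUR) == (off_t)(sh_offset + sh_size));
      close(fd);
    }
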
+ void RegisterSection(Section* section) { + other_sections_.push_back(section); } - const ElfRawSectionBuilder<ElfTypes>* FindRawSection(const char* name) { - for (const auto* other_builder : other_builders_) { - if (other_builder->GetName() == name) { - return other_builder; + const Section* FindSection(const char* name) { + for (const auto* section : other_sections_) { + if (section->GetName() == name) { + return section; } } return nullptr; } private: - void SetISA(InstructionSet isa) { + static bool SeekTo(File* elf_file, Elf_Word offset) { + DCHECK_LE(lseek(elf_file->Fd(), 0, SEEK_CUR), static_cast<off_t>(offset)) + << "Seeking backwards"; + if (static_cast<off_t>(offset) != lseek(elf_file->Fd(), offset, SEEK_SET)) { + PLOG(ERROR) << "Failed to seek in file " << elf_file->GetPath(); + return false; + } + return true; + } + + template<typename T> + static bool WriteArray(File* elf_file, const T* data, size_t count) { + DCHECK(data != nullptr); + if (!elf_file->WriteFully(data, count * sizeof(T))) { + PLOG(ERROR) << "Failed to write to file " << elf_file->GetPath(); + return false; + } + return true; + } + + // Helper - create segment header based on memory range. + static Elf_Phdr MakeProgramHeader(Elf_Word type, Elf_Word flags, + Elf_Off offset, Elf_Word size, Elf_Word align) { + Elf_Phdr phdr = Elf_Phdr(); + phdr.p_type = type; + phdr.p_flags = flags; + phdr.p_offset = offset; + phdr.p_vaddr = offset; + phdr.p_paddr = offset; + phdr.p_filesz = size; + phdr.p_memsz = size; + phdr.p_align = align; + return phdr; + } + + // Helper - create segment header based on section header. + static Elf_Phdr MakeProgramHeader(Elf_Word type, Elf_Word flags, + const Section& section) { + const Elf_Shdr* shdr = section.GetHeader(); + // Only run-time allocated sections should be in segment headers. + CHECK_NE(shdr->sh_flags & SHF_ALLOC, 0u); + Elf_Phdr phdr = Elf_Phdr(); + phdr.p_type = type; + phdr.p_flags = flags; + phdr.p_offset = shdr->sh_offset; + phdr.p_vaddr = shdr->sh_addr; + phdr.p_paddr = shdr->sh_addr; + phdr.p_filesz = shdr->sh_type != SHT_NOBITS ? shdr->sh_size : 0u; + phdr.p_memsz = shdr->sh_size; + phdr.p_align = shdr->sh_addralign; + return phdr; + } + + static Elf_Ehdr MakeElfHeader(InstructionSet isa) { + Elf_Ehdr elf_header = Elf_Ehdr(); switch (isa) { case kArm: // Fall through. 
case kThumb2: { - elf_header_.e_machine = EM_ARM; - elf_header_.e_flags = EF_ARM_EABI_VER5; + elf_header.e_machine = EM_ARM; + elf_header.e_flags = EF_ARM_EABI_VER5; break; } case kArm64: { - elf_header_.e_machine = EM_AARCH64; - elf_header_.e_flags = 0; + elf_header.e_machine = EM_AARCH64; + elf_header.e_flags = 0; break; } case kX86: { - elf_header_.e_machine = EM_386; - elf_header_.e_flags = 0; + elf_header.e_machine = EM_386; + elf_header.e_flags = 0; break; } case kX86_64: { - elf_header_.e_machine = EM_X86_64; - elf_header_.e_flags = 0; + elf_header.e_machine = EM_X86_64; + elf_header.e_flags = 0; break; } case kMips: { - elf_header_.e_machine = EM_MIPS; - elf_header_.e_flags = (EF_MIPS_NOREORDER | + elf_header.e_machine = EM_MIPS; + elf_header.e_flags = (EF_MIPS_NOREORDER | EF_MIPS_PIC | EF_MIPS_CPIC | EF_MIPS_ABI_O32 | @@ -1250,147 +852,82 @@ class ElfBuilder FINAL { break; } case kMips64: { - elf_header_.e_machine = EM_MIPS; - elf_header_.e_flags = (EF_MIPS_NOREORDER | + elf_header.e_machine = EM_MIPS; + elf_header.e_flags = (EF_MIPS_NOREORDER | EF_MIPS_PIC | EF_MIPS_CPIC | EF_MIPS_ARCH_64R6); break; } - default: { - fatal_error_ = true; - LOG(FATAL) << "Unknown instruction set: " << isa; - break; + case kNone: { + LOG(FATAL) << "No instruction set"; } } - } - void SetupEhdr() { - memset(&elf_header_, 0, sizeof(elf_header_)); - elf_header_.e_ident[EI_MAG0] = ELFMAG0; - elf_header_.e_ident[EI_MAG1] = ELFMAG1; - elf_header_.e_ident[EI_MAG2] = ELFMAG2; - elf_header_.e_ident[EI_MAG3] = ELFMAG3; - elf_header_.e_ident[EI_CLASS] = (sizeof(Elf_Addr) == sizeof(Elf32_Addr)) + elf_header.e_ident[EI_MAG0] = ELFMAG0; + elf_header.e_ident[EI_MAG1] = ELFMAG1; + elf_header.e_ident[EI_MAG2] = ELFMAG2; + elf_header.e_ident[EI_MAG3] = ELFMAG3; + elf_header.e_ident[EI_CLASS] = (sizeof(Elf_Addr) == sizeof(Elf32_Addr)) ? ELFCLASS32 : ELFCLASS64;; - elf_header_.e_ident[EI_DATA] = ELFDATA2LSB; - elf_header_.e_ident[EI_VERSION] = EV_CURRENT; - elf_header_.e_ident[EI_OSABI] = ELFOSABI_LINUX; - elf_header_.e_ident[EI_ABIVERSION] = 0; - elf_header_.e_type = ET_DYN; - elf_header_.e_version = 1; - elf_header_.e_entry = 0; - elf_header_.e_ehsize = sizeof(Elf_Ehdr); - elf_header_.e_phentsize = sizeof(Elf_Phdr); - elf_header_.e_shentsize = sizeof(Elf_Shdr); - elf_header_.e_phoff = sizeof(Elf_Ehdr); - } - - // Sets up a bunch of the required Dynamic Section entries. - // Namely it will initialize all the mandatory ones that it can. - // Specifically: - // DT_HASH - // DT_STRTAB - // DT_SYMTAB - // DT_SYMENT - // - // Some such as DT_SONAME, DT_STRSZ and DT_NULL will be put in later. - void SetupDynamic() { - dynamic_builder_.AddDynamicTag(DT_HASH, 0, &hash_builder_); - dynamic_builder_.AddDynamicTag(DT_STRTAB, 0, dynsym_builder_.GetStrTab()); - dynamic_builder_.AddDynamicTag(DT_SYMTAB, 0, &dynsym_builder_); - dynamic_builder_.AddDynamicTag(DT_SYMENT, sizeof(Elf_Sym)); - } - - // Sets up the basic dynamic symbols that are needed, namely all those we - // can know already. 
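
Deriving EI_CLASS from sizeof(Elf_Addr) works because the ELF class is fully determined by the address width of the ElfTypes instantiation. A compile-time restatement of that selection; the template parameter stands in for ElfTypes::Addr:

    #include <elf.h>

    // Compile-time version of the EI_CLASS selection above.
    template <typename Addr>
    struct ElfClassFor {
      static constexpr unsigned char value =
          sizeof(Addr) == sizeof(Elf32_Addr) ? ELFCLASS32 : ELFCLASS64;
    };

    static_assert(ElfClassFor<Elf32_Addr>::value == ELFCLASS32, "32-bit class");
    static_assert(ElfClassFor<Elf64_Addr>::value == ELFCLASS64, "64-bit class");

    int main() { return 0; }
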
- // - // Specifically adds: - // oatdata - // oatexec - // oatlastword - void SetupRequiredSymbols() { - dynsym_builder_.AddSymbol("oatdata", &rodata_builder_, 0, true, - rodata_builder_.GetSize(), STB_GLOBAL, STT_OBJECT); - dynsym_builder_.AddSymbol("oatexec", &text_builder_, 0, true, - text_builder_.GetSize(), STB_GLOBAL, STT_OBJECT); - dynsym_builder_.AddSymbol("oatlastword", &text_builder_, text_builder_.GetSize() - 4, - true, 4, STB_GLOBAL, STT_OBJECT); - if (bss_builder_.GetSize() != 0u) { - dynsym_builder_.AddSymbol("oatbss", &bss_builder_, 0, true, - bss_builder_.GetSize(), STB_GLOBAL, STT_OBJECT); - dynsym_builder_.AddSymbol("oatbsslastword", &bss_builder_, bss_builder_.GetSize() - 4, - true, 4, STB_GLOBAL, STT_OBJECT); - } - } - - void AssignSectionStr(ElfSectionBuilder<ElfTypes>* builder, std::string* strtab) { - builder->GetSection()->sh_name = strtab->size(); - *strtab += builder->GetName(); - *strtab += '\0'; - if (debug_logging_) { - LOG(INFO) << "adding section name \"" << builder->GetName() << "\" " - << "to shstrtab at offset " << builder->GetSection()->sh_name; - } - } - - - // Write each of the pieces out to the file. - bool WriteOutFile(const std::vector<ElfFilePiece<Elf_Word>*>& pieces) { - for (auto it = pieces.begin(); it != pieces.end(); ++it) { - if (!(*it)->Write(elf_file_)) { - return false; - } - } - return true; - } - - bool IncludingDebugSymbols() const { - return add_symbols_ && symtab_builder_.GetSize() > 1; - } - - CodeOutput* const oat_writer_; - File* const elf_file_; - const bool add_symbols_; - const bool debug_logging_; - - bool fatal_error_ = false; - - // What phdr is. - static const uint32_t PHDR_OFFSET = sizeof(Elf_Ehdr); - enum : uint8_t { - PH_PHDR = 0, - PH_LOAD_R__ = 1, - PH_LOAD_R_X = 2, - PH_LOAD_RW_BSS = 3, - PH_LOAD_RW_DYNAMIC = 4, - PH_DYNAMIC = 5, - PH_EH_FRAME_HDR = 6, - PH_NUM = 7, - }; - static const uint32_t PHDR_SIZE = sizeof(Elf_Phdr) * PH_NUM; - Elf_Phdr program_headers_[PH_NUM]; - - Elf_Ehdr elf_header_; - - Elf_Shdr null_hdr_; - std::string shstrtab_; - // The index of the current section being built. The first being 1. - uint32_t section_index_; - std::string dynstr_; - uint32_t dynstr_soname_offset_; - std::vector<const Elf_Shdr*> section_ptrs_; - std::vector<Elf_Word> hash_; - - ElfOatSectionBuilder<ElfTypes> text_builder_; - ElfOatSectionBuilder<ElfTypes> rodata_builder_; - ElfOatSectionBuilder<ElfTypes> bss_builder_; - ElfSymtabBuilder<ElfTypes> dynsym_builder_; - ElfSymtabBuilder<ElfTypes> symtab_builder_; - ElfSectionBuilder<ElfTypes> hash_builder_; - ElfDynamicBuilder<ElfTypes> dynamic_builder_; - ElfSectionBuilder<ElfTypes> shstrtab_builder_; - std::vector<ElfRawSectionBuilder<ElfTypes>*> other_builders_; + elf_header.e_ident[EI_DATA] = ELFDATA2LSB; + elf_header.e_ident[EI_VERSION] = EV_CURRENT; + elf_header.e_ident[EI_OSABI] = ELFOSABI_LINUX; + elf_header.e_ident[EI_ABIVERSION] = 0; + elf_header.e_type = ET_DYN; + elf_header.e_version = 1; + elf_header.e_entry = 0; + elf_header.e_ehsize = sizeof(Elf_Ehdr); + elf_header.e_phentsize = sizeof(Elf_Phdr); + elf_header.e_shentsize = sizeof(Elf_Shdr); + elf_header.e_phoff = sizeof(Elf_Ehdr); + return elf_header; + } + + void BuildDynamicSection(const std::string& elf_file_path) { + std::string soname(elf_file_path); + size_t directory_separator_pos = soname.rfind('/'); + if (directory_separator_pos != std::string::npos) { + soname = soname.substr(directory_separator_pos + 1); + } + // NB: We must add the name before adding DT_STRSZ. 
+ Elf_Word soname_offset = dynstr_.AddName(soname); + + dynamic_.AddDynamicTag(DT_HASH, 0, &hash_); + dynamic_.AddDynamicTag(DT_STRTAB, 0, &dynstr_); + dynamic_.AddDynamicTag(DT_SYMTAB, 0, &dynsym_); + dynamic_.AddDynamicTag(DT_SYMENT, sizeof(Elf_Sym), nullptr); + dynamic_.AddDynamicTag(DT_STRSZ, dynstr_.GetSize(), nullptr); + dynamic_.AddDynamicTag(DT_SONAME, soname_offset, nullptr); + } + + void BuildDynsymSection() { + dynsym_.AddSymbol("oatdata", &rodata_, 0, true, + rodata_.GetSize(), STB_GLOBAL, STT_OBJECT); + dynsym_.AddSymbol("oatexec", &text_, 0, true, + text_.GetSize(), STB_GLOBAL, STT_OBJECT); + dynsym_.AddSymbol("oatlastword", &text_, text_.GetSize() - 4, + true, 4, STB_GLOBAL, STT_OBJECT); + if (bss_.GetSize() != 0u) { + dynsym_.AddSymbol("oatbss", &bss_, 0, true, + bss_.GetSize(), STB_GLOBAL, STT_OBJECT); + dynsym_.AddSymbol("oatbsslastword", &bss_, bss_.GetSize() - 4, + true, 4, STB_GLOBAL, STT_OBJECT); + } + } + + InstructionSet isa_; + StrtabSection dynstr_; + SymtabSection dynsym_; + HashSection hash_; + OatSection rodata_; + OatSection text_; + NoBitsSection bss_; + DynamicSection dynamic_; + StrtabSection strtab_; + SymtabSection symtab_; + std::vector<Section*> other_sections_; + StrtabSection shstrtab_; DISALLOW_COPY_AND_ASSIGN(ElfBuilder); }; diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc index 28e6999..5e9cf76 100644 --- a/compiler/elf_writer_debug.cc +++ b/compiler/elf_writer_debug.cc @@ -18,6 +18,7 @@ #include <unordered_set> +#include "base/casts.h" #include "compiled_method.h" #include "driver/compiler_driver.h" #include "dex_file-inl.h" @@ -162,33 +163,54 @@ void WriteEhFrame(const CompilerDriver* compiler, ExceptionHeaderValueApplication address_type, std::vector<uint8_t>* eh_frame, std::vector<uintptr_t>* eh_frame_patches, - std::vector<uint8_t>* eh_frame_hdr) { + std::vector<uint8_t>* eh_frame_hdr, + std::vector<uintptr_t>* eh_frame_hdr_patches) { const auto& method_infos = oat_writer->GetMethodDebugInfo(); const InstructionSet isa = compiler->GetInstructionSet(); // Write .eh_frame section. + std::map<uint32_t, size_t> address_to_fde_offset_map; size_t cie_offset = eh_frame->size(); WriteEhFrameCIE(isa, address_type, eh_frame); for (const OatWriter::DebugInfo& mi : method_infos) { - const SwapVector<uint8_t>* opcodes = mi.compiled_method_->GetCFIInfo(); - if (opcodes != nullptr) { - WriteEhFrameFDE(Is64BitInstructionSet(isa), cie_offset, - mi.low_pc_, mi.high_pc_ - mi.low_pc_, - opcodes, eh_frame, eh_frame_patches); + if (!mi.deduped_) { // Only one FDE per unique address. + const SwapVector<uint8_t>* opcodes = mi.compiled_method_->GetCFIInfo(); + if (opcodes != nullptr) { + address_to_fde_offset_map.emplace(mi.low_pc_, eh_frame->size()); + WriteEhFrameFDE(Is64BitInstructionSet(isa), cie_offset, + mi.low_pc_, mi.high_pc_ - mi.low_pc_, + opcodes, eh_frame, eh_frame_patches); + } } } // Write .eh_frame_hdr section. Writer<> header(eh_frame_hdr); header.PushUint8(1); // Version. - header.PushUint8(DW_EH_PE_pcrel | DW_EH_PE_sdata4); // Encoding of .eh_frame pointer. - header.PushUint8(DW_EH_PE_omit); // Encoding of binary search table size. - header.PushUint8(DW_EH_PE_omit); // Encoding of binary search table addresses. - // .eh_frame pointer - .eh_frame_hdr section is after .eh_frame section, and need to encode - // relative to this location as libunwind doesn't honor datarel for eh_frame_hdr correctly. - header.PushInt32(-static_cast<int32_t>(eh_frame->size() + 4U)); - // Omit binary search table size (number of entries). 
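
The replacement .eh_frame_hdr code just below no longer omits the binary search table: it emits a version byte, three DW_EH_PE encodings, a pcrel pointer back to .eh_frame, the entry count, and sorted (code address, FDE offset) pairs. A sketch of that layout with a little-endian Writer stand-in; the DW_EH_PE constants are the standard DWARF EH values, and the code-address fields are left unpatched here (the real code records them in eh_frame_hdr_patches):

    #include <cassert>
    #include <cstdint>
    #include <map>
    #include <vector>

    struct Writer {  // stand-in for ART's Writer<> helper
      std::vector<uint8_t>* buf;
      void PushUint8(uint8_t v) { buf->push_back(v); }
      void PushUint32(uint32_t v) {
        for (int i = 0; i < 4; ++i) buf->push_back(static_cast<uint8_t>(v >> (8 * i)));
      }
      void PushInt32(int32_t v) { PushUint32(static_cast<uint32_t>(v)); }
    };

    int main() {
      const uint8_t DW_EH_PE_udata4 = 0x03, DW_EH_PE_sdata4 = 0x0b;
      const uint8_t DW_EH_PE_pcrel = 0x10, DW_EH_PE_datarel = 0x30;

      std::map<uint32_t, uint32_t> fdes = {{0x1000, 8}, {0x2000, 24}};  // pc -> FDE offset
      std::vector<uint8_t> eh_frame(40);  // pretend .eh_frame is 40 bytes
      std::vector<uint8_t> hdr_buf;
      Writer hdr{&hdr_buf};

      hdr.PushUint8(1);                                   // version
      hdr.PushUint8(DW_EH_PE_pcrel | DW_EH_PE_sdata4);    // .eh_frame pointer encoding
      hdr.PushUint8(DW_EH_PE_udata4);                     // table size encoding
      hdr.PushUint8(DW_EH_PE_datarel | DW_EH_PE_sdata4);  // table entry encoding
      // .eh_frame_hdr directly follows .eh_frame, so from this field
      // (4 bytes into the header) .eh_frame begins at -(size + 4).
      const int32_t rel_eh_frame_begin = -static_cast<int32_t>(eh_frame.size());
      hdr.PushInt32(rel_eh_frame_begin - 4);
      hdr.PushUint32(static_cast<uint32_t>(fdes.size()));
      for (const auto& e : fdes) {             // std::map iterates sorted by pc
        hdr.PushUint32(e.first);               // code address - patched later
        hdr.PushInt32(rel_eh_frame_begin + static_cast<int32_t>(e.second));
      }
      assert(hdr_buf.size() == 12 + fdes.size() * 8);
      return 0;
    }
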
- // Omit binary search table. + // Encoding of .eh_frame pointer - libunwind does not honor datarel here, + // so we have to use pcrel which means relative to the pointer's location. + header.PushUint8(DW_EH_PE_pcrel | DW_EH_PE_sdata4); + // Encoding of binary search table size. + header.PushUint8(DW_EH_PE_udata4); + // Encoding of binary search table addresses - libunwind supports only this + // specific combination, which means relative to the start of .eh_frame_hdr. + header.PushUint8(DW_EH_PE_datarel | DW_EH_PE_sdata4); + // .eh_frame pointer - .eh_frame_hdr section is after .eh_frame section + const int32_t relative_eh_frame_begin = -static_cast<int32_t>(eh_frame->size()); + header.PushInt32(relative_eh_frame_begin - 4U); + // Binary search table size (number of entries). + header.PushUint32(dchecked_integral_cast<uint32_t>(address_to_fde_offset_map.size())); + // Binary search table. + for (const auto& address_to_fde_offset : address_to_fde_offset_map) { + uint32_t code_address = address_to_fde_offset.first; + int32_t fde_address = dchecked_integral_cast<int32_t>(address_to_fde_offset.second); + eh_frame_hdr_patches->push_back(header.data()->size()); + header.PushUint32(code_address); + // We know the exact layout (eh_frame is immediately before eh_frame_hdr) + // and the data is relative to the start of the eh_frame_hdr, + // so patching isn't necessary (in contrast to the code address above). + header.PushInt32(relative_eh_frame_begin + fde_address); + } } /* diff --git a/compiler/elf_writer_debug.h b/compiler/elf_writer_debug.h index 5bf4841..28d0e2c 100644 --- a/compiler/elf_writer_debug.h +++ b/compiler/elf_writer_debug.h @@ -30,7 +30,8 @@ void WriteEhFrame(const CompilerDriver* compiler, ExceptionHeaderValueApplication address_type, std::vector<uint8_t>* eh_frame, std::vector<uintptr_t>* eh_frame_patches, - std::vector<uint8_t>* eh_frame_hdr); + std::vector<uint8_t>* eh_frame_hdr, + std::vector<uintptr_t>* eh_frame_hdr_patches); void WriteDebugSections(const CompilerDriver* compiler, const OatWriter* oat_writer, diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc index 3b2ca94..79f9955 100644 --- a/compiler/elf_writer_quick.cc +++ b/compiler/elf_writer_quick.cc @@ -21,7 +21,6 @@ #include "base/logging.h" #include "base/unix_file/fd_file.h" -#include "buffered_output_stream.h" #include "compiled_method.h" #include "dex_file-inl.h" #include "driver/compiler_driver.h" @@ -30,7 +29,6 @@ #include "elf_file.h" #include "elf_utils.h" #include "elf_writer_debug.h" -#include "file_output_stream.h" #include "globals.h" #include "leb128.h" #include "oat.h" @@ -50,20 +48,6 @@ bool ElfWriterQuick<ElfTypes>::Create(File* elf_file, return elf_writer.Write(oat_writer, dex_files, android_root, is_host); } -class OatWriterWrapper FINAL : public CodeOutput { - public: - explicit OatWriterWrapper(OatWriter* oat_writer) : oat_writer_(oat_writer) {} - - void SetCodeOffset(size_t offset) { - oat_writer_->SetOatDataOffset(offset); - } - bool Write(OutputStream* out) OVERRIDE { - return oat_writer_->Write(out); - } - private: - OatWriter* const oat_writer_; -}; - template <typename ElfTypes> static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder, OatWriter* oat_writer); @@ -99,15 +83,56 @@ void ElfWriterQuick<ElfTypes>::EncodeOatPatches( buffer->push_back(0); // End of sections.
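
EncodeOatPatches terminates each section's records with a 0 byte and, given the leb128.h include, stores them ULEB128-encoded. A minimal encoder for illustration; the delta encoding of sorted offsets shown here is an assumption, and the exact on-disk record layout is defined by EncodeOatPatches itself:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Standard ULEB128: 7 payload bits per byte, high bit = continuation.
    static void EncodeUnsignedLeb128(std::vector<uint8_t>* out, uint32_t value) {
      do {
        uint8_t byte = value & 0x7f;
        value >>= 7;
        if (value != 0) byte |= 0x80;  // more bytes follow
        out->push_back(byte);
      } while (value != 0);
    }

    int main() {
      // Hypothetical patch offsets within one section, assumed sorted;
      // encoding deltas keeps each entry small.
      uint32_t locations[] = {0x10, 0x24, 0x80, 0x1000};
      std::vector<uint8_t> buffer;
      uint32_t previous = 0;
      for (uint32_t loc : locations) {
        EncodeUnsignedLeb128(&buffer, loc - previous);
        previous = loc;
      }
      buffer.push_back(0);          // end-of-section marker, as above
      assert(buffer.size() == 6);   // 1+1+1+2 bytes of deltas + terminator
    }
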
} -template<typename AddressType, bool SubtractPatchLocation = false> -static void PatchAddresses(const std::vector<uintptr_t>* patch_locations, - AddressType delta, std::vector<uint8_t>* buffer) { - // Addresses in .debug_* sections are unaligned. - typedef __attribute__((__aligned__(1))) AddressType UnalignedAddressType; - if (patch_locations != nullptr) { - for (uintptr_t patch_location : *patch_locations) { - *reinterpret_cast<UnalignedAddressType*>(buffer->data() + patch_location) += - delta - (SubtractPatchLocation ? patch_location : 0); +class RodataWriter FINAL : public CodeOutput { + public: + explicit RodataWriter(OatWriter* oat_writer) : oat_writer_(oat_writer) {} + + bool Write(OutputStream* out) OVERRIDE { + return oat_writer_->WriteRodata(out); + } + + private: + OatWriter* oat_writer_; +}; + +class TextWriter FINAL : public CodeOutput { + public: + explicit TextWriter(OatWriter* oat_writer) : oat_writer_(oat_writer) {} + + bool Write(OutputStream* out) OVERRIDE { + return oat_writer_->WriteCode(out); + } + + private: + OatWriter* oat_writer_; +}; + +enum PatchResult { + kAbsoluteAddress, // Absolute memory location. + kPointerRelativeAddress, // Offset relative to the location of the pointer. + kSectionRelativeAddress, // Offset relative to start of containing section. +}; + +// Patch memory addresses within a buffer. +// It assumes that the unpatched addresses are offsets relative to base_address. +// (which generally means method's low_pc relative to the start of .text) +template <typename Elf_Addr, typename Address, PatchResult kPatchResult> +static void Patch(const std::vector<uintptr_t>& patch_locations, + Elf_Addr buffer_address, Elf_Addr base_address, + std::vector<uint8_t>* buffer) { + for (uintptr_t location : patch_locations) { + typedef __attribute__((__aligned__(1))) Address UnalignedAddress; + auto* to_patch = reinterpret_cast<UnalignedAddress*>(buffer->data() + location); + switch (kPatchResult) { + case kAbsoluteAddress: + *to_patch = (base_address + *to_patch); + break; + case kPointerRelativeAddress: + *to_patch = (base_address + *to_patch) - (buffer_address + location); + break; + case kSectionRelativeAddress: + *to_patch = (base_address + *to_patch) - buffer_address; + break; } } } @@ -118,106 +143,80 @@ bool ElfWriterQuick<ElfTypes>::Write( const std::vector<const DexFile*>& dex_files_unused ATTRIBUTE_UNUSED, const std::string& android_root_unused ATTRIBUTE_UNUSED, bool is_host_unused ATTRIBUTE_UNUSED) { - constexpr bool debug = false; - const OatHeader& oat_header = oat_writer->GetOatHeader(); - typename ElfTypes::Word oat_data_size = oat_header.GetExecutableOffset(); - uint32_t oat_exec_size = oat_writer->GetSize() - oat_data_size; - uint32_t oat_bss_size = oat_writer->GetBssSize(); - - OatWriterWrapper wrapper(oat_writer); + using Elf_Addr = typename ElfTypes::Addr; + const InstructionSet isa = compiler_driver_->GetInstructionSet(); + // Setup the builder with the main OAT sections (.rodata .text .bss). 
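
The Patch<> template above collapses the old PatchAddresses variants into one routine with three result modes: absolute (.debug_info/.debug_line), pointer-relative (.eh_frame), and section-relative (.eh_frame_hdr). The same arithmetic demonstrated on a raw byte buffer, specialized to uint32_t and using memcpy for the unaligned accesses:

    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    enum PatchResult { kAbsolute, kPointerRelative, kSectionRelative };

    static void PatchOne(std::vector<uint8_t>* buffer, uint32_t location,
                         uint32_t buffer_address, uint32_t base_address,
                         PatchResult mode) {
      uint32_t value;
      std::memcpy(&value, buffer->data() + location, sizeof(value));
      switch (mode) {
        case kAbsolute:        value = base_address + value; break;
        case kPointerRelative: value = (base_address + value) - (buffer_address + location); break;
        case kSectionRelative: value = (base_address + value) - buffer_address; break;
      }
      std::memcpy(buffer->data() + location, &value, sizeof(value));
    }

    int main() {
      // A buffer (think .eh_frame) loaded at 0x5000 holds a stored offset
      // 0x40 that means ".text base + 0x40"; .text is loaded at 0x2000.
      std::vector<uint8_t> buf(16, 0);
      uint32_t offset = 0x40;
      std::memcpy(buf.data() + 8, &offset, 4);
      PatchOne(&buf, 8, /*buffer_address=*/0x5000, /*base_address=*/0x2000,
               kPointerRelative);
      uint32_t patched;
      std::memcpy(&patched, buf.data() + 8, 4);
      // (0x2000 + 0x40) - (0x5000 + 8) = -0x2FC8, as two's complement.
      assert(patched == static_cast<uint32_t>(0x2040 - 0x5008));
    }
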
+ const size_t rodata_size = oat_writer->GetOatHeader().GetExecutableOffset(); + const size_t text_size = oat_writer->GetSize() - rodata_size; + const size_t bss_size = oat_writer->GetBssSize(); + RodataWriter rodata_writer(oat_writer); + TextWriter text_writer(oat_writer); std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>( - &wrapper, - elf_file_, - compiler_driver_->GetInstructionSet(), - 0, - oat_data_size, - oat_data_size, - oat_exec_size, - RoundUp(oat_data_size + oat_exec_size, kPageSize), - oat_bss_size, - compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols(), - debug)); + isa, rodata_size, &rodata_writer, text_size, &text_writer, bss_size)); - InstructionSet isa = compiler_driver_->GetInstructionSet(); - int alignment = GetInstructionSetPointerSize(isa); - typedef ElfRawSectionBuilder<ElfTypes> RawSection; - RawSection eh_frame(".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, alignment, 0); - RawSection eh_frame_hdr(".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0); - RawSection debug_info(".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0); + // Add debug sections. + // They are stack allocated here (in the same scope as the builder), + // but they are registered with the builder only if they are used. + using RawSection = typename ElfBuilder<ElfTypes>::RawSection; + const auto* text = builder->GetText(); + const bool is64bit = Is64BitInstructionSet(isa); + RawSection eh_frame(".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0, + is64bit ? Patch<Elf_Addr, uint64_t, kPointerRelativeAddress> : + Patch<Elf_Addr, uint32_t, kPointerRelativeAddress>, + text); + RawSection eh_frame_hdr(".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0, + Patch<Elf_Addr, uint32_t, kSectionRelativeAddress>, text); + RawSection debug_info(".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0, + Patch<Elf_Addr, uint32_t, kAbsoluteAddress>, text); RawSection debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, nullptr, 0, 1, 0); RawSection debug_str(".debug_str", SHT_PROGBITS, 0, nullptr, 0, 1, 0); - RawSection debug_line(".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0); - RawSection oat_patches(".oat_patches", SHT_OAT_PATCH, 0, nullptr, 0, 1, 0); - - // Do not add to .oat_patches since we will make the addresses relative. - std::vector<uintptr_t> eh_frame_patches; - if (compiler_driver_->GetCompilerOptions().GetIncludeCFI() && - !oat_writer->GetMethodDebugInfo().empty()) { - dwarf::WriteEhFrame(compiler_driver_, oat_writer, - dwarf::DW_EH_PE_pcrel, - eh_frame.GetBuffer(), &eh_frame_patches, - eh_frame_hdr.GetBuffer()); - builder->RegisterRawSection(&eh_frame); - builder->RegisterRawSection(&eh_frame_hdr); - } - - // Must be done after .eh_frame is created since it is used in the Elf layout. - if (!builder->Init()) { - return false; - } - - std::vector<uintptr_t>* debug_info_patches = nullptr; - std::vector<uintptr_t>* debug_line_patches = nullptr; - if (compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols() && - !oat_writer->GetMethodDebugInfo().empty()) { - // Add methods to .symtab. - WriteDebugSymbols(builder.get(), oat_writer); - // Generate DWARF .debug_* sections.
- debug_info_patches = oat_writer->GetAbsolutePatchLocationsFor(".debug_info"); - debug_line_patches = oat_writer->GetAbsolutePatchLocationsFor(".debug_line"); - dwarf::WriteDebugSections(compiler_driver_, oat_writer, - debug_info.GetBuffer(), debug_info_patches, - debug_abbrev.GetBuffer(), - debug_str.GetBuffer(), - debug_line.GetBuffer(), debug_line_patches); - builder->RegisterRawSection(&debug_info); - builder->RegisterRawSection(&debug_abbrev); - builder->RegisterRawSection(&debug_str); - builder->RegisterRawSection(&debug_line); + RawSection debug_line(".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0, + Patch<Elf_Addr, uint32_t, kAbsoluteAddress>, text); + if (!oat_writer->GetMethodDebugInfo().empty()) { + if (compiler_driver_->GetCompilerOptions().GetIncludeCFI()) { + dwarf::WriteEhFrame( + compiler_driver_, oat_writer, dwarf::DW_EH_PE_pcrel, + eh_frame.GetBuffer(), eh_frame.GetPatchLocations(), + eh_frame_hdr.GetBuffer(), eh_frame_hdr.GetPatchLocations()); + builder->RegisterSection(&eh_frame); + builder->RegisterSection(&eh_frame_hdr); + } + if (compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols()) { + // Add methods to .symtab. + WriteDebugSymbols(builder.get(), oat_writer); + // Generate DWARF .debug_* sections. + dwarf::WriteDebugSections( + compiler_driver_, oat_writer, + debug_info.GetBuffer(), debug_info.GetPatchLocations(), + debug_abbrev.GetBuffer(), + debug_str.GetBuffer(), + debug_line.GetBuffer(), debug_line.GetPatchLocations()); + builder->RegisterSection(&debug_info); + builder->RegisterSection(&debug_abbrev); + builder->RegisterSection(&debug_str); + builder->RegisterSection(&debug_line); + *oat_writer->GetAbsolutePatchLocationsFor(".debug_info") = + *debug_info.GetPatchLocations(); + *oat_writer->GetAbsolutePatchLocationsFor(".debug_line") = + *debug_line.GetPatchLocations(); + } } + // Add relocation section. + RawSection oat_patches(".oat_patches", SHT_OAT_PATCH, 0, nullptr, 0, 1, 0); if (compiler_driver_->GetCompilerOptions().GetIncludePatchInformation() || // ElfWriter::Fixup will be called regardless and it needs to be able // to patch debug sections so we have to include patches for them. compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols()) { EncodeOatPatches(oat_writer->GetAbsolutePatchLocations(), oat_patches.GetBuffer()); - builder->RegisterRawSection(&oat_patches); - } - - // We know where .text and .eh_frame will be located, so patch the addresses. - typename ElfTypes::Addr text_addr = builder->GetTextBuilder().GetSection()->sh_addr; - // TODO: Simplify once we use Elf64 - we can use ElfTypes::Addr instead of branching. 
- if (Is64BitInstructionSet(compiler_driver_->GetInstructionSet())) { - // relative_address = (text_addr + address) - (eh_frame_addr + patch_location); - PatchAddresses<uint64_t, true>(&eh_frame_patches, - text_addr - eh_frame.GetSection()->sh_addr, eh_frame.GetBuffer()); - PatchAddresses<uint64_t>(debug_info_patches, text_addr, debug_info.GetBuffer()); - PatchAddresses<uint64_t>(debug_line_patches, text_addr, debug_line.GetBuffer()); - } else { - // relative_address = (text_addr + address) - (eh_frame_addr + patch_location); - PatchAddresses<uint32_t, true>(&eh_frame_patches, - text_addr - eh_frame.GetSection()->sh_addr, eh_frame.GetBuffer()); - PatchAddresses<uint32_t>(debug_info_patches, text_addr, debug_info.GetBuffer()); - PatchAddresses<uint32_t>(debug_line_patches, text_addr, debug_line.GetBuffer()); + builder->RegisterSection(&oat_patches); } - return builder->Write(); + return builder->Write(elf_file_); } template <typename ElfTypes> -// Do not inline to avoid Clang stack frame problems. b/18738594 -NO_INLINE static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder, OatWriter* oat_writer) { const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetMethodDebugInfo(); @@ -230,8 +229,11 @@ static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder, OatWriter* oat_writ } } - ElfSymtabBuilder<ElfTypes>* symtab = builder->GetSymtabBuilder(); + auto* symtab = builder->GetSymtab(); for (auto it = method_info.begin(); it != method_info.end(); ++it) { + if (it->deduped_) { + continue; // Add symbol only for the first instance. + } std::string name = PrettyMethod(it->dex_method_index_, *it->dex_file_, true); if (deduped_addresses.find(it->low_pc_) != deduped_addresses.end()) { name += " [DEDUPED]"; @@ -240,13 +242,13 @@ static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder, OatWriter* oat_writ uint32_t low_pc = it->low_pc_; // Add in code delta, e.g., thumb bit 0 for Thumb2 code. low_pc += it->compiled_method_->CodeDelta(); - symtab->AddSymbol(name, &builder->GetTextBuilder(), low_pc, + symtab->AddSymbol(name, builder->GetText(), low_pc, true, it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC); // Conforming to aaelf, add $t mapping symbol to indicate start of a sequence of thumb2 // instructions, so that disassembler tools can correctly disassemble. 
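
WriteDebugSymbols now skips deduplicated entries entirely and tags the one surviving symbol with " [DEDUPED]" when its code is shared. The naming logic in isolation; PrettyMethod and the symtab builder are replaced by stubs:

    #include <cstdint>
    #include <cstdio>
    #include <set>
    #include <string>
    #include <vector>

    struct DebugInfo {  // trimmed stand-in for OatWriter::DebugInfo
      std::string name;
      uint32_t low_pc;
      bool deduped;
    };

    int main() {
      std::vector<DebugInfo> infos = {
        {"A.foo()", 0x1000, false},
        {"B.bar()", 0x1000, true},   // same code as A.foo() - no own symbol
        {"C.baz()", 0x2000, false},
      };
      // First pass: find addresses shared via deduplication.
      std::set<uint32_t> deduped_addresses;
      for (const DebugInfo& info : infos)
        if (info.deduped) deduped_addresses.insert(info.low_pc);
      // Second pass: one symbol per first instance, tagged if shared.
      for (const DebugInfo& info : infos) {
        if (info.deduped) continue;  // symbol only for the first instance
        std::string name = info.name;
        if (deduped_addresses.count(info.low_pc)) name += " [DEDUPED]";
        std::printf("symbol %s at 0x%x\n", name.c_str(), info.low_pc);
      }
    }
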
if (it->compiled_method_->GetInstructionSet() == kThumb2) { - symtab->AddSymbol("$t", &builder->GetTextBuilder(), it->low_pc_ & ~1, true, + symtab->AddSymbol("$t", builder->GetText(), it->low_pc_ & ~1, true, 0, STB_LOCAL, STT_NOTYPE); } } diff --git a/compiler/jit/jit_compiler.h b/compiler/jit/jit_compiler.h index 0876499..d9a5ac6 100644 --- a/compiler/jit/jit_compiler.h +++ b/compiler/jit/jit_compiler.h @@ -67,10 +67,11 @@ class JitCompiler { const uint8_t* mapping_table, const uint8_t* vmap_table, const uint8_t* gc_map); bool MakeExecutable(CompiledMethod* compiled_method, mirror::ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + + DISALLOW_COPY_AND_ASSIGN(JitCompiler); }; } // namespace jit - } // namespace art #endif // ART_COMPILER_JIT_JIT_COMPILER_H_ diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index d2d38da..15b4017 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -1112,13 +1112,14 @@ size_t OatWriter::InitOatCodeDexFiles(size_t offset) { return offset; } -bool OatWriter::Write(OutputStream* out) { +bool OatWriter::WriteRodata(OutputStream* out) { const off_t raw_file_offset = out->Seek(0, kSeekCurrent); if (raw_file_offset == (off_t) -1) { LOG(ERROR) << "Failed to get file offset in " << out->GetLocation(); return false; } const size_t file_offset = static_cast<size_t>(raw_file_offset); + oat_data_offset_ = file_offset; // Reserve space for header. It will be written last - after updating the checksum. size_t header_size = oat_header_->GetHeaderSize(); @@ -1146,6 +1147,27 @@ bool OatWriter::Write(OutputStream* out) { return false; } + // Write padding. + off_t new_offset = out->Seek(size_executable_offset_alignment_, kSeekCurrent); + relative_offset += size_executable_offset_alignment_; + DCHECK_EQ(relative_offset, oat_header_->GetExecutableOffset()); + size_t expected_file_offset = file_offset + relative_offset; + if (static_cast<uint32_t>(new_offset) != expected_file_offset) { + PLOG(ERROR) << "Failed to seek to oat code section. 
Actual: " << new_offset + << " Expected: " << expected_file_offset << " File: " << out->GetLocation(); + return 0; + } + DCHECK_OFFSET(); + + return true; +} + +bool OatWriter::WriteCode(OutputStream* out) { + size_t header_size = oat_header_->GetHeaderSize(); + const size_t file_offset = oat_data_offset_; + size_t relative_offset = oat_header_->GetExecutableOffset(); + DCHECK_OFFSET(); + relative_offset = WriteCode(out, file_offset, relative_offset); if (relative_offset == 0) { LOG(ERROR) << "Failed to write oat code to " << out->GetLocation(); @@ -1215,7 +1237,7 @@ bool OatWriter::Write(OutputStream* out) { PLOG(ERROR) << "Failed to seek to oat header position in " << out->GetLocation(); return false; } - DCHECK_EQ(raw_file_offset, out->Seek(0, kSeekCurrent)); + DCHECK_EQ(file_offset, static_cast<size_t>(out->Seek(0, kSeekCurrent))); if (!out->WriteFully(oat_header_, header_size)) { PLOG(ERROR) << "Failed to write oat header to " << out->GetLocation(); return false; @@ -1290,16 +1312,6 @@ size_t OatWriter::WriteMaps(OutputStream* out, const size_t file_offset, size_t } size_t OatWriter::WriteCode(OutputStream* out, const size_t file_offset, size_t relative_offset) { - off_t new_offset = out->Seek(size_executable_offset_alignment_, kSeekCurrent); - relative_offset += size_executable_offset_alignment_; - DCHECK_EQ(relative_offset, oat_header_->GetExecutableOffset()); - size_t expected_file_offset = file_offset + relative_offset; - if (static_cast<uint32_t>(new_offset) != expected_file_offset) { - PLOG(ERROR) << "Failed to seek to oat code section. Actual: " << new_offset - << " Expected: " << expected_file_offset << " File: " << out->GetLocation(); - return 0; - } - DCHECK_OFFSET(); if (compiler_driver_->IsImage()) { InstructionSet instruction_set = compiler_driver_->GetInstructionSet(); diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h index 8c79b44..6f1b4ec 100644 --- a/compiler/oat_writer.h +++ b/compiler/oat_writer.h @@ -118,11 +118,8 @@ class OatWriter { return it.first->second.get(); } - void SetOatDataOffset(size_t oat_data_offset) { - oat_data_offset_ = oat_data_offset; - } - - bool Write(OutputStream* out); + bool WriteRodata(OutputStream* out); + bool WriteCode(OutputStream* out); ~OatWriter(); diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 92fa6db..b2b5496 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -281,15 +281,22 @@ class ArrayAccessInsideLoopFinder : public ValueObject { return false; } + static bool DominatesAllBackEdges(HBasicBlock* block, HLoopInformation* loop_info) { + for (size_t i = 0, e = loop_info->GetBackEdges().Size(); i < e; ++i) { + HBasicBlock* back_edge = loop_info->GetBackEdges().Get(i); + if (!block->Dominates(back_edge)) { + return false; + } + } + return true; + } + void Run() { HLoopInformation* loop_info = induction_variable_->GetBlock()->GetLoopInformation(); - // Must be simplified loop. - DCHECK_EQ(loop_info->GetBackEdges().Size(), 1U); for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) { HBasicBlock* block = it_loop.Current(); DCHECK(block->IsInLoop()); - HBasicBlock* back_edge = loop_info->GetBackEdges().Get(0); - if (!block->Dominates(back_edge)) { + if (!DominatesAllBackEdges(block, loop_info)) { // In order not to trigger deoptimization unnecessarily, make sure // that all array accesses collected are really executed in the loop. 
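
DominatesAllBackEdges generalizes the old single-back-edge check: once a loop may have several back edges, a block is guaranteed to execute on every iteration only if it dominates all of them. A toy illustration with explicit dominator sets (real compilers compute these from the CFG):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    struct Block {
      std::vector<const Block*> dominators;  // toy: explicit dominator sets
      bool Dominates(const Block& other) const {
        return std::find(other.dominators.begin(), other.dominators.end(), this)
            != other.dominators.end();
      }
    };

    // Mirrors DominatesAllBackEdges above.
    static bool DominatesAllBackEdges(const Block& block,
                                      const std::vector<Block*>& back_edges) {
      return std::all_of(back_edges.begin(), back_edges.end(),
                         [&](const Block* be) { return block.Dominates(*be); });
    }

    int main() {
      Block header, then_branch, back_edge1, back_edge2;
      back_edge1.dominators = {&header, &then_branch};
      back_edge2.dominators = {&header};  // this path bypasses then_branch
      std::vector<Block*> back_edges = {&back_edge1, &back_edge2};
      assert(DominatesAllBackEdges(header, back_edges));
      assert(!DominatesAllBackEdges(then_branch, back_edges));
    }
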
// For array accesses in a branch inside the loop, don't collect the @@ -1151,9 +1158,26 @@ class BCEVisitor : public HGraphVisitor { bounds_check->GetBlock()->RemoveInstruction(bounds_check); } + static bool HasSameInputAtBackEdges(HPhi* phi) { + DCHECK(phi->IsLoopHeaderPhi()); + // Start with input 1. Input 0 is from the incoming block. + HInstruction* input1 = phi->InputAt(1); + DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge( + *phi->GetBlock()->GetPredecessors().Get(1))); + for (size_t i = 2, e = phi->InputCount(); i < e; ++i) { + DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge( + *phi->GetBlock()->GetPredecessors().Get(i))); + if (input1 != phi->InputAt(i)) { + return false; + } + } + return true; + } + void VisitPhi(HPhi* phi) { - if (phi->IsLoopHeaderPhi() && phi->GetType() == Primitive::kPrimInt) { - DCHECK_EQ(phi->InputCount(), 2U); + if (phi->IsLoopHeaderPhi() + && (phi->GetType() == Primitive::kPrimInt) + && HasSameInputAtBackEdges(phi)) { HInstruction* instruction = phi->InputAt(1); HInstruction *left; int32_t increment; diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index beaff5c..bdbd571 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -327,6 +327,7 @@ class CodeGenerator { return GetFpuSpillSize() + GetCoreSpillSize(); } + virtual ParallelMoveResolver* GetMoveResolver() = 0; protected: CodeGenerator(HGraph* graph, @@ -370,7 +371,6 @@ class CodeGenerator { virtual Location GetStackLocation(HLoadLocal* load) const = 0; - virtual ParallelMoveResolver* GetMoveResolver() = 0; virtual HGraphVisitor* GetLocationBuilder() = 0; virtual HGraphVisitor* GetInstructionVisitor() = 0; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index e4c37de..f56e446 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -112,6 +112,10 @@ class SuspendCheckSlowPathARM : public SlowPathCodeARM { return &return_label_; } + HBasicBlock* GetSuccessor() const { + return successor_; + } + private: HSuspendCheck* const instruction_; // If not null, the block to branch to after the suspend check. 
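
The GenerateSuspendCheck hunks below (repeated for ARM64, x86, and x86-64 further down) switch to a get-or-create pattern: the slow path is cached on the HSuspendCheck, and repeated calls must agree on the successor block. The pattern in isolation, with plain new in place of ART's arena allocation:

    #include <cassert>

    struct BasicBlock {};  // stand-ins for the compiler's IR classes
    struct SlowPath {
      explicit SlowPath(BasicBlock* successor) : successor_(successor) {}
      BasicBlock* GetSuccessor() const { return successor_; }
      BasicBlock* successor_;
    };
    struct SuspendCheck {
      SlowPath* GetSlowPath() const { return slow_path_; }
      void SetSlowPath(SlowPath* sp) { slow_path_ = sp; }
      SlowPath* slow_path_ = nullptr;
    };

    // Get-or-create, as in the GenerateSuspendCheck changes: the slow path
    // is built once, and later call sites must agree on the successor.
    SlowPath* GetOrCreateSlowPath(SuspendCheck* check, BasicBlock* successor) {
      SlowPath* slow_path = check->GetSlowPath();
      if (slow_path == nullptr) {
        slow_path = new SlowPath(successor);  // arena-allocated in ART
        check->SetSlowPath(slow_path);
      } else {
        assert(slow_path->GetSuccessor() == successor);
      }
      return slow_path;
    }

    int main() {
      BasicBlock header;
      SuspendCheck check;
      SlowPath* first = GetOrCreateSlowPath(&check, &header);
      SlowPath* second = GetOrCreateSlowPath(&check, &header);
      assert(first == second);  // reused, not recreated
      delete first;
    }
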
@@ -3539,8 +3543,18 @@ void InstructionCodeGeneratorARM::VisitSuspendCheck(HSuspendCheck* instruction) void InstructionCodeGeneratorARM::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { SuspendCheckSlowPathARM* slow_path = - new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(instruction, successor); - codegen_->AddSlowPath(slow_path); + down_cast<SuspendCheckSlowPathARM*>(instruction->GetSlowPath()); + if (slow_path == nullptr) { + slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(instruction, successor); + instruction->SetSlowPath(slow_path); + codegen_->AddSlowPath(slow_path); + if (successor != nullptr) { + DCHECK(successor->IsLoopHeader()); + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction); + } + } else { + DCHECK_EQ(slow_path->GetSuccessor(), successor); + } __ LoadFromOffset( kLoadUnsignedHalfword, IP, TR, Thread::ThreadFlagsOffset<kArmWordSize>().Int32Value()); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 9e02a1d..0222f93 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -285,6 +285,10 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 { return &return_label_; } + HBasicBlock* GetSuccessor() const { + return successor_; + } + private: HSuspendCheck* const instruction_; // If not null, the block to branch to after the suspend check. @@ -1034,8 +1038,19 @@ void InstructionCodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) { void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { SuspendCheckSlowPathARM64* slow_path = - new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor); - codegen_->AddSlowPath(slow_path); + down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath()); + if (slow_path == nullptr) { + slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor); + instruction->SetSlowPath(slow_path); + codegen_->AddSlowPath(slow_path); + if (successor != nullptr) { + DCHECK(successor->IsLoopHeader()); + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction); + } + } else { + DCHECK_EQ(slow_path->GetSuccessor(), successor); + } + UseScratchRegisterScope temps(codegen_->GetVIXLAssembler()); Register temp = temps.AcquireW(); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 5ee091f..cfb8702 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -153,6 +153,10 @@ class SuspendCheckSlowPathX86 : public SlowPathCodeX86 { return &return_label_; } + HBasicBlock* GetSuccessor() const { + return successor_; + } + private: HSuspendCheck* const instruction_; HBasicBlock* const successor_; @@ -809,7 +813,6 @@ void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) { HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { - codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck()); GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } @@ -2740,17 +2743,12 @@ void LocationsBuilderX86::HandleShift(HBinaryOperation* op) { new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall); switch (op->GetResultType()) { - case Primitive::kPrimInt: { - locations->SetInAt(0, Location::RequiresRegister()); - // The shift count needs to be in CL. 
- locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1))); - locations->SetOut(Location::SameAsFirstInput()); - break; - } + case Primitive::kPrimInt: case Primitive::kPrimLong: { + // Can't have Location::Any() and output SameAsFirstInput() locations->SetInAt(0, Location::RequiresRegister()); - // The shift count needs to be in CL. - locations->SetInAt(1, Location::RegisterLocation(ECX)); + // The shift count needs to be in CL or a constant. + locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2769,6 +2767,7 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { switch (op->GetResultType()) { case Primitive::kPrimInt: { + DCHECK(first.IsRegister()); Register first_reg = first.AsRegister<Register>(); if (second.IsRegister()) { Register second_reg = second.AsRegister<Register>(); @@ -2781,7 +2780,11 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { __ shrl(first_reg, second_reg); } } else { - Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue); + int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue; + if (shift == 0) { + return; + } + Immediate imm(shift); if (op->IsShl()) { __ shll(first_reg, imm); } else if (op->IsShr()) { @@ -2793,14 +2796,29 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { break; } case Primitive::kPrimLong: { - Register second_reg = second.AsRegister<Register>(); - DCHECK_EQ(ECX, second_reg); - if (op->IsShl()) { - GenerateShlLong(first, second_reg); - } else if (op->IsShr()) { - GenerateShrLong(first, second_reg); + if (second.IsRegister()) { + Register second_reg = second.AsRegister<Register>(); + DCHECK_EQ(ECX, second_reg); + if (op->IsShl()) { + GenerateShlLong(first, second_reg); + } else if (op->IsShr()) { + GenerateShrLong(first, second_reg); + } else { + GenerateUShrLong(first, second_reg); + } } else { - GenerateUShrLong(first, second_reg); + // Shift by a constant. + int shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue; + // Nothing to do if the shift is 0, as the input is already the output. + if (shift != 0) { + if (op->IsShl()) { + GenerateShlLong(first, shift); + } else if (op->IsShr()) { + GenerateShrLong(first, shift); + } else { + GenerateUShrLong(first, shift); + } + } } break; } @@ -2809,6 +2827,30 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { } } +void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) { + Register low = loc.AsRegisterPairLow<Register>(); + Register high = loc.AsRegisterPairHigh<Register>(); + if (shift == 32) { + // Shift by 32 is easy. High gets low, and low gets 0. + codegen_->EmitParallelMoves( + loc.ToLow(), + loc.ToHigh(), + Primitive::kPrimInt, + Location::ConstantLocation(GetGraph()->GetIntConstant(0)), + loc.ToLow(), + Primitive::kPrimInt); + } else if (shift > 32) { + // Low part becomes 0. High part is low part << (shift-32). + __ movl(high, low); + __ shll(high, Immediate(shift - 32)); + __ xorl(low, low); + } else { + // Between 1 and 31. 
+ __ shld(high, low, Immediate(shift)); + __ shll(low, Immediate(shift)); + } +} + void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) { Label done; __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter); @@ -2820,6 +2862,27 @@ void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register __ Bind(&done); } +void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) { + Register low = loc.AsRegisterPairLow<Register>(); + Register high = loc.AsRegisterPairHigh<Register>(); + if (shift == 32) { + // Need to copy the sign. + DCHECK_NE(low, high); + __ movl(low, high); + __ sarl(high, Immediate(31)); + } else if (shift > 32) { + DCHECK_NE(low, high); + // High part becomes sign. Low part is shifted by shift - 32. + __ movl(low, high); + __ sarl(high, Immediate(31)); + __ sarl(low, Immediate(shift - 32)); + } else { + // Between 1 and 31. + __ shrd(low, high, Immediate(shift)); + __ sarl(high, Immediate(shift)); + } +} + void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) { Label done; __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter); @@ -2831,6 +2894,30 @@ void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register __ Bind(&done); } +void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) { + Register low = loc.AsRegisterPairLow<Register>(); + Register high = loc.AsRegisterPairHigh<Register>(); + if (shift == 32) { + // Shift by 32 is easy. Low gets high, and high gets 0. + codegen_->EmitParallelMoves( + loc.ToHigh(), + loc.ToLow(), + Primitive::kPrimInt, + Location::ConstantLocation(GetGraph()->GetIntConstant(0)), + loc.ToHigh(), + Primitive::kPrimInt); + } else if (shift > 32) { + // Low part is high >> (shift - 32). High part becomes 0. + __ movl(low, high); + __ shrl(low, Immediate(shift - 32)); + __ xorl(high, high); + } else { + // Between 1 and 31. 
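
The constant-shift helpers split the 64-bit value across a (high, low) register pair and branch on three cases: exactly 32, more than 32, and 1-31, where shld/shrd carry bits across the pair. The same decomposition in portable C++, validated against native 64-bit shifts; it assumes arithmetic >> for signed values, as GCC/Clang guarantee on the targets ART supports:

    #include <cassert>
    #include <cstdint>

    // Arithmetic shift right of a (high, low) pair by a constant,
    // mirroring GenerateShrLong (shift already masked to 0..63).
    static void ShrLong(uint32_t* low, int32_t* high, int shift) {
      if (shift == 0) return;  // input is already the output
      if (shift == 32) {
        *low = static_cast<uint32_t>(*high);
        *high >>= 31;          // copy the sign
      } else if (shift > 32) {
        *low = static_cast<uint32_t>(*high >> (shift - 32));
        *high >>= 31;
      } else {
        // shrd: low receives the bits shifted out of high.
        *low = (*low >> shift) | (static_cast<uint32_t>(*high) << (32 - shift));
        *high >>= shift;       // sarl
      }
    }

    int main() {
      for (int shift = 0; shift < 64; ++shift) {
        int64_t value = -0x123456789abcdef0LL + shift;
        uint32_t low = static_cast<uint32_t>(value);
        int32_t high = static_cast<int32_t>(value >> 32);
        ShrLong(&low, &high, shift);
        int64_t expected = value >> shift;
        assert(low == static_cast<uint32_t>(expected));
        assert(high == static_cast<int32_t>(expected >> 32));
      }
    }
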
+ __ shrd(low, high, Immediate(shift)); + __ shrl(high, Immediate(shift)); + } +} + void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) { Label done; __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter); @@ -3909,8 +3996,19 @@ void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { SuspendCheckSlowPathX86* slow_path = - new (GetGraph()->GetArena()) SuspendCheckSlowPathX86(instruction, successor); - codegen_->AddSlowPath(slow_path); + down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath()); + if (slow_path == nullptr) { + slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86(instruction, successor); + instruction->SetSlowPath(slow_path); + codegen_->AddSlowPath(slow_path); + if (successor != nullptr) { + DCHECK(successor->IsLoopHeader()); + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction); + } + } else { + DCHECK_EQ(slow_path->GetSuccessor(), successor); + } + __ fs()->cmpw(Address::Absolute( Thread::ThreadFlagsOffset<kX86WordSize>().Int32Value()), Immediate(0)); if (successor == nullptr) { diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 79dec7a..5a5a37b 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -166,6 +166,9 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateShlLong(const Location& loc, Register shifter); void GenerateShrLong(const Location& loc, Register shifter); void GenerateUShrLong(const Location& loc, Register shifter); + void GenerateShlLong(const Location& loc, int shift); + void GenerateShrLong(const Location& loc, int shift); + void GenerateUShrLong(const Location& loc, int shift); void GenerateMemoryBarrier(MemBarrierKind kind); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 5ac6866..9d2fc43 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -136,6 +136,10 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCodeX86_64 { return &return_label_; } + HBasicBlock* GetSuccessor() const { + return successor_; + } + private: HSuspendCheck* const instruction_; HBasicBlock* const successor_; @@ -771,7 +775,6 @@ void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) { HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { - codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck()); GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } @@ -3864,8 +3867,19 @@ void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instructio void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { SuspendCheckSlowPathX86_64* slow_path = - new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor); - codegen_->AddSlowPath(slow_path); + down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath()); + if (slow_path == nullptr) { + slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor); + instruction->SetSlowPath(slow_path); + 
codegen_->AddSlowPath(slow_path); + if (successor != nullptr) { + DCHECK(successor->IsLoopHeader()); + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction); + } + } else { + DCHECK_EQ(slow_path->GetSuccessor(), successor); + } + __ gs()->cmpw(Address::Absolute( Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(), true), Immediate(0)); if (successor == nullptr) { diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index 91cd60a..cd427c5 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -65,10 +65,13 @@ void HDeadCodeElimination::RemoveDeadBlocks() { for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); if (live_blocks.IsBitSet(block->GetBlockId())) { - continue; + // If this block is part of a loop that is being dismantled, we need to + // update its loop information. + block->UpdateLoopInformation(); + } else { + MaybeRecordDeadBlock(block); + block->DisconnectAndDelete(); } - MaybeRecordDeadBlock(block); - block->DisconnectAndDelete(); } // Connect successive blocks created by dead branches. Order does not matter. diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc index 2bfecc6..8f69f4d 100644 --- a/compiler/optimizing/find_loops_test.cc +++ b/compiler/optimizing/find_loops_test.cc @@ -235,14 +235,13 @@ TEST(FindLoopsTest, Loop4) { TestBlock(graph, 0, false, -1); // entry block TestBlock(graph, 1, false, -1); // pre header - const int blocks2[] = {2, 3, 4, 5, 8}; - TestBlock(graph, 2, true, 2, blocks2, 5); // loop header + const int blocks2[] = {2, 3, 4, 5}; + TestBlock(graph, 2, true, 2, blocks2, arraysize(blocks2)); // loop header TestBlock(graph, 3, false, 2); // block in loop - TestBlock(graph, 4, false, 2); // original back edge - TestBlock(graph, 5, false, 2); // original back edge + TestBlock(graph, 4, false, 2); // back edge + TestBlock(graph, 5, false, 2); // back edge TestBlock(graph, 6, false, -1); // return block TestBlock(graph, 7, false, -1); // exit block - TestBlock(graph, 8, false, 2); // synthesized back edge } diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index dc3124b..bb27a94 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -170,7 +170,8 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { } } - // Ensure the uses of `instruction` are defined in a block of the graph. + // Ensure the uses of `instruction` are defined in a block of the graph, + // and the entry in the use list is consistent. for (HUseIterator<HInstruction*> use_it(instruction->GetUses()); !use_it.Done(); use_it.Advance()) { HInstruction* use = use_it.Current()->GetUser(); @@ -184,6 +185,27 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { use->GetId(), instruction->GetId())); } + size_t use_index = use_it.Current()->GetIndex(); + if ((use_index >= use->InputCount()) || (use->InputAt(use_index) != instruction)) { + AddError(StringPrintf("User %s:%d of instruction %d has a wrong " + "UseListNode index.", + use->DebugName(), + use->GetId(), + instruction->GetId())); + } + } + + // Ensure the environment uses entries are consistent. 
+ for (HUseIterator<HEnvironment*> use_it(instruction->GetEnvUses()); + !use_it.Done(); use_it.Advance()) { + HEnvironment* use = use_it.Current()->GetUser(); + size_t use_index = use_it.Current()->GetIndex(); + if ((use_index >= use->Size()) || (use->GetInstructionAt(use_index) != instruction)) { + AddError(StringPrintf("Environment user of %s:%d has a wrong " + "UseListNode index.", + instruction->DebugName(), + instruction->GetId())); + } } // Ensure 'instruction' has pointers to its inputs' use entries. @@ -191,7 +213,11 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { HUserRecord<HInstruction*> input_record = instruction->InputRecordAt(i); HInstruction* input = input_record.GetInstruction(); HUseListNode<HInstruction*>* use_node = input_record.GetUseNode(); - if (use_node == nullptr || !input->GetUses().Contains(use_node)) { + size_t use_index = use_node->GetIndex(); + if ((use_node == nullptr) + || !input->GetUses().Contains(use_node) + || (use_index >= e) + || (use_index != i)) { AddError(StringPrintf("Instruction %s:%d has an invalid pointer to use entry " "at input %u (%s:%d).", instruction->DebugName(), @@ -262,6 +288,7 @@ void SSAChecker::VisitBasicBlock(HBasicBlock* block) { void SSAChecker::CheckLoop(HBasicBlock* loop_header) { int id = loop_header->GetBlockId(); + HLoopInformation* loop_information = loop_header->GetLoopInformation(); // Ensure the pre-header block is first in the list of // predecessors of a loop header. @@ -271,57 +298,48 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) { id)); } - // Ensure the loop header has only two predecessors and that only the - // second one is a back edge. + // Ensure the loop header has only one incoming branch and the remaining + // predecessors are back edges. size_t num_preds = loop_header->GetPredecessors().Size(); if (num_preds < 2) { AddError(StringPrintf( "Loop header %d has less than two predecessors: %zu.", id, num_preds)); - } else if (num_preds > 2) { - AddError(StringPrintf( - "Loop header %d has more than two predecessors: %zu.", - id, - num_preds)); } else { - HLoopInformation* loop_information = loop_header->GetLoopInformation(); HBasicBlock* first_predecessor = loop_header->GetPredecessors().Get(0); if (loop_information->IsBackEdge(*first_predecessor)) { AddError(StringPrintf( "First predecessor of loop header %d is a back edge.", id)); } - HBasicBlock* second_predecessor = loop_header->GetPredecessors().Get(1); - if (!loop_information->IsBackEdge(*second_predecessor)) { - AddError(StringPrintf( - "Second predecessor of loop header %d is not a back edge.", - id)); + for (size_t i = 1, e = loop_header->GetPredecessors().Size(); i < e; ++i) { + HBasicBlock* predecessor = loop_header->GetPredecessors().Get(i); + if (!loop_information->IsBackEdge(*predecessor)) { + AddError(StringPrintf( + "Loop header %d has multiple incoming (non back edge) blocks.", + id)); + } } } - const ArenaBitVector& loop_blocks = loop_header->GetLoopInformation()->GetBlocks(); + const ArenaBitVector& loop_blocks = loop_information->GetBlocks(); - // Ensure there is only one back edge per loop. - size_t num_back_edges = - loop_header->GetLoopInformation()->GetBackEdges().Size(); + // Ensure back edges belong to the loop. 
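// The index checks added to the graph checker above enforce one invariant; a
// self-contained sketch with simplified types (not ART's use-list classes): every
// entry in an instruction's use list must point back at it through the user's inputs.
#include <cstddef>
#include <cstdio>
#include <utility>
#include <vector>

struct Instr {
  int id;
  std::vector<Instr*> inputs;                         // inputs[i] is the value used at index i.
  std::vector<std::pair<Instr*, std::size_t>> uses;   // (user, input index) back-references.
};

bool CheckUses(const Instr& instr) {
  for (const std::pair<Instr*, std::size_t>& use : instr.uses) {
    const Instr* user = use.first;
    std::size_t index = use.second;
    if (index >= user->inputs.size() || user->inputs[index] != &instr) {
      std::printf("User %d has a wrong use index for instruction %d\n", user->id, instr.id);
      return false;
    }
  }
  return true;
}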
+ size_t num_back_edges = loop_information->GetBackEdges().Size(); if (num_back_edges == 0) { AddError(StringPrintf( "Loop defined by header %d has no back edge.", id)); - } else if (num_back_edges > 1) { - AddError(StringPrintf( - "Loop defined by header %d has several back edges: %zu.", - id, - num_back_edges)); } else { - DCHECK_EQ(num_back_edges, 1u); - int back_edge_id = loop_header->GetLoopInformation()->GetBackEdges().Get(0)->GetBlockId(); - if (!loop_blocks.IsBitSet(back_edge_id)) { - AddError(StringPrintf( - "Loop defined by header %d has an invalid back edge %d.", - id, - back_edge_id)); + for (size_t i = 0; i < num_back_edges; ++i) { + int back_edge_id = loop_information->GetBackEdges().Get(i)->GetBlockId(); + if (!loop_blocks.IsBitSet(back_edge_id)) { + AddError(StringPrintf( + "Loop defined by header %d has an invalid back edge %d.", + id, + back_edge_id)); + } } } diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index 5d3db5c..43fe374 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -374,4 +374,3 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) } } // namespace art - diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index dbb7cba..c243ef3 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -17,8 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_H_ #define ART_COMPILER_OPTIMIZING_INTRINSICS_H_ +#include "code_generator.h" #include "nodes.h" #include "optimization.h" +#include "parallel_move_resolver.h" namespace art { @@ -76,6 +78,38 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS + static void MoveArguments(HInvoke* invoke, + CodeGenerator* codegen, + InvokeDexCallingConventionVisitor* calling_convention_visitor) { + if (kIsDebugBuild && invoke->IsInvokeStaticOrDirect()) { + HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect(); + // When we do not run baseline, explicit clinit checks triggered by static + // invokes must have been pruned by art::PrepareForRegisterAllocation. + DCHECK(codegen->IsBaseline() || !invoke_static_or_direct->IsStaticWithExplicitClinitCheck()); + } + + if (invoke->GetNumberOfArguments() == 0) { + // No argument to move. + return; + } + + LocationSummary* locations = invoke->GetLocations(); + + // We're moving potentially two or more locations to locations that could overlap, so we need + // a parallel move resolver. + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + + for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) { + HInstruction* input = invoke->InputAt(i); + Location cc_loc = calling_convention_visitor->GetNextLocation(input->GetType()); + Location actual_loc = locations->InAt(i); + + parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr); + } + + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + } + protected: IntrinsicVisitor() {} diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 259d554..7f7b450 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -77,28 +77,9 @@ static void MoveFromReturnRegister(Location trg, Primitive::Type type, CodeGener } } -static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorARM* codegen) { - if (invoke->GetNumberOfArguments() == 0) { - // No argument to move.
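// The refactoring above replaces four per-backend copies of MoveArguments with one
// helper parameterized over an abstract calling-convention visitor. A minimal sketch
// of that shape, with hypothetical simplified types rather than ART's real ones:
#include <utility>
#include <vector>

enum class Type { kInt, kLong, kFloat, kReference };
struct Location { int encoded; };

struct CallingConventionVisitor {
  virtual ~CallingConventionVisitor() {}
  // Each backend answers: where does the next argument of this type go?
  virtual Location GetNextLocation(Type type) = 0;
};

// Shared, backend-agnostic part: pair each argument's current location with its
// calling-convention slot; a parallel-move resolver then emits the actual moves.
std::vector<std::pair<Location, Location>> CollectArgumentMoves(
    const std::vector<std::pair<Location, Type>>& args,
    CallingConventionVisitor* visitor) {
  std::vector<std::pair<Location, Location>> moves;
  for (const std::pair<Location, Type>& arg : args) {
    moves.push_back(std::make_pair(arg.first, visitor->GetNextLocation(arg.second)));
  }
  return moves;
}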
- return; - } - - LocationSummary* locations = invoke->GetLocations(); +static void MoveArguments(HInvoke* invoke, CodeGeneratorARM* codegen) { InvokeDexCallingConventionVisitorARM calling_convention_visitor; - - // We're moving potentially two or more locations to locations that could overlap, so we need - // a parallel move resolver. - HParallelMove parallel_move(arena); - - for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) { - HInstruction* input = invoke->InputAt(i); - Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType()); - Location actual_loc = locations->InAt(i); - - parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr); - } - - codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor); } // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified @@ -117,7 +98,7 @@ class IntrinsicSlowPathARM : public SlowPathCodeARM { SaveLiveRegisters(codegen, invoke_->GetLocations()); - MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen); + MoveArguments(invoke_, codegen); if (invoke_->IsInvokeStaticOrDirect()) { codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister); diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 9cfa782..ca3de99 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -86,28 +86,9 @@ static void MoveFromReturnRegister(Location trg, } } -static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorARM64* codegen) { - if (invoke->GetNumberOfArguments() == 0) { - // No argument to move. - return; - } - - LocationSummary* locations = invoke->GetLocations(); +static void MoveArguments(HInvoke* invoke, CodeGeneratorARM64* codegen) { InvokeDexCallingConventionVisitorARM64 calling_convention_visitor; - - // We're moving potentially two or more locations to locations that could overlap, so we need - // a parallel move resolver. - HParallelMove parallel_move(arena); - - for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) { - HInstruction* input = invoke->InputAt(i); - Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType()); - Location actual_loc = locations->InAt(i); - - parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr); - } - - codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor); } // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified @@ -126,7 +107,7 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 { SaveLiveRegisters(codegen, invoke_->GetLocations()); - MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen); + MoveArguments(invoke_, codegen); if (invoke_->IsInvokeStaticOrDirect()) { codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 62cf3eb..1eef1ef 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -111,28 +111,9 @@ static void MoveFromReturnRegister(Location target, } } -static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86* codegen) { - if (invoke->GetNumberOfArguments() == 0) { - // No argument to move.
- return; - } - - LocationSummary* locations = invoke->GetLocations(); +static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) { InvokeDexCallingConventionVisitorX86 calling_convention_visitor; - - // We're moving potentially two or more locations to locations that could overlap, so we need - // a parallel move resolver. - HParallelMove parallel_move(arena); - - for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) { - HInstruction* input = invoke->InputAt(i); - Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType()); - Location actual_loc = locations->InAt(i); - - parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr); - } - - codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor); } // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified @@ -155,7 +136,7 @@ class IntrinsicSlowPathX86 : public SlowPathCodeX86 { SaveLiveRegisters(codegen, invoke_->GetLocations()); - MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen); + MoveArguments(invoke_, codegen); if (invoke_->IsInvokeStaticOrDirect()) { codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), EAX); @@ -749,7 +730,7 @@ void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) { } static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) { - MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen); + MoveArguments(invoke, codegen); DCHECK(invoke->IsInvokeStaticOrDirect()); codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), EAX); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 7e24dca..1fc5432 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -103,28 +103,9 @@ static void MoveFromReturnRegister(Location trg, } } -static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86_64* codegen) { - if (invoke->GetNumberOfArguments() == 0) { - // No argument to move. - return; - } - - LocationSummary* locations = invoke->GetLocations(); +static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) { InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor; - - // We're moving potentially two or more locations to locations that could overlap, so we need - // a parallel move resolver.
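// The comment above is the whole motivation for HParallelMove: argument locations may
// overlap their calling-convention slots, so the moves cannot be emitted in arbitrary
// order. A minimal, standalone sequentializer sketch (registers as plain ints, not
// ART's resolver): emit a move only once nothing pending still reads its destination,
// and break a remaining cycle through a scratch register.
#include <cstddef>
#include <cstdio>
#include <vector>

struct Move { int src; int dst; };

void EmitParallelMoves(std::vector<Move> moves, int scratch) {
  while (!moves.empty()) {
    bool progress = false;
    for (std::size_t i = 0; i < moves.size(); ++i) {
      bool blocked = false;
      for (std::size_t j = 0; j < moves.size(); ++j) {
        if (j != i && moves[j].src == moves[i].dst) blocked = true;
      }
      if (!blocked) {
        std::printf("mov r%d -> r%d\n", moves[i].src, moves[i].dst);
        moves.erase(moves.begin() + i);
        progress = true;
        break;
      }
    }
    if (!progress) {
      // All remaining moves form cycles; rotate one value through the scratch register.
      Move m = moves.front();
      std::printf("mov r%d -> r%d (scratch)\n", m.src, scratch);
      for (Move& other : moves) {
        if (other.src == m.src) other.src = scratch;
      }
    }
  }
}
// For the swap {r0->r1, r1->r0} this emits: r0->scratch, r1->r0, scratch->r1.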
- HParallelMove parallel_move(arena); - - for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) { - HInstruction* input = invoke->InputAt(i); - Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType()); - Location actual_loc = locations->InAt(i); - - parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr); - } - - codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor); } // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified @@ -143,7 +124,7 @@ class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 { SaveLiveRegisters(codegen, invoke_->GetLocations()); - MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen); + MoveArguments(invoke_, codegen); if (invoke_->IsInvokeStaticOrDirect()) { codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI)); @@ -623,7 +604,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) { } static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) { - MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen); + MoveArguments(invoke, codegen); DCHECK(invoke->IsInvokeStaticOrDirect()); codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), CpuRegister(RDI)); diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 8a96ee9..1914339 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -445,44 +445,40 @@ TEST(LivenessTest, Loop5) { TEST(LivenessTest, Loop6) { // Bitsets are made of: - // (constant0, constant4, constant5, phi in block 2, phi in block 8) + // (constant0, constant4, constant5, phi in block 2) const char* expected = "Block 0\n" - " live in: (00000)\n" - " live out: (11100)\n" - " kill: (11100)\n" + " live in: (0000)\n" + " live out: (1110)\n" + " kill: (1110)\n" "Block 1\n" - " live in: (11100)\n" - " live out: (01100)\n" - " kill: (00000)\n" + " live in: (1110)\n" + " live out: (0110)\n" + " kill: (0000)\n" "Block 2\n" // loop header - " live in: (01100)\n" - " live out: (01110)\n" - " kill: (00010)\n" + " live in: (0110)\n" + " live out: (0111)\n" + " kill: (0001)\n" "Block 3\n" - " live in: (01100)\n" - " live out: (01100)\n" - " kill: (00000)\n" - "Block 4\n" // original back edge - " live in: (01100)\n" - " live out: (01100)\n" - " kill: (00000)\n" - "Block 5\n" // original back edge - " live in: (01100)\n" - " live out: (01100)\n" - " kill: (00000)\n" + " live in: (0110)\n" + " live out: (0110)\n" + " kill: (0000)\n" + "Block 4\n" // back edge + " live in: (0110)\n" + " live out: (0110)\n" + " kill: (0000)\n" + "Block 5\n" // back edge + " live in: (0110)\n" + " live out: (0110)\n" + " kill: (0000)\n" "Block 6\n" // return block - " live in: (00010)\n" - " live out: (00000)\n" - " kill: (00000)\n" + " live in: (0001)\n" + " live out: (0000)\n" + " kill: (0000)\n" "Block 7\n" // exit block - " live in: (00000)\n" - " live out: (00000)\n" - " kill: (00000)\n" - "Block 8\n" // synthesized back edge - " live in: (01100)\n" - " live out: (01100)\n" - " kill: (00001)\n"; + " live in: (0000)\n" + " live out: (0000)\n" + " kill: (0000)\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 699987c..85c0361 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -191,24 +191,6 @@ void
HGraph::SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor) { void HGraph::SimplifyLoop(HBasicBlock* header) { HLoopInformation* info = header->GetLoopInformation(); - // If there are more than one back edge, make them branch to the same block that - // will become the only back edge. This simplifies finding natural loops in the - // graph. - // Also, if the loop is a do/while (that is the back edge is an if), change the - // back edge to be a goto. This simplifies code generation of suspend checks. - if (info->NumberOfBackEdges() > 1 || info->GetBackEdges().Get(0)->GetLastInstruction()->IsIf()) { - HBasicBlock* new_back_edge = new (arena_) HBasicBlock(this, header->GetDexPc()); - AddBlock(new_back_edge); - new_back_edge->AddInstruction(new (arena_) HGoto()); - for (size_t pred = 0, e = info->GetBackEdges().Size(); pred < e; ++pred) { - HBasicBlock* back_edge = info->GetBackEdges().Get(pred); - back_edge->ReplaceSuccessor(header, new_back_edge); - } - info->ClearBackEdges(); - info->AddBackEdge(new_back_edge); - new_back_edge->AddSuccessor(header); - } - // Make sure the loop has only one pre header. This simplifies SSA building by having // to just look at the pre header to know which locals are initialized at entry of the // loop. @@ -218,11 +200,9 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { AddBlock(pre_header); pre_header->AddInstruction(new (arena_) HGoto()); - ArenaBitVector back_edges(arena_, GetBlocks().Size(), false); - HBasicBlock* back_edge = info->GetBackEdges().Get(0); for (size_t pred = 0; pred < header->GetPredecessors().Size(); ++pred) { HBasicBlock* predecessor = header->GetPredecessors().Get(pred); - if (predecessor != back_edge) { + if (!info->IsBackEdge(*predecessor)) { predecessor->ReplaceSuccessor(header, pre_header); pred--; } @@ -230,9 +210,17 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { pre_header->AddSuccessor(header); } - // Make sure the second predecessor of a loop header is the back edge. - if (header->GetPredecessors().Get(1) != info->GetBackEdges().Get(0)) { - header->SwapPredecessors(); + // Make sure the first predecessor of a loop header is the incoming block. + if (info->IsBackEdge(*header->GetPredecessors().Get(0))) { + HBasicBlock* to_swap = header->GetPredecessors().Get(0); + for (size_t pred = 1, e = header->GetPredecessors().Size(); pred < e; ++pred) { + HBasicBlock* predecessor = header->GetPredecessors().Get(pred); + if (!info->IsBackEdge(*predecessor)) { + header->predecessors_.Put(pred, to_swap); + header->predecessors_.Put(0, predecessor); + break; + } + } } // Place the suspend check at the beginning of the header, so that live registers @@ -357,26 +345,26 @@ void HLoopInformation::PopulateRecursive(HBasicBlock* block) { } bool HLoopInformation::Populate() { - DCHECK_EQ(GetBackEdges().Size(), 1u); - HBasicBlock* back_edge = GetBackEdges().Get(0); - DCHECK(back_edge->GetDominator() != nullptr); - if (!header_->Dominates(back_edge)) { - // This loop is not natural. Do not bother going further. - return false; - } + for (size_t i = 0, e = GetBackEdges().Size(); i < e; ++i) { + HBasicBlock* back_edge = GetBackEdges().Get(i); + DCHECK(back_edge->GetDominator() != nullptr); + if (!header_->Dominates(back_edge)) { + // This loop is not natural. Do not bother going further. + return false; + } - // Populate this loop: starting with the back edge, recursively add predecessors - // that are not already part of that loop. Set the header as part of the loop - // to end the recursion.
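// With the synthesized-back-edge code above removed, SimplifyLoop only has to reorder
// the predecessor list so the single incoming block comes first. A small sketch of
// that reordering, using a plain vector as a stand-in for ART's predecessor list:
#include <algorithm>
#include <vector>

struct Block { bool is_back_edge; };

void MoveIncomingPredecessorFirst(std::vector<Block*>& preds) {
  std::vector<Block*>::iterator it =
      std::find_if(preds.begin(), preds.end(),
                   [](Block* b) { return !b->is_back_edge; });
  if (it != preds.end() && it != preds.begin()) {
    std::swap(*preds.begin(), *it);  // Same swap the new SimplifyLoop code performs.
  }
}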
- // This is a recursive implementation of the algorithm described in - // "Advanced Compiler Design & Implementation" (Muchnick) p192. - blocks_.SetBit(header_->GetBlockId()); - PopulateRecursive(back_edge); + // Populate this loop: starting with the back edge, recursively add predecessors + // that are not already part of that loop. Set the header as part of the loop + // to end the recursion. + // This is a recursive implementation of the algorithm described in + // "Advanced Compiler Design & Implementation" (Muchnick) p192. + blocks_.SetBit(header_->GetBlockId()); + PopulateRecursive(back_edge); + } return true; } HBasicBlock* HLoopInformation::GetPreHeader() const { - DCHECK_EQ(header_->GetPredecessors().Size(), 2u); return header_->GetDominator(); } @@ -388,6 +376,14 @@ bool HLoopInformation::IsIn(const HLoopInformation& other) const { return other.blocks_.IsBitSet(header_->GetBlockId()); } +size_t HLoopInformation::GetLifetimeEnd() const { + size_t last_position = 0; + for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) { + last_position = std::max(back_edges_.Get(i)->GetLifetimeEnd(), last_position); + } + return last_position; +} + bool HBasicBlock::Dominates(HBasicBlock* other) const { // Walk up the dominator tree from `other`, to find out if `this` // is an ancestor. @@ -504,6 +500,16 @@ void HBasicBlock::RemoveInstructionOrPhi(HInstruction* instruction, bool ensure_ } } +void HEnvironment::CopyFrom(const GrowableArray<HInstruction*>& locals) { + for (size_t i = 0; i < locals.Size(); i++) { + HInstruction* instruction = locals.Get(i); + SetRawEnvAt(i, instruction); + if (instruction != nullptr) { + instruction->AddEnvUseAt(this, i); + } + } +} + void HEnvironment::CopyFrom(HEnvironment* env) { for (size_t i = 0; i < env->Size(); i++) { HInstruction* instruction = env->GetInstructionAt(i); @@ -713,6 +719,9 @@ void HPhi::AddInput(HInstruction* input) { void HPhi::RemoveInputAt(size_t index) { RemoveAsUserOfInput(index); inputs_.DeleteAt(index); + for (size_t i = index, e = InputCount(); i < e; ++i) { + InputRecordAt(i).GetUseNode()->SetIndex(i); + } } #define DEFINE_ACCEPT(name, super) \ @@ -961,8 +970,9 @@ void HBasicBlock::DisconnectAndDelete() { HLoopInformation* loop_info = it.Current(); loop_info->Remove(this); if (loop_info->IsBackEdge(*this)) { - // This deliberately leaves the loop in an inconsistent state and will - // fail SSAChecker unless the entire loop is removed during the pass. + // If this was the last back edge of the loop, we deliberately leave the + // loop in an inconsistent state and will fail SSAChecker unless the + // entire loop is removed during the pass. loop_info->RemoveBackEdge(this); } } @@ -1038,6 +1048,20 @@ void HBasicBlock::DisconnectAndDelete() { SetGraph(nullptr); } +void HBasicBlock::UpdateLoopInformation() { + // Check if loop information points to a dismantled loop. If so, replace with + // the loop information of a larger loop which contains this block, or nullptr + // otherwise. We iterate in case the larger loop has been destroyed too. 
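// The rewritten Populate above runs the same backward walk once per back edge. A
// standalone sketch of the algorithm on a plain adjacency list (simplified types,
// not ART's graph classes): seed the header, then add predecessors transitively.
#include <vector>

struct Node { std::vector<int> preds; };

void PopulateRecursive(const std::vector<Node>& cfg, int block, std::vector<bool>& in_loop) {
  if (in_loop[block]) return;
  in_loop[block] = true;
  for (int pred : cfg[block].preds) {
    PopulateRecursive(cfg, pred, in_loop);
  }
}

void PopulateLoop(const std::vector<Node>& cfg, int header,
                  const std::vector<int>& back_edges, std::vector<bool>& in_loop) {
  in_loop.assign(cfg.size(), false);
  in_loop[header] = true;  // Marking the header is what stops the recursion.
  for (int back_edge : back_edges) {
    PopulateRecursive(cfg, back_edge, in_loop);
  }
}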
+ while (IsInLoop() && loop_information_->GetBackEdges().IsEmpty()) { + if (IsLoopHeader()) { + HSuspendCheck* suspend_check = loop_information_->GetSuspendCheck(); + DCHECK_EQ(suspend_check->GetBlock(), this); + RemoveInstruction(suspend_check); + } + loop_information_ = loop_information_->GetPreHeader()->GetLoopInformation(); + } +} + void HBasicBlock::MergeWith(HBasicBlock* other) { DCHECK_EQ(GetGraph(), other->GetGraph()); DCHECK(GetDominatedBlocks().Contains(other)); @@ -1059,8 +1083,7 @@ void HBasicBlock::MergeWith(HBasicBlock* other) { HLoopInformation* loop_info = it.Current(); loop_info->Remove(other); if (loop_info->IsBackEdge(*other)) { - loop_info->ClearBackEdges(); - loop_info->AddBackEdge(this); + loop_info->ReplaceBackEdge(other, this); } } @@ -1291,11 +1314,9 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { loop_it.Current()->Add(to); } if (info->IsBackEdge(*at)) { - // Only `at` can become a back edge, as the inlined blocks - // are predecessors of `at`. - DCHECK_EQ(1u, info->NumberOfBackEdges()); - info->ClearBackEdges(); - info->AddBackEdge(to); + // Only `to` can become a back edge, as the inlined blocks + // are predecessors of `to`. + info->ReplaceBackEdge(at, to); } } } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 3fe23e1..5fc0470 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -48,6 +48,7 @@ class HPhi; class HSuspendCheck; class LiveInterval; class LocationSummary; +class SlowPathCode; class SsaBuilder; static const int kDefaultNumberOfBlocks = 8; @@ -397,11 +398,21 @@ class HLoopInformation : public ArenaObject<kArenaAllocMisc> { return back_edges_; } - void ClearBackEdges() { - back_edges_.Reset(); + // Returns the lifetime position of the back edge that has the + // greatest lifetime position. + size_t GetLifetimeEnd() const; + + void ReplaceBackEdge(HBasicBlock* existing, HBasicBlock* new_back_edge) { + for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) { + if (back_edges_.Get(i) == existing) { + back_edges_.Put(i, new_back_edge); + return; + } + } + UNREACHABLE(); } - // Find blocks that are part of this loop. Returns whether the loop is a natural loop, + // Finds blocks that are part of this loop. Returns whether the loop is a natural loop, // that is the header dominates the back edge. bool Populate(); @@ -636,7 +647,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { void RemoveInstructionOrPhi(HInstruction* instruction, bool ensure_safety = true); bool IsLoopHeader() const { - return (loop_information_ != nullptr) && (loop_information_->GetHeader() == this); + return IsInLoop() && (loop_information_->GetHeader() == this); } bool IsLoopPreHeaderFirstPredecessor() const { @@ -655,7 +666,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { void SetInLoop(HLoopInformation* info) { if (IsLoopHeader()) { // Nothing to do. This just means `info` is an outer loop. - } else if (loop_information_ == nullptr) { + } else if (!IsInLoop()) { loop_information_ = info; } else if (loop_information_->Contains(*info->GetHeader())) { // Block is currently part of an outer loop. Make it part of this inner loop. @@ -674,6 +685,11 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { loop_information_ = info; } + // Checks if the loop information points to a valid loop. If the loop has been + // dismantled (does not have a back edge any more), loop information is + // removed or replaced with the information of the first valid outer loop. 
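// UpdateLoopInformation above walks outward past loops whose back edges are all gone.
// The core of that walk, sketched with a hypothetical LoopInfo (not ART's class):
struct LoopInfo {
  LoopInfo* outer;      // Enclosing loop, or nullptr at the outermost level.
  int back_edge_count;  // Drops to 0 once the loop has been dismantled.
};

LoopInfo* FirstLiveLoop(LoopInfo* info) {
  while (info != nullptr && info->back_edge_count == 0) {
    info = info->outer;  // Keep going: the outer loop may be dismantled too.
  }
  return info;
}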
+ void UpdateLoopInformation(); + bool IsInLoop() const { return loop_information_ != nullptr; } // Returns whether this block dominates the block passed as parameter. @@ -727,7 +743,7 @@ class HLoopInformationOutwardIterator : public ValueObject { void Advance() { DCHECK(!Done()); - current_ = current_->GetHeader()->GetDominator()->GetLoopInformation(); + current_ = current_->GetPreHeader()->GetLoopInformation(); } HLoopInformation* Current() const { @@ -840,13 +856,14 @@ class HUseListNode : public ArenaObject<kArenaAllocMisc> { HUseListNode* GetNext() const { return next_; } T GetUser() const { return user_; } size_t GetIndex() const { return index_; } + void SetIndex(size_t index) { index_ = index; } private: HUseListNode(T user, size_t index) : user_(user), index_(index), prev_(nullptr), next_(nullptr) {} T const user_; - const size_t index_; + size_t index_; HUseListNode<T>* prev_; HUseListNode<T>* next_; @@ -1051,7 +1068,9 @@ class HEnvironment : public ArenaObject<kArenaAllocMisc> { } } - void CopyFrom(HEnvironment* env); + void CopyFrom(const GrowableArray<HInstruction*>& locals); + void CopyFrom(HEnvironment* environment); + // Copy from `env`. If it's a loop phi for `loop_header`, copy the first // input to the loop phi instead. This is for inserting instructions that // require an environment (like HDeoptimization) in the loop pre-header. @@ -1080,7 +1099,7 @@ class HEnvironment : public ArenaObject<kArenaAllocMisc> { GrowableArray<HUserRecord<HEnvironment*> > vregs_; - friend HInstruction; + friend class HInstruction; DISALLOW_COPY_AND_ASSIGN(HEnvironment); }; @@ -3236,19 +3255,25 @@ class HTemporary : public HTemplateInstruction<0> { class HSuspendCheck : public HTemplateInstruction<0> { public: explicit HSuspendCheck(uint32_t dex_pc) - : HTemplateInstruction(SideEffects::None()), dex_pc_(dex_pc) {} + : HTemplateInstruction(SideEffects::None()), dex_pc_(dex_pc), slow_path_(nullptr) {} bool NeedsEnvironment() const OVERRIDE { return true; } uint32_t GetDexPc() const { return dex_pc_; } + void SetSlowPath(SlowPathCode* slow_path) { slow_path_ = slow_path; } + SlowPathCode* GetSlowPath() const { return slow_path_; } DECLARE_INSTRUCTION(SuspendCheck); private: const uint32_t dex_pc_; + // Only used for code generation, in order to share the same slow path between back edges + // of the same loop.
+ SlowPathCode* slow_path_; + DISALLOW_COPY_AND_ASSIGN(HSuspendCheck); }; diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index a8d006f..2375595 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -768,7 +768,7 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { } } else { DCHECK(!current->IsHighInterval()); - int hint = current->FindFirstRegisterHint(free_until); + int hint = current->FindFirstRegisterHint(free_until, liveness_); if (hint != kNoRegister) { DCHECK(!IsBlocked(hint)); reg = hint; @@ -1101,8 +1101,8 @@ void RegisterAllocator::AddSorted(GrowableArray<LiveInterval*>* array, LiveInter } LiveInterval* RegisterAllocator::SplitBetween(LiveInterval* interval, size_t from, size_t to) { - HBasicBlock* block_from = liveness_.GetBlockFromPosition(from); - HBasicBlock* block_to = liveness_.GetBlockFromPosition(to); + HBasicBlock* block_from = liveness_.GetBlockFromPosition(from / 2); + HBasicBlock* block_to = liveness_.GetBlockFromPosition(to / 2); DCHECK(block_from != nullptr); DCHECK(block_to != nullptr); @@ -1111,6 +1111,41 @@ LiveInterval* RegisterAllocator::SplitBetween(LiveInterval* interval, size_t fro return Split(interval, to); } + /* + * Non-linear control flow will force moves at every branch instruction to the new location. + * To avoid having all branches doing the moves, we find the next non-linear position and + * split the interval at this position. Take the following example (block number is the linear + * order position): + * + * B1 + * / \ + * B2 B3 + * \ / + * B4 + * + * B2 needs to split an interval, whose next use is in B4. If we were to split at the + * beginning of B4, B3 would need to do a move between B3 and B4 to ensure the interval + * is now in the correct location. It makes performance worse if the interval is spilled + * and both B2 and B3 need to reload it before entering B4. + * + * By splitting at B3, we give a chance to the register allocator to allocate the + * interval to the same register as in B1, and therefore avoid doing any + * moves in B3. + */ + if (block_from->GetDominator() != nullptr) { + const GrowableArray<HBasicBlock*>& dominated = block_from->GetDominator()->GetDominatedBlocks(); + for (size_t i = 0; i < dominated.Size(); ++i) { + size_t position = dominated.Get(i)->GetLifetimeStart(); + if ((position > from) && (block_to->GetLifetimeStart() > position)) { + // Even if we found a better block, we continue iterating in case + // a dominated block is closer. + // Note that dominated blocks are not sorted in liveness order. + block_to = dominated.Get(i); + DCHECK_NE(block_to, block_from); + } + } + } + // If `to` is in a loop, find the outermost loop header which does not contain `from`.
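// The dominated-blocks scan above effectively picks the smallest linear start position
// strictly between `from` and the target block's start. The same selection as a
// standalone sketch (positions as plain size_t values, not ART's block objects):
#include <cstddef>
#include <vector>

std::size_t ChooseSplitPosition(std::size_t from, std::size_t to_start,
                                const std::vector<std::size_t>& dominated_starts) {
  std::size_t best = to_start;
  for (std::size_t position : dominated_starts) {
    if (position > from && position < best) {
      best = position;  // A dominated block closer to `from` is a better split point.
    }
  }
  return best;
}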
for (HLoopInformationOutwardIterator it(*block_to); !it.Done(); it.Advance()) { HBasicBlock* header = it.Current()->GetHeader(); @@ -1467,23 +1502,28 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { LiveRange* range = current->GetFirstRange(); while (range != nullptr) { - DCHECK(use == nullptr || use->GetPosition() >= range->GetStart()); + while (use != nullptr && use->GetPosition() < range->GetStart()) { + DCHECK(use->IsSynthesized()); + use = use->GetNext(); + } while (use != nullptr && use->GetPosition() <= range->GetEnd()) { DCHECK(!use->GetIsEnvironment()); DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd())); - LocationSummary* locations = use->GetUser()->GetLocations(); - Location expected_location = locations->InAt(use->GetInputIndex()); - // The expected (actual) location may be invalid in case the input is unused. Currently - // this only happens for intrinsics. - if (expected_location.IsValid()) { - if (expected_location.IsUnallocated()) { - locations->SetInAt(use->GetInputIndex(), source); - } else if (!expected_location.IsConstant()) { - AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location); + if (!use->IsSynthesized()) { + LocationSummary* locations = use->GetUser()->GetLocations(); + Location expected_location = locations->InAt(use->GetInputIndex()); + // The expected (actual) location may be invalid in case the input is unused. Currently + // this only happens for intrinsics. + if (expected_location.IsValid()) { + if (expected_location.IsUnallocated()) { + locations->SetInAt(use->GetInputIndex(), source); + } else if (!expected_location.IsConstant()) { + AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location); + } + } else { + DCHECK(use->GetUser()->IsInvoke()); + DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone); } - } else { - DCHECK(use->GetUser()->IsInvoke()); - DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone); } use = use->GetNext(); } @@ -1561,7 +1601,13 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { current = next_sibling; } while (current != nullptr); - DCHECK(use == nullptr); + if (kIsDebugBuild) { + // Following uses can only be synthesized uses. 
+ while (use != nullptr) { + DCHECK(use->IsSynthesized()); + use = use->GetNext(); + } + } } void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index b66e655..2a713cc 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -332,7 +332,7 @@ void SsaBuilder::BuildSsa() { } HInstruction* SsaBuilder::ValueOfLocal(HBasicBlock* block, size_t local) { - return GetLocalsFor(block)->GetInstructionAt(local); + return GetLocalsFor(block)->Get(local); } void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { @@ -349,7 +349,7 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { HPhi* phi = new (GetGraph()->GetArena()) HPhi( GetGraph()->GetArena(), local, 0, Primitive::kPrimVoid); block->AddPhi(phi); - current_locals_->SetRawEnvAt(local, phi); + current_locals_->Put(local, phi); } } // Save the loop header so that the last phase of the analysis knows which @@ -389,7 +389,7 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { block->AddPhi(phi); value = phi; } - current_locals_->SetRawEnvAt(local, value); + current_locals_->Put(local, value); } } @@ -520,7 +520,7 @@ HInstruction* SsaBuilder::GetReferenceTypeEquivalent(HInstruction* value) { } void SsaBuilder::VisitLoadLocal(HLoadLocal* load) { - HInstruction* value = current_locals_->GetInstructionAt(load->GetLocal()->GetRegNumber()); + HInstruction* value = current_locals_->Get(load->GetLocal()->GetRegNumber()); // If the operation requests a specific type, we make sure its input is of that type. if (load->GetType() != value->GetType()) { if (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble) { @@ -534,7 +534,7 @@ void SsaBuilder::VisitLoadLocal(HLoadLocal* load) { } void SsaBuilder::VisitStoreLocal(HStoreLocal* store) { - current_locals_->SetRawEnvAt(store->GetLocal()->GetRegNumber(), store->InputAt(1)); + current_locals_->Put(store->GetLocal()->GetRegNumber(), store->InputAt(1)); store->GetBlock()->RemoveInstruction(store); } @@ -544,7 +544,7 @@ void SsaBuilder::VisitInstruction(HInstruction* instruction) { } HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment( GetGraph()->GetArena(), current_locals_->Size()); - environment->CopyFrom(current_locals_); + environment->CopyFrom(*current_locals_); instruction->SetRawEnvironment(environment); } diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 265e95b..1c83c4b 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -58,14 +58,15 @@ class SsaBuilder : public HGraphVisitor { void BuildSsa(); - HEnvironment* GetLocalsFor(HBasicBlock* block) { - HEnvironment* env = locals_for_.Get(block->GetBlockId()); - if (env == nullptr) { - env = new (GetGraph()->GetArena()) HEnvironment( + GrowableArray<HInstruction*>* GetLocalsFor(HBasicBlock* block) { + GrowableArray<HInstruction*>* locals = locals_for_.Get(block->GetBlockId()); + if (locals == nullptr) { + locals = new (GetGraph()->GetArena()) GrowableArray<HInstruction*>( GetGraph()->GetArena(), GetGraph()->GetNumberOfVRegs()); - locals_for_.Put(block->GetBlockId(), env); + locals->SetSize(GetGraph()->GetNumberOfVRegs()); + locals_for_.Put(block->GetBlockId(), locals); } - return env; + return locals; } HInstruction* ValueOfLocal(HBasicBlock* block, size_t local); @@ -93,14 +94,14 @@ class SsaBuilder : public HGraphVisitor { static HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, 
Primitive::Type type); // Locals for the current block being visited. - HEnvironment* current_locals_; + GrowableArray<HInstruction*>* current_locals_; // Keep track of loop headers found. The last phase of the analysis iterates // over these blocks to set the inputs of their phis. GrowableArray<HBasicBlock*> loop_headers_; // HEnvironment for each block. - GrowableArray<HEnvironment*> locals_for_; + GrowableArray<GrowableArray<HInstruction*>*> locals_for_; DISALLOW_COPY_AND_ASSIGN(SsaBuilder); }; diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index b674f74..09a6648 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -75,9 +75,7 @@ void SsaLivenessAnalysis::LinearizeGraph() { HBasicBlock* block = it.Current(); size_t number_of_forward_predecessors = block->GetPredecessors().Size(); if (block->IsLoopHeader()) { - // We rely on having simplified the CFG. - DCHECK_EQ(1u, block->GetLoopInformation()->NumberOfBackEdges()); - number_of_forward_predecessors--; + number_of_forward_predecessors -= block->GetLoopInformation()->NumberOfBackEdges(); } forward_predecessors.Put(block->GetBlockId(), number_of_forward_predecessors); } @@ -264,13 +262,12 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { } if (block->IsLoopHeader()) { - HBasicBlock* back_edge = block->GetLoopInformation()->GetBackEdges().Get(0); + size_t last_position = block->GetLoopInformation()->GetLifetimeEnd(); // For all live_in instructions at the loop header, we need to create a range // that covers the full loop. for (uint32_t idx : live_in->Indexes()) { HInstruction* current = instructions_from_ssa_index_.Get(idx); - current->GetLiveInterval()->AddLoopRange(block->GetLifetimeStart(), - back_edge->GetLifetimeEnd()); + current->GetLiveInterval()->AddLoopRange(block->GetLifetimeStart(), last_position); } } } @@ -322,7 +319,8 @@ static int RegisterOrLowRegister(Location location) { return location.IsPair() ? location.low() : location.reg(); } -int LiveInterval::FindFirstRegisterHint(size_t* free_until) const { +int LiveInterval::FindFirstRegisterHint(size_t* free_until, + const SsaLivenessAnalysis& liveness) const { DCHECK(!IsHighInterval()); if (IsTemp()) return kNoRegister; @@ -336,12 +334,32 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until) const { } } + if (IsSplit() && liveness.IsAtBlockBoundary(GetStart() / 2)) { + // If the start of this interval is at a block boundary, we look at the + // location of the interval in blocks preceding the block this interval + // starts at. If one location is a register we return it as a hint. This + // will avoid a move between the two blocks. + HBasicBlock* block = liveness.GetBlockFromPosition(GetStart() / 2); + for (size_t i = 0; i < block->GetPredecessors().Size(); ++i) { + size_t position = block->GetPredecessors().Get(i)->GetLifetimeEnd() - 1; + // We know positions above GetStart() do not have a location yet. 
+ if (position < GetStart()) { + LiveInterval* existing = GetParent()->GetSiblingAt(position); + if (existing != nullptr + && existing->HasRegister() + && (free_until[existing->GetRegister()] > GetStart())) { + return existing->GetRegister(); + } + } + } + } + UsePosition* use = first_use_; size_t start = GetStart(); size_t end = GetEnd(); while (use != nullptr && use->GetPosition() <= end) { size_t use_position = use->GetPosition(); - if (use_position >= start) { + if (use_position >= start && !use->IsSynthesized()) { HInstruction* user = use->GetUser(); size_t input_index = use->GetInputIndex(); if (user->IsPhi()) { diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index b95276a..b550d8a 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -23,6 +23,7 @@ namespace art { class CodeGenerator; +class SsaLivenessAnalysis; static constexpr int kNoRegister = -1; @@ -112,12 +113,15 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> { is_environment_(is_environment), position_(position), next_(next) { - DCHECK(user->IsPhi() + DCHECK((user == nullptr) + || user->IsPhi() || (GetPosition() == user->GetLifetimePosition() + 1) || (GetPosition() == user->GetLifetimePosition())); DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition()); } + static constexpr size_t kNoInput = -1; + size_t GetPosition() const { return position_; } UsePosition* GetNext() const { return next_; } @@ -126,14 +130,16 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> { HInstruction* GetUser() const { return user_; } bool GetIsEnvironment() const { return is_environment_; } + bool IsSynthesized() const { return user_ == nullptr; } size_t GetInputIndex() const { return input_index_; } void Dump(std::ostream& stream) const { stream << position_; - if (is_environment_) { - stream << " (env)"; - } + } + + HLoopInformation* GetLoopInformation() const { + return user_->GetBlock()->GetLoopInformation(); } UsePosition* Dup(ArenaAllocator* allocator) const { @@ -142,6 +148,15 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> { next_ == nullptr ? nullptr : next_->Dup(allocator)); } + bool RequiresRegister() const { + if (GetIsEnvironment()) return false; + if (IsSynthesized()) return false; + Location location = GetUser()->GetLocations()->InAt(GetInputIndex()); + return location.IsUnallocated() + && (location.GetPolicy() == Location::kRequiresRegister + || location.GetPolicy() == Location::kRequiresFpuRegister); + } + private: HInstruction* const user_; const size_t input_index_; @@ -240,9 +255,15 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { // location of the input just before that instruction (and not potential moves due // to splitting). 
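// Synthesized uses introduced in this change have no user instruction; they only keep
// an interval live up to a back edge. A sketch of the representation and of skipping
// such uses, with simplified stand-in types rather than ART's UsePosition:
#include <cstddef>

struct Instr;  // Opaque user instruction.

struct UsePos {
  static constexpr std::size_t kNoInput = static_cast<std::size_t>(-1);
  const Instr* user;        // nullptr marks a synthesized (back-edge) use.
  std::size_t input_index;  // kNoInput when the use is synthesized.
  std::size_t position;
  const UsePos* next;

  bool IsSynthesized() const { return user == nullptr; }
};

// Find the next use with a real user, as the updated ConnectSiblings loop does.
const UsePos* NextRealUse(const UsePos* use) {
  while (use != nullptr && use->IsSynthesized()) {
    use = use->next;
  }
  return use;
}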
position = instruction->GetLifetimePosition(); + } else if (!locations->InAt(input_index).IsValid()) { + return; } } + if (!is_environment && instruction->IsInLoop()) { + AddBackEdgeUses(*instruction->GetBlock()); + } + DCHECK(position == instruction->GetLifetimePosition() || position == instruction->GetLifetimePosition() + 1); @@ -306,6 +327,9 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { void AddPhiUse(HInstruction* instruction, size_t input_index, HBasicBlock* block) { DCHECK(instruction->IsPhi()); + if (block->IsInLoop()) { + AddBackEdgeUses(*block); + } first_use_ = new (allocator_) UsePosition( instruction, input_index, false, block->GetLifetimeEnd(), first_use_); } @@ -456,27 +480,9 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { if (is_temp_) { return position == GetStart() ? position : kNoLifetime; } - if (position == GetStart() && IsParent()) { - LocationSummary* locations = defined_by_->GetLocations(); - Location location = locations->Out(); - // This interval is the first interval of the instruction. If the output - // of the instruction requires a register, we return the position of that instruction - // as the first register use. - if (location.IsUnallocated()) { - if ((location.GetPolicy() == Location::kRequiresRegister) - || (location.GetPolicy() == Location::kSameAsFirstInput - && (locations->InAt(0).IsRegister() - || locations->InAt(0).IsRegisterPair() - || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) { - return position; - } else if ((location.GetPolicy() == Location::kRequiresFpuRegister) - || (location.GetPolicy() == Location::kSameAsFirstInput - && locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister)) { - return position; - } - } else if (location.IsRegister() || location.IsRegisterPair()) { - return position; - } + + if (IsDefiningPosition(position) && DefinitionRequiresRegister()) { + return position; } UsePosition* use = first_use_; @@ -484,10 +490,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { while (use != nullptr && use->GetPosition() <= end) { size_t use_position = use->GetPosition(); if (use_position > position) { - Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex()); - if (location.IsUnallocated() - && (location.GetPolicy() == Location::kRequiresRegister - || location.GetPolicy() == Location::kRequiresFpuRegister)) { + if (use->RequiresRegister()) { return use_position; } } @@ -505,18 +508,16 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { return position == GetStart() ? 
position : kNoLifetime; } - if (position == GetStart() && IsParent()) { - if (defined_by_->GetLocations()->Out().IsValid()) { - return position; - } + if (IsDefiningPosition(position)) { + DCHECK(defined_by_->GetLocations()->Out().IsValid()); + return position; } UsePosition* use = first_use_; size_t end = GetEnd(); while (use != nullptr && use->GetPosition() <= end) { - Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex()); size_t use_position = use->GetPosition(); - if (use_position > position && location.IsValid()) { + if (use_position > position) { return use_position; } use = use->GetNext(); @@ -664,7 +665,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { stream << " "; } while ((use = use->GetNext()) != nullptr); } - stream << "}, {"; + stream << "}, { "; use = first_env_use_; if (use != nullptr) { do { @@ -690,7 +691,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { // Returns the first register hint that is at least free before // the value contained in `free_until`. If none is found, returns // `kNoRegister`. - int FindFirstRegisterHint(size_t* free_until) const; + int FindFirstRegisterHint(size_t* free_until, const SsaLivenessAnalysis& liveness) const; // If there is enough at the definition site to find a register (for example // it uses the same input as the first input), returns the register as a hint. @@ -910,6 +911,104 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { return range; } + bool DefinitionRequiresRegister() const { + DCHECK(IsParent()); + LocationSummary* locations = defined_by_->GetLocations(); + Location location = locations->Out(); + // This interval is the first interval of the instruction. If the output + // of the instruction requires a register, we return the position of that instruction + // as the first register use. + if (location.IsUnallocated()) { + if ((location.GetPolicy() == Location::kRequiresRegister) + || (location.GetPolicy() == Location::kSameAsFirstInput + && (locations->InAt(0).IsRegister() + || locations->InAt(0).IsRegisterPair() + || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) { + return true; + } else if ((location.GetPolicy() == Location::kRequiresFpuRegister) + || (location.GetPolicy() == Location::kSameAsFirstInput + && (locations->InAt(0).IsFpuRegister() + || locations->InAt(0).IsFpuRegisterPair() + || locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister))) { + return true; + } + } else if (location.IsRegister() || location.IsRegisterPair()) { + return true; + } + return false; + } + + bool IsDefiningPosition(size_t position) const { + return IsParent() && (position == GetStart()); + } + + bool HasSynthesizeUseAt(size_t position) const { + UsePosition* use = first_use_; + while (use != nullptr) { + size_t use_position = use->GetPosition(); + if ((use_position == position) && use->IsSynthesized()) { + return true; + } + if (use_position > position) break; + use = use->GetNext(); + } + return false; + } + + void AddBackEdgeUses(const HBasicBlock& block_at_use) { + DCHECK(block_at_use.IsInLoop()); + // Add synthesized uses at the back edge of loops to help the register allocator. + // Note that this method is called in decreasing liveness order, to facilitate adding + // uses at the head of the `first_use_` linked list. Because below + // we iterate from inner-most to outer-most, which is in increasing liveness order, + // we need to take extra care of how the `first_use_` linked list is being updated.
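// AddBackEdgeUses (continued below) builds the synthesized uses as a separate chain
// and then splices it in front of `first_use_`. The splice itself, sketched on a
// minimal singly linked list (simplified types):
struct Use {
  int position;
  Use* next;
};

// `first_new`..`last_new` are already ordered by position and all precede the old
// head, so prepending keeps the whole list sorted by increasing position.
Use* PrependChain(Use* first_new, Use* last_new, Use* old_head) {
  if (first_new == nullptr) {
    return old_head;  // Nothing was synthesized.
  }
  last_new->next = old_head;
  return first_new;
}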
+ UsePosition* first_in_new_list = nullptr; + UsePosition* last_in_new_list = nullptr; + for (HLoopInformationOutwardIterator it(block_at_use); + !it.Done(); + it.Advance()) { + HLoopInformation* current = it.Current(); + if (GetDefinedBy()->GetLifetimePosition() >= current->GetHeader()->GetLifetimeStart()) { + // This interval is defined in the loop. We can stop going outward. + break; + } + + // We're only adding a synthesized use at the last back edge. Adding synthesized uses on + // all back edges is not necessary: anything used in the loop will have its use at the + // last back edge. If we want branches in a loop to have better register allocation than + // another branch, then it is the linear order we should change. + size_t back_edge_use_position = current->GetLifetimeEnd(); + if ((first_use_ != nullptr) && (first_use_->GetPosition() <= back_edge_use_position)) { + // There was a use already seen in this loop. Therefore the previous call to `AddUse` + // already inserted the backedge use. We can stop going outward. + DCHECK(HasSynthesizeUseAt(back_edge_use_position)); + break; + } + + DCHECK(last_in_new_list == nullptr + || back_edge_use_position > last_in_new_list->GetPosition()); + + UsePosition* new_use = new (allocator_) UsePosition( + nullptr, UsePosition::kNoInput, /* is_environment */ false, + back_edge_use_position, nullptr); + + if (last_in_new_list != nullptr) { + // Going outward. The latest created use needs to point to the new use. + last_in_new_list->SetNext(new_use); + } else { + // This is the inner-most loop. + DCHECK_EQ(current, block_at_use.GetLoopInformation()); + first_in_new_list = new_use; + } + last_in_new_list = new_use; + } + // Link the newly created linked list with `first_use_`. + if (last_in_new_list != nullptr) { + last_in_new_list->SetNext(first_use_); + first_use_ = first_in_new_list; + } + } + ArenaAllocator* const allocator_; // Ranges of this interval. We need a quick access to the last range to test @@ -1022,14 +1121,18 @@ class SsaLivenessAnalysis : public ValueObject { } HBasicBlock* GetBlockFromPosition(size_t index) const { - HInstruction* instruction = GetInstructionFromPosition(index / 2); + HInstruction* instruction = GetInstructionFromPosition(index); if (instruction == nullptr) { // If we are at a block boundary, get the block following. - instruction = GetInstructionFromPosition((index / 2) + 1); + instruction = GetInstructionFromPosition(index + 1); } return instruction->GetBlock(); } + bool IsAtBlockBoundary(size_t index) const { + return GetInstructionFromPosition(index) == nullptr; + } + HInstruction* GetTempUser(LiveInterval* temp) const { // A temporary shares the same lifetime start as the instruction that requires it.
DCHECK(temp->IsTemp()); diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc index 00c241b..4cc9c3e 100644 --- a/compiler/optimizing/ssa_test.cc +++ b/compiler/optimizing/ssa_test.cc @@ -373,30 +373,26 @@ TEST(SsaTest, Loop6) { const char* expected = "BasicBlock 0, succ: 1\n" " 0: IntConstant 0 [5]\n" - " 1: IntConstant 4 [14, 8, 8]\n" - " 2: IntConstant 5 [14]\n" + " 1: IntConstant 4 [5, 8, 8]\n" + " 2: IntConstant 5 [5]\n" " 3: Goto\n" "BasicBlock 1, pred: 0, succ: 2\n" " 4: Goto\n" - "BasicBlock 2, pred: 1, 8, succ: 6, 3\n" - " 5: Phi(0, 14) [12, 6, 6]\n" + "BasicBlock 2, pred: 1, 4, 5, succ: 6, 3\n" + " 5: Phi(0, 2, 1) [12, 6, 6]\n" " 6: Equal(5, 5) [7]\n" " 7: If(6)\n" "BasicBlock 3, pred: 2, succ: 5, 4\n" " 8: Equal(1, 1) [9]\n" " 9: If(8)\n" - "BasicBlock 4, pred: 3, succ: 8\n" + "BasicBlock 4, pred: 3, succ: 2\n" " 10: Goto\n" - "BasicBlock 5, pred: 3, succ: 8\n" + "BasicBlock 5, pred: 3, succ: 2\n" " 11: Goto\n" "BasicBlock 6, pred: 2, succ: 7\n" " 12: Return(5)\n" "BasicBlock 7, pred: 6\n" - " 13: Exit\n" - // Synthesized single back edge of loop. - "BasicBlock 8, pred: 5, 4, succ: 2\n" - " 14: Phi(1, 2) [5]\n" - " 15: Goto\n"; + " 13: Exit\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index 282ab96..5e9653d 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -272,6 +272,10 @@ void Mips64Assembler::Lhu(GpuRegister rt, GpuRegister rs, uint16_t imm16) { EmitI(0x25, rs, rt, imm16); } +void Mips64Assembler::Lwu(GpuRegister rt, GpuRegister rs, uint16_t imm16) { + EmitI(0x27, rs, rt, imm16); +} + void Mips64Assembler::Lui(GpuRegister rt, uint16_t imm16) { EmitI(0xf, static_cast<GpuRegister>(0), rt, imm16); } @@ -480,6 +484,9 @@ void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuR case kLoadWord: Lw(reg, base, offset); break; + case kLoadUnsignedWord: + Lwu(reg, base, offset); + break; case kLoadDoubleword: // TODO: alignment issues ??? 
Ld(reg, base, offset); @@ -512,7 +519,6 @@ void Mips64Assembler::EmitLoad(ManagedRegister m_dst, GpuRegister src_register, CHECK_EQ(0u, size) << dst; } else if (dst.IsGpuRegister()) { if (size == 4) { - CHECK_EQ(4u, size) << dst; LoadFromOffset(kLoadWord, dst.AsGpuRegister(), src_register, src_offset); } else if (size == 8) { CHECK_EQ(8u, size) << dst; @@ -740,14 +746,13 @@ void Mips64Assembler::LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> sr void Mips64Assembler::LoadRef(ManagedRegister mdest, FrameOffset src) { Mips64ManagedRegister dest = mdest.AsMips64(); CHECK(dest.IsGpuRegister()); - LoadFromOffset(kLoadWord, dest.AsGpuRegister(), SP, src.Int32Value()); + LoadFromOffset(kLoadUnsignedWord, dest.AsGpuRegister(), SP, src.Int32Value()); } -void Mips64Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base, - MemberOffset offs) { +void Mips64Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs) { Mips64ManagedRegister dest = mdest.AsMips64(); - CHECK(dest.IsGpuRegister() && dest.IsGpuRegister()); - LoadFromOffset(kLoadWord, dest.AsGpuRegister(), + CHECK(dest.IsGpuRegister() && base.AsMips64().IsGpuRegister()); + LoadFromOffset(kLoadUnsignedWord, dest.AsGpuRegister(), base.AsMips64().AsGpuRegister(), offs.Int32Value()); if (kPoisonHeapReferences) { Subu(dest.AsGpuRegister(), ZERO, dest.AsGpuRegister()); @@ -921,7 +926,7 @@ void Mips64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg, // the address in the handle scope holding the reference. // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset) if (in_reg.IsNoRegister()) { - LoadFromOffset(kLoadWord, out_reg.AsGpuRegister(), + LoadFromOffset(kLoadUnsignedWord, out_reg.AsGpuRegister(), SP, handle_scope_offset.Int32Value()); in_reg = out_reg; } @@ -944,7 +949,7 @@ void Mips64Assembler::CreateHandleScopeEntry(FrameOffset out_off, CHECK(scratch.IsGpuRegister()) << scratch; if (null_allowed) { Label null_arg; - LoadFromOffset(kLoadWord, scratch.AsGpuRegister(), SP, + LoadFromOffset(kLoadUnsignedWord, scratch.AsGpuRegister(), SP, handle_scope_offset.Int32Value()); // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is // the address in the handle scope holding the reference. 
@@ -998,7 +1003,7 @@ void Mips64Assembler::Call(FrameOffset base, Offset offset, ManagedRegister mscr Mips64ManagedRegister scratch = mscratch.AsMips64(); CHECK(scratch.IsGpuRegister()) << scratch; // Call *(*(SP + base) + offset) - LoadFromOffset(kLoadWord, scratch.AsGpuRegister(), + LoadFromOffset(kLoadUnsignedWord, scratch.AsGpuRegister(), SP, base.Int32Value()); LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), scratch.AsGpuRegister(), offset.Int32Value()); diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index b7f6a9e..2d7c661 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -36,6 +36,7 @@ enum LoadOperandType { kLoadSignedHalfword, kLoadUnsignedHalfword, kLoadWord, + kLoadUnsignedWord, kLoadDoubleword }; @@ -85,6 +86,7 @@ class Mips64Assembler FINAL : public Assembler { void Ld(GpuRegister rt, GpuRegister rs, uint16_t imm16); void Lbu(GpuRegister rt, GpuRegister rs, uint16_t imm16); void Lhu(GpuRegister rt, GpuRegister rs, uint16_t imm16); + void Lwu(GpuRegister rt, GpuRegister rs, uint16_t imm16); void Lui(GpuRegister rt, uint16_t imm16); void Mfhi(GpuRegister rd); void Mflo(GpuRegister rd); diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 329698c..f2541a2 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -1292,32 +1292,62 @@ void X86Assembler::decl(const Address& address) { void X86Assembler::shll(Register reg, const Immediate& imm) { - EmitGenericShift(4, reg, imm); + EmitGenericShift(4, Operand(reg), imm); } void X86Assembler::shll(Register operand, Register shifter) { - EmitGenericShift(4, operand, shifter); + EmitGenericShift(4, Operand(operand), shifter); +} + + +void X86Assembler::shll(const Address& address, const Immediate& imm) { + EmitGenericShift(4, address, imm); +} + + +void X86Assembler::shll(const Address& address, Register shifter) { + EmitGenericShift(4, address, shifter); } void X86Assembler::shrl(Register reg, const Immediate& imm) { - EmitGenericShift(5, reg, imm); + EmitGenericShift(5, Operand(reg), imm); } void X86Assembler::shrl(Register operand, Register shifter) { - EmitGenericShift(5, operand, shifter); + EmitGenericShift(5, Operand(operand), shifter); +} + + +void X86Assembler::shrl(const Address& address, const Immediate& imm) { + EmitGenericShift(5, address, imm); +} + + +void X86Assembler::shrl(const Address& address, Register shifter) { + EmitGenericShift(5, address, shifter); } void X86Assembler::sarl(Register reg, const Immediate& imm) { - EmitGenericShift(7, reg, imm); + EmitGenericShift(7, Operand(reg), imm); } void X86Assembler::sarl(Register operand, Register shifter) { - EmitGenericShift(7, operand, shifter); + EmitGenericShift(7, Operand(operand), shifter); +} + + +void X86Assembler::sarl(const Address& address, const Immediate& imm) { + EmitGenericShift(7, address, imm); +} + + +void X86Assembler::sarl(const Address& address, Register shifter) { + EmitGenericShift(7, address, shifter); } @@ -1330,6 +1360,15 @@ void X86Assembler::shld(Register dst, Register src, Register shifter) { } +void X86Assembler::shld(Register dst, Register src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0xA4); + EmitRegisterOperand(src, dst); + EmitUint8(imm.value() & 0xFF); +} + + void X86Assembler::shrd(Register dst, Register src, Register shifter) { DCHECK_EQ(ECX, shifter); AssemblerBuffer::EnsureCapacity 
ensured(&buffer_); @@ -1339,6 +1378,15 @@ void X86Assembler::shrd(Register dst, Register src, Register shifter) { } +void X86Assembler::shrd(Register dst, Register src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0xAC); + EmitRegisterOperand(src, dst); + EmitUint8(imm.value() & 0xFF); +} + + void X86Assembler::negl(Register reg) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF7); @@ -1622,28 +1670,28 @@ void X86Assembler::EmitLabelLink(Label* label) { void X86Assembler::EmitGenericShift(int reg_or_opcode, - Register reg, + const Operand& operand, const Immediate& imm) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); CHECK(imm.is_int8()); if (imm.value() == 1) { EmitUint8(0xD1); - EmitOperand(reg_or_opcode, Operand(reg)); + EmitOperand(reg_or_opcode, operand); } else { EmitUint8(0xC1); - EmitOperand(reg_or_opcode, Operand(reg)); + EmitOperand(reg_or_opcode, operand); EmitUint8(imm.value() & 0xFF); } } void X86Assembler::EmitGenericShift(int reg_or_opcode, - Register operand, + const Operand& operand, Register shifter) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); CHECK_EQ(shifter, ECX); EmitUint8(0xD3); - EmitOperand(reg_or_opcode, Operand(operand)); + EmitOperand(reg_or_opcode, operand); } static dwarf::Reg DWARFReg(Register reg) { diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 7fc8ef0..946c96d 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -430,12 +430,20 @@ class X86Assembler FINAL : public Assembler { void shll(Register reg, const Immediate& imm); void shll(Register operand, Register shifter); + void shll(const Address& address, const Immediate& imm); + void shll(const Address& address, Register shifter); void shrl(Register reg, const Immediate& imm); void shrl(Register operand, Register shifter); + void shrl(const Address& address, const Immediate& imm); + void shrl(const Address& address, Register shifter); void sarl(Register reg, const Immediate& imm); void sarl(Register operand, Register shifter); + void sarl(const Address& address, const Immediate& imm); + void sarl(const Address& address, Register shifter); void shld(Register dst, Register src, Register shifter); + void shld(Register dst, Register src, const Immediate& imm); void shrd(Register dst, Register src, Register shifter); + void shrd(Register dst, Register src, const Immediate& imm); void negl(Register reg); void notl(Register reg); @@ -620,8 +628,8 @@ class X86Assembler FINAL : public Assembler { void EmitLabelLink(Label* label); void EmitNearLabelLink(Label* label); - void EmitGenericShift(int rm, Register reg, const Immediate& imm); - void EmitGenericShift(int rm, Register operand, Register shifter); + void EmitGenericShift(int rm, const Operand& operand, const Immediate& imm); + void EmitGenericShift(int rm, const Operand& operand, Register shifter); DISALLOW_COPY_AND_ASSIGN(X86Assembler); }; diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc index 8490afb..b4a45c6 100644 --- a/dex2oat/dex2oat.cc +++ b/dex2oat/dex2oat.cc @@ -691,6 +691,8 @@ class Dex2Oat FINAL { include_cfi = false; } else if (option == "--debuggable") { debuggable = true; + include_debug_symbols = true; + include_cfi = true; } else if (option.starts_with("--profile-file=")) { profile_file_ = option.substr(strlen("--profile-file=")).data(); VLOG(compiler) << "dex2oat: profile file is " << profile_file_; diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc index 
f2e35af..949c2cb 100644 --- a/oatdump/oatdump.cc +++ b/oatdump/oatdump.cc @@ -76,41 +76,38 @@ const char* image_roots_descriptions_[] = { "kClassRoots", }; -class OatSymbolizer FINAL : public CodeOutput { +class OatSymbolizer FINAL { public: - explicit OatSymbolizer(const OatFile* oat_file, const std::string& output_name) : - oat_file_(oat_file), builder_(nullptr), elf_output_(nullptr), - output_name_(output_name.empty() ? "symbolized.oat" : output_name) { - } + class RodataWriter FINAL : public CodeOutput { + public: + explicit RodataWriter(const OatFile* oat_file) : oat_file_(oat_file) {} - bool Init() { - Elf32_Word oat_data_size = oat_file_->GetOatHeader().GetExecutableOffset(); + bool Write(OutputStream* out) OVERRIDE { + const size_t rodata_size = oat_file_->GetOatHeader().GetExecutableOffset(); + return out->WriteFully(oat_file_->Begin(), rodata_size); + } - uint32_t diff = static_cast<uint32_t>(oat_file_->End() - oat_file_->Begin()); - uint32_t oat_exec_size = diff - oat_data_size; - uint32_t oat_bss_size = oat_file_->BssSize(); + private: + const OatFile* oat_file_; + }; - elf_output_ = OS::CreateEmptyFile(output_name_.c_str()); + class TextWriter FINAL : public CodeOutput { + public: + explicit TextWriter(const OatFile* oat_file) : oat_file_(oat_file) {} - builder_.reset(new ElfBuilder<ElfTypes32>( - this, - elf_output_, - oat_file_->GetOatHeader().GetInstructionSet(), - 0, - oat_data_size, - oat_data_size, - oat_exec_size, - RoundUp(oat_data_size + oat_exec_size, kPageSize), - oat_bss_size, - true, - false)); - - if (!builder_->Init()) { - builder_.reset(nullptr); - return false; + bool Write(OutputStream* out) OVERRIDE { + const size_t rodata_size = oat_file_->GetOatHeader().GetExecutableOffset(); + const uint8_t* text_begin = oat_file_->Begin() + rodata_size; + return out->WriteFully(text_begin, oat_file_->End() - text_begin); } - return true; + private: + const OatFile* oat_file_; + }; + + explicit OatSymbolizer(const OatFile* oat_file, const std::string& output_name) : + oat_file_(oat_file), builder_(nullptr), + output_name_(output_name.empty() ? "symbolized.oat" : output_name) { } typedef void (OatSymbolizer::*Callback)(const DexFile::ClassDef&, @@ -122,9 +119,17 @@ class OatSymbolizer FINAL : public CodeOutput { uint32_t); bool Symbolize() { - if (builder_.get() == nullptr) { - return false; - } + Elf32_Word rodata_size = oat_file_->GetOatHeader().GetExecutableOffset(); + uint32_t size = static_cast<uint32_t>(oat_file_->End() - oat_file_->Begin()); + uint32_t text_size = size - rodata_size; + uint32_t bss_size = oat_file_->BssSize(); + RodataWriter rodata_writer(oat_file_); + TextWriter text_writer(oat_file_); + builder_.reset(new ElfBuilder<ElfTypes32>( + oat_file_->GetOatHeader().GetInstructionSet(), + rodata_size, &rodata_writer, + text_size, &text_writer, + bss_size)); Walk(&art::OatSymbolizer::RegisterForDedup); @@ -132,10 +137,11 @@ class OatSymbolizer FINAL : public CodeOutput { Walk(&art::OatSymbolizer::AddSymbol); - bool result = builder_->Write(); + File* elf_output = OS::CreateEmptyFile(output_name_.c_str()); + bool result = builder_->Write(elf_output); // Ignore I/O errors. 
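The symbolizer rework above replaces the stateful Init()/Write() pair with small CodeOutput writers handed to the builder, so all file production happens in one fallible call. A rough sketch of that callback shape, using hypothetical stand-in types (TinySection, TinyBuilder), not the real ElfBuilder API:

#include <functional>
#include <ostream>
#include <vector>

struct TinySection {
  // Mirrors the role of CodeOutput::Write: emit this section's bytes, report success.
  std::function<bool(std::ostream&)> write;
};

struct TinyBuilder {
  std::vector<TinySection*> sections;
  bool Write(std::ostream& out) {
    for (TinySection* s : sections) {
      if (!s->write(out)) {
        return false;  // a single failure path; no separate Init() state to track
      }
    }
    return true;
  }
};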
- UNUSED(elf_output_->FlushClose()); + UNUSED(elf_output->FlushClose()); return result; } @@ -269,24 +275,14 @@ class OatSymbolizer FINAL : public CodeOutput { pretty_name = "[Dedup]" + pretty_name; } - ElfSymtabBuilder<ElfTypes32>* symtab = builder_->GetSymtabBuilder(); + auto* symtab = builder_->GetSymtab(); - symtab->AddSymbol(pretty_name, &builder_->GetTextBuilder(), + symtab->AddSymbol(pretty_name, builder_->GetText(), oat_method.GetCodeOffset() - oat_file_->GetOatHeader().GetExecutableOffset(), true, oat_method.GetQuickCodeSize(), STB_GLOBAL, STT_FUNC); } } - // Set oat data offset. Required by ElfBuilder/CodeOutput. - void SetCodeOffset(size_t offset ATTRIBUTE_UNUSED) { - // Nothing to do. - } - - // Write oat code. Required by ElfBuilder/CodeOutput. - bool Write(OutputStream* out) { - return out->WriteFully(oat_file_->Begin(), oat_file_->End() - oat_file_->Begin()); - } - private: static void SkipAllFields(ClassDataItemIterator* it) { while (it->HasNextStaticField()) { @@ -299,7 +295,6 @@ class OatSymbolizer FINAL : public CodeOutput { const OatFile* oat_file_; std::unique_ptr<ElfBuilder<ElfTypes32> > builder_; - File* elf_output_; std::unordered_map<uint32_t, uint32_t> state_; const std::string output_name_; }; @@ -2097,7 +2092,7 @@ class ImageDumper { gc::space::ImageSpace& image_space_; const ImageHeader& image_header_; std::unique_ptr<OatDumper> oat_dumper_; - std::unique_ptr<OatDumperOptions> oat_dumper_options_; + OatDumperOptions* oat_dumper_options_; DISALLOW_COPY_AND_ASSIGN(ImageDumper); }; @@ -2203,10 +2198,6 @@ static int SymbolizeOat(const char* oat_filename, std::string& output_name) { } OatSymbolizer oat_symbolizer(oat_file, output_name); - if (!oat_symbolizer.Init()) { - fprintf(stderr, "Failed to initialize symbolizer\n"); - return EXIT_FAILURE; - } if (!oat_symbolizer.Symbolize()) { fprintf(stderr, "Failed to symbolize\n"); return EXIT_FAILURE; diff --git a/runtime/Android.mk b/runtime/Android.mk index 240799e..ece9d4b 100644 --- a/runtime/Android.mk +++ b/runtime/Android.mk @@ -468,7 +468,7 @@ $$(ENUM_OPERATOR_OUT_GEN): $$(GENERATED_SRC_DIR)/%_operator_out.cc : $(LOCAL_PAT ifeq ($$(art_target_or_host),target) LOCAL_SHARED_LIBRARIES += libdl # ZipArchive support, the order matters here to get all symbols. - LOCAL_STATIC_LIBRARIES := libziparchive libz + LOCAL_STATIC_LIBRARIES := libziparchive libz libbase # For android::FileMap used by libziparchive. LOCAL_SHARED_LIBRARIES += libutils # For liblog, atrace, properties, ashmem, set_sched_policy and socket_peer_is_trusted. diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index 1d316fc..f8b0734 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -182,7 +182,7 @@ // Restore xSELF as it might be scratched. 
mov xSELF, xETR // ETR - ldr xETR, [sp, #16] + ldr xETR, [sp, #32] .cfi_restore x21 add sp, sp, #112 diff --git a/runtime/arch/mips64/context_mips64.cc b/runtime/arch/mips64/context_mips64.cc index ce99b40..6b3f4c9 100644 --- a/runtime/arch/mips64/context_mips64.cc +++ b/runtime/arch/mips64/context_mips64.cc @@ -18,7 +18,7 @@ #include "mirror/art_method-inl.h" #include "quick/quick_method_frame_info.h" -#include "util.h" +#include "utils.h" namespace art { namespace mips64 { diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S index d781e76..8330d0c 100644 --- a/runtime/arch/mips64/quick_entrypoints_mips64.S +++ b/runtime/arch/mips64/quick_entrypoints_mips64.S @@ -175,12 +175,6 @@ // This assumes the top part of these stack frame types are identical. #define REFS_AND_ARGS_MINUS_REFS_SIZE (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE) - /* - * Macro that sets up the callee save frame to conform with - * Runtime::CreateCalleeSaveMethod(kRefsAndArgs). Restoration assumes - * non-moving GC. - * callee-save: padding + $f12-$f19 + $a1-$a7 + $s2-$s7 + $gp + $ra + $s8 = 24 total + 1 words padding + Method* - */ .macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL daddiu $sp, $sp, -208 .cfi_adjust_cfa_offset 208 @@ -232,16 +226,15 @@ s.d $f14, 32($sp) s.d $f13, 24($sp) # = kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset s.d $f12, 16($sp) # This isn't necessary to store. - - # 1x8 bytes paddig + Method* - ld $v0, %got(_ZN3art7Runtime9instance_E)($gp) - ld $v0, 0($v0) - THIS_LOAD_REQUIRES_READ_BARRIER - lwu $v0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($v0) - sw $v0, 0($sp) # Place Method* at bottom of stack. - sd $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF) # Place sp in Thread::Current()->top_quick_frame. + # 1x8 bytes padding + Method* .endm + /* + * Macro that sets up the callee save frame to conform with + * Runtime::CreateCalleeSaveMethod(kRefsAndArgs). Restoration assumes + * non-moving GC. + * callee-save: padding + $f12-$f19 + $a1-$a7 + $s2-$s7 + $gp + $ra + $s8 = 24 total + 1 words padding + Method* + */ .macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL # load appropriate callee-save-method @@ -253,6 +246,12 @@ sd $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF) # Place sp in Thread::Current()->top_quick_frame. .endm +.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0 + SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL + sw $a0, 0($sp) # Place Method* at bottom of stack. + sd $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF) # Place sp in Thread::Current()->top_quick_frame. 
+.endm + .macro RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME ld $ra, 200($sp) .cfi_restore 31 @@ -1326,8 +1325,7 @@ END art_quick_test_suspend */ .extern artQuickProxyInvokeHandler ENTRY art_quick_proxy_invoke_handler - SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME - sd $a0, 0($sp) # place proxy method at bottom of frame + SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0 move $a2, rSELF # pass Thread::Current jal artQuickProxyInvokeHandler # (Method* proxy method, receiver, Thread*, SP) move $a3, $sp # pass $sp @@ -1377,8 +1375,7 @@ END art_quick_resolution_trampoline .extern artQuickGenericJniTrampoline .extern artQuickGenericJniEndTrampoline ENTRY art_quick_generic_jni_trampoline - SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL - sd $a0, 0($sp) # store native ArtMethod* to bottom of stack + SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_A0 move $s8, $sp # save $sp # prepare for call to artQuickGenericJniTrampoline(Thread*, SP) diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index 962e821..b099088 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -82,10 +82,6 @@ namespace art { static constexpr bool kSanityCheckObjects = kIsDebugBuild; -// Do a simple class redefinition check in OpenDexFilesFromOat. This is a conservative check to -// avoid problems with compile-time class-path != runtime class-path. -static constexpr bool kCheckForDexCollisions = true; - static void ThrowNoClassDefFoundError(const char* fmt, ...) __attribute__((__format__(__printf__, 1, 2))) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -743,6 +739,8 @@ class DexFileAndClassPair : ValueObject { const char* rhsDescriptor = rhs.cached_descriptor_; int cmp = strcmp(lhsDescriptor, rhsDescriptor); if (cmp != 0) { + // Note that the order must be reversed. We want to iterate over the classes in dex files. + // They are sorted lexicographically. Thus, the priority-queue must be a min-queue. return cmp > 0; } return dex_file_ < rhs.dex_file_; @@ -768,6 +766,11 @@ class DexFileAndClassPair : ValueObject { return dex_file_; } + void DeleteDexFile() { + delete dex_file_; + dex_file_ = nullptr; + } + private: static const char* GetClassDescriptor(const DexFile* dex_file, size_t index) { const DexFile::ClassDef& class_def = dex_file->GetClassDef(static_cast<uint16_t>(index)); @@ -799,13 +802,13 @@ static void AddDexFilesFromOat(const OatFile* oat_file, bool already_loaded, } } -static void AddNext(const DexFileAndClassPair& original, +static void AddNext(DexFileAndClassPair* original, std::priority_queue<DexFileAndClassPair>* heap) { - if (original.DexFileHasMoreClasses()) { - heap->push(original.GetNext()); + if (original->DexFileHasMoreClasses()) { + heap->push(original->GetNext()); } else { // Need to delete the dex file. - delete original.GetDexFile(); + original->DeleteDexFile(); } } @@ -824,19 +827,17 @@ static void FreeDexFilesInHeap(std::priority_queue<DexFileAndClassPair>* heap) { // the two elements agree on whether their dex file was from an already-loaded oat-file or the // new oat file. Any disagreement indicates a collision. bool ClassLinker::HasCollisions(const OatFile* oat_file, std::string* error_msg) { - if (!kCheckForDexCollisions) { - return false; - } - // Dex files are registered late - once a class is actually being loaded. We have to compare - // against the open oat files. + // against the open oat files. Take the dex_lock_ that protects oat_files_ accesses. 
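The reversed comparison documented in the hunk above turns std::priority_queue, a max-heap by default, into a min-queue, so descriptors drain in ascending lexicographic order. A self-contained illustration with a stand-in Entry type (not the real DexFileAndClassPair):

#include <cassert>
#include <queue>
#include <string>

struct Entry {
  std::string descriptor;
  bool operator<(const Entry& rhs) const {
    // Reversed on purpose: the queue's "largest" element is then the
    // lexicographically smallest descriptor.
    return descriptor.compare(rhs.descriptor) > 0;
  }
};

int main() {
  std::priority_queue<Entry> queue;
  queue.push({"Lcom/b/B;"});
  queue.push({"Lcom/a/A;"});
  assert(queue.top().descriptor == "Lcom/a/A;");  // smallest descriptor pops first
  return 0;
}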
ReaderMutexLock mu(Thread::Current(), dex_lock_); - std::priority_queue<DexFileAndClassPair> heap; + std::priority_queue<DexFileAndClassPair> queue; // Add dex files from already loaded oat files, but skip boot. { - // To grab the boot oat, look at the dex files in the boot classpath. + // To grab the boot oat, look at the dex files in the boot classpath. Any of those is fine, as + // they were all compiled into the same oat file. So grab the first one, which is guaranteed to + // exist if the boot class-path isn't empty. const OatFile* boot_oat = nullptr; if (!boot_class_path_.empty()) { const DexFile* boot_dex_file = boot_class_path_[0]; @@ -850,26 +851,26 @@ bool ClassLinker::HasCollisions(const OatFile* oat_file, std::string* error_msg) if (loaded_oat_file == boot_oat) { continue; } - AddDexFilesFromOat(loaded_oat_file, true, &heap); + AddDexFilesFromOat(loaded_oat_file, true, &queue); } } - if (heap.empty()) { + if (queue.empty()) { // No other oat files, return early. return false; } // Add dex files from the oat file to check. - AddDexFilesFromOat(oat_file, false, &heap); + AddDexFilesFromOat(oat_file, false, &queue); - // Now drain the heap. - while (!heap.empty()) { - DexFileAndClassPair compare_pop = heap.top(); - heap.pop(); + // Now drain the queue. + while (!queue.empty()) { + DexFileAndClassPair compare_pop = queue.top(); + queue.pop(); // Compare against the following elements. - while (!heap.empty()) { - DexFileAndClassPair top = heap.top(); + while (!queue.empty()) { + DexFileAndClassPair top = queue.top(); if (strcmp(compare_pop.GetCachedDescriptor(), top.GetCachedDescriptor()) == 0) { // Same descriptor. Check whether it's crossing old-oat-files to new-oat-files. @@ -879,18 +880,18 @@ bool ClassLinker::HasCollisions(const OatFile* oat_file, std::string* error_msg) compare_pop.GetCachedDescriptor(), compare_pop.GetDexFile()->GetLocation().c_str(), top.GetDexFile()->GetLocation().c_str()); - FreeDexFilesInHeap(&heap); + FreeDexFilesInHeap(&queue); return true; } // Pop it. - heap.pop(); - AddNext(top, &heap); + queue.pop(); + AddNext(&top, &queue); } else { // Something else. Done here. break; } } - AddNext(compare_pop, &heap); + AddNext(&compare_pop, &queue); } return false; @@ -941,11 +942,10 @@ std::vector<std::unique_ptr<const DexFile>> ClassLinker::OpenDexFilesFromOat( // Get the oat file on disk. std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile(); if (oat_file.get() != nullptr) { - // Take the file only if it has no collisions. - if (!HasCollisions(oat_file.get(), &error_msg)) { - source_oat_file = oat_file.release(); - RegisterOatFile(source_oat_file); - } else { + // Take the file only if it has no collisions, or we must take it because of preopting. + bool accept_oat_file = !HasCollisions(oat_file.get(), &error_msg); + if (!accept_oat_file) { + // Failed the collision check. Print warning. if (Runtime::Current()->IsDexFileFallbackEnabled()) { LOG(WARNING) << "Found duplicate classes, falling back to interpreter mode for " << dex_location; @@ -954,6 +954,19 @@ std::vector<std::unique_ptr<const DexFile>> ClassLinker::OpenDexFilesFromOat( " load classes for " << dex_location; } LOG(WARNING) << error_msg; + + // However, if the app was part of /system and preopted, there is no original dex file + // available. In that case grudgingly accept the oat file. + if (!DexFile::MaybeDex(dex_location)) { + accept_oat_file = true; + LOG(WARNING) << "Dex location " << dex_location << " does not seem to include dex file. " + << "Allow oat file use. 
This is potentially dangerous."; + } + } + + if (accept_oat_file) { + source_oat_file = oat_file.release(); + RegisterOatFile(source_oat_file); } } } @@ -975,8 +988,7 @@ std::vector<std::unique_ptr<const DexFile>> ClassLinker::OpenDexFilesFromOat( if (Runtime::Current()->IsDexFileFallbackEnabled()) { if (!DexFile::Open(dex_location, dex_location, &error_msg, &dex_files)) { LOG(WARNING) << error_msg; - error_msgs->push_back("Failed to open dex files from " - + std::string(dex_location)); + error_msgs->push_back("Failed to open dex files from " + std::string(dex_location)); } } else { error_msgs->push_back("Fallback mode disabled, skipping dex files."); diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h index 9917378..34fdd8d 100644 --- a/runtime/common_runtime_test.h +++ b/runtime/common_runtime_test.h @@ -182,7 +182,7 @@ class CheckJniAbortCatcher { } #define TEST_DISABLED_FOR_MIPS() \ - if (kRuntimeISA == kMips || kRuntimeISA == kMips64) { \ + if (kRuntimeISA == kMips) { \ printf("WARNING: TEST DISABLED FOR MIPS\n"); \ return; \ } diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc index 20098e7..dfe5a04 100644 --- a/runtime/dex_file.cc +++ b/runtime/dex_file.cc @@ -153,6 +153,31 @@ bool DexFile::Open(const char* filename, const char* location, std::string* erro return false; } +static bool ContainsClassesDex(int fd, const char* filename) { + std::string error_msg; + std::unique_ptr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(fd, filename, &error_msg)); + if (zip_archive.get() == nullptr) { + return false; + } + std::unique_ptr<ZipEntry> zip_entry(zip_archive->Find(DexFile::kClassesDex, &error_msg)); + return (zip_entry.get() != nullptr); +} + +bool DexFile::MaybeDex(const char* filename) { + uint32_t magic; + std::string error_msg; + ScopedFd fd(OpenAndReadMagic(filename, &magic, &error_msg)); + if (fd.get() == -1) { + return false; + } + if (IsZipMagic(magic)) { + return ContainsClassesDex(fd.release(), filename); + } else if (IsDexMagic(magic)) { + return true; + } + return false; +} + int DexFile::GetPermissions() const { if (mem_map_.get() == nullptr) { return 0; diff --git a/runtime/dex_file.h b/runtime/dex_file.h index 6b3f883..84eaa4a 100644 --- a/runtime/dex_file.h +++ b/runtime/dex_file.h @@ -388,6 +388,10 @@ class DexFile { static bool Open(const char* filename, const char* location, std::string* error_msg, std::vector<std::unique_ptr<const DexFile>>* dex_files); + // Checks whether the given file has the dex magic, or is a zip file with a classes.dex entry. + // If this function returns false, Open will not succeed. The inverse is not true, however. 
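MaybeDex above is a cheap pre-check: it sniffs the file magic and, for zip archives, asks whether a classes.dex entry exists. A simplified sketch of the magic-sniffing step, using plain stdio in place of ART's OpenAndReadMagic/ScopedFd and eliding the zip-entry lookup:

#include <cstdint>
#include <cstdio>
#include <cstring>

static bool LooksLikeDexOrZip(const char* filename) {
  FILE* f = fopen(filename, "rb");
  if (f == nullptr) {
    return false;
  }
  uint8_t magic[4] = {0};
  const size_t n = fread(magic, 1, sizeof(magic), f);
  fclose(f);
  if (n != sizeof(magic)) {
    return false;
  }
  if (memcmp(magic, "PK\x03\x04", 4) == 0) {
    return true;  // zip archive: the real code then looks for a classes.dex entry
  }
  return memcmp(magic, "dex\n", 4) == 0;  // dex files begin with the magic "dex\n035\0"
}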
+ static bool MaybeDex(const char* filename); + // Opens .dex file, backed by existing memory static std::unique_ptr<const DexFile> Open(const uint8_t* base, size_t size, const std::string& location, diff --git a/runtime/gc/accounting/bitmap.h b/runtime/gc/accounting/bitmap.h index b294d49..eb00472 100644 --- a/runtime/gc/accounting/bitmap.h +++ b/runtime/gc/accounting/bitmap.h @@ -121,7 +121,7 @@ class Bitmap { const size_t bitmap_size_; private: - DISALLOW_COPY_AND_ASSIGN(Bitmap); + DISALLOW_IMPLICIT_CONSTRUCTORS(Bitmap); }; // One bit per kAlignment in range (start, end] @@ -184,6 +184,8 @@ class MemoryRangeBitmap : public Bitmap { uintptr_t const cover_begin_; uintptr_t const cover_end_; + + DISALLOW_IMPLICIT_CONSTRUCTORS(MemoryRangeBitmap); }; } // namespace accounting diff --git a/runtime/gc/accounting/card_table.h b/runtime/gc/accounting/card_table.h index 75ef58a..34e6aa3 100644 --- a/runtime/gc/accounting/card_table.h +++ b/runtime/gc/accounting/card_table.h @@ -146,6 +146,8 @@ class CardTable { // Card table doesn't begin at the beginning of the mem_map_, instead it is displaced by offset // to allow the byte value of biased_begin_ to equal GC_CARD_DIRTY const size_t offset_; + + DISALLOW_IMPLICIT_CONSTRUCTORS(CardTable); }; } // namespace accounting diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h index 93de035..60ea6b6 100644 --- a/runtime/gc/collector/concurrent_copying.h +++ b/runtime/gc/collector/concurrent_copying.h @@ -279,7 +279,7 @@ class ConcurrentCopying : public GarbageCollector { friend class FlipCallback; friend class ConcurrentCopyingComputeUnevacFromSpaceLiveRatioVisitor; - DISALLOW_COPY_AND_ASSIGN(ConcurrentCopying); + DISALLOW_IMPLICIT_CONSTRUCTORS(ConcurrentCopying); }; } // namespace collector diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h index c5a8d5d..9b76d1a 100644 --- a/runtime/gc/collector/garbage_collector.h +++ b/runtime/gc/collector/garbage_collector.h @@ -190,6 +190,9 @@ class GarbageCollector : public RootVisitor { int64_t total_freed_bytes_; CumulativeLogger cumulative_timings_; mutable Mutex pause_histogram_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(GarbageCollector); }; } // namespace collector diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h index 4337644..f59a2cd 100644 --- a/runtime/gc/collector/mark_compact.h +++ b/runtime/gc/collector/mark_compact.h @@ -251,7 +251,7 @@ class MarkCompact : public GarbageCollector { friend class UpdateReferenceVisitor; friend class UpdateRootVisitor; - DISALLOW_COPY_AND_ASSIGN(MarkCompact); + DISALLOW_IMPLICIT_CONSTRUCTORS(MarkCompact); }; } // namespace collector diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc index f0e8d14..1068e90 100644 --- a/runtime/gc/collector/mark_sweep.cc +++ b/runtime/gc/collector/mark_sweep.cc @@ -381,9 +381,11 @@ class MarkSweepMarkObjectSlowPath { if (UNLIKELY(obj == nullptr || !IsAligned<kPageSize>(obj) || (kIsDebugBuild && large_object_space != nullptr && !large_object_space->Contains(obj)))) { - LOG(ERROR) << "Tried to mark " << obj << " not contained by any spaces"; - LOG(ERROR) << "Attempting see if it's a bad root"; + LOG(INTERNAL_FATAL) << "Tried to mark " << obj << " not contained by any spaces"; + LOG(INTERNAL_FATAL) << "Attempting see if it's a bad root"; mark_sweep_->VerifyRoots(); + PrintFileToLog("/proc/self/maps", LogSeverity::INTERNAL_FATAL); + 
MemMap::DumpMaps(LOG(INTERNAL_FATAL)); LOG(FATAL) << "Can't mark invalid object"; } } @@ -498,7 +500,7 @@ class VerifyRootVisitor : public SingleRootVisitor { if (heap->GetLiveBitmap()->GetContinuousSpaceBitmap(root) == nullptr) { space::LargeObjectSpace* large_object_space = heap->GetLargeObjectsSpace(); if (large_object_space != nullptr && !large_object_space->Contains(root)) { - LOG(ERROR) << "Found invalid root: " << root << " " << info; + LOG(INTERNAL_FATAL) << "Found invalid root: " << root << " " << info; } } } diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h index fad3403..7e1af7b 100644 --- a/runtime/gc/collector/mark_sweep.h +++ b/runtime/gc/collector/mark_sweep.h @@ -336,7 +336,7 @@ class MarkSweep : public GarbageCollector { friend class VerifyRootMarkedVisitor; friend class VerifyRootVisitor; - DISALLOW_COPY_AND_ASSIGN(MarkSweep); + DISALLOW_IMPLICIT_CONSTRUCTORS(MarkSweep); }; } // namespace collector diff --git a/runtime/gc/collector/partial_mark_sweep.h b/runtime/gc/collector/partial_mark_sweep.h index ac0d068..1a211cd 100644 --- a/runtime/gc/collector/partial_mark_sweep.h +++ b/runtime/gc/collector/partial_mark_sweep.h @@ -40,7 +40,7 @@ class PartialMarkSweep : public MarkSweep { virtual void BindBitmaps() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); private: - DISALLOW_COPY_AND_ASSIGN(PartialMarkSweep); + DISALLOW_IMPLICIT_CONSTRUCTORS(PartialMarkSweep); }; } // namespace collector diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h index 61fbead..3c25f53 100644 --- a/runtime/gc/collector/semi_space.h +++ b/runtime/gc/collector/semi_space.h @@ -278,7 +278,7 @@ class SemiSpace : public GarbageCollector { private: friend class BitmapSetSlowPathVisitor; - DISALLOW_COPY_AND_ASSIGN(SemiSpace); + DISALLOW_IMPLICIT_CONSTRUCTORS(SemiSpace); }; } // namespace collector diff --git a/runtime/gc/collector/sticky_mark_sweep.h b/runtime/gc/collector/sticky_mark_sweep.h index 4f9dabf..b9ef137 100644 --- a/runtime/gc/collector/sticky_mark_sweep.h +++ b/runtime/gc/collector/sticky_mark_sweep.h @@ -47,7 +47,7 @@ class StickyMarkSweep FINAL : public PartialMarkSweep { EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_); private: - DISALLOW_COPY_AND_ASSIGN(StickyMarkSweep); + DISALLOW_IMPLICIT_CONSTRUCTORS(StickyMarkSweep); }; } // namespace collector diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h index 3e56205..fbf36e8 100644 --- a/runtime/gc/heap-inl.h +++ b/runtime/gc/heap-inl.h @@ -371,11 +371,8 @@ inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator } inline Heap::AllocationTimer::AllocationTimer(Heap* heap, mirror::Object** allocated_obj_ptr) - : heap_(heap), allocated_obj_ptr_(allocated_obj_ptr) { - if (kMeasureAllocationTime) { - allocation_start_time_ = NanoTime() / kTimeAdjust; - } -} + : heap_(heap), allocated_obj_ptr_(allocated_obj_ptr), + allocation_start_time_(kMeasureAllocationTime ? 
NanoTime() / kTimeAdjust : 0u) { } inline Heap::AllocationTimer::~AllocationTimer() { if (kMeasureAllocationTime) { @@ -419,7 +416,7 @@ inline bool Heap::IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t inline void Heap::CheckConcurrentGC(Thread* self, size_t new_num_bytes_allocated, mirror::Object** obj) { if (UNLIKELY(new_num_bytes_allocated >= concurrent_start_bytes_)) { - RequestConcurrentGCAndSaveObject(self, obj); + RequestConcurrentGCAndSaveObject(self, false, obj); } } diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index b80c4b6..cbbc76c 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -3325,20 +3325,24 @@ void Heap::AddFinalizerReference(Thread* self, mirror::Object** object) { *object = soa.Decode<mirror::Object*>(arg.get()); } -void Heap::RequestConcurrentGCAndSaveObject(Thread* self, mirror::Object** obj) { +void Heap::RequestConcurrentGCAndSaveObject(Thread* self, bool force_full, mirror::Object** obj) { StackHandleScope<1> hs(self); HandleWrapper<mirror::Object> wrapper(hs.NewHandleWrapper(obj)); - RequestConcurrentGC(self); + RequestConcurrentGC(self, force_full); } class Heap::ConcurrentGCTask : public HeapTask { public: - explicit ConcurrentGCTask(uint64_t target_time) : HeapTask(target_time) { } + explicit ConcurrentGCTask(uint64_t target_time, bool force_full) + : HeapTask(target_time), force_full_(force_full) { } virtual void Run(Thread* self) OVERRIDE { gc::Heap* heap = Runtime::Current()->GetHeap(); - heap->ConcurrentGC(self); + heap->ConcurrentGC(self, force_full_); heap->ClearConcurrentGCRequest(); } + + private: + const bool force_full_; // If true, force full (or partial) collection. }; static bool CanAddHeapTask(Thread* self) LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_) { @@ -3351,24 +3355,30 @@ void Heap::ClearConcurrentGCRequest() { concurrent_gc_pending_.StoreRelaxed(false); } -void Heap::RequestConcurrentGC(Thread* self) { +void Heap::RequestConcurrentGC(Thread* self, bool force_full) { if (CanAddHeapTask(self) && concurrent_gc_pending_.CompareExchangeStrongSequentiallyConsistent(false, true)) { - task_processor_->AddTask(self, new ConcurrentGCTask(NanoTime())); // Start straight away. + task_processor_->AddTask(self, new ConcurrentGCTask(NanoTime(), // Start straight away. + force_full)); } } -void Heap::ConcurrentGC(Thread* self) { +void Heap::ConcurrentGC(Thread* self, bool force_full) { if (!Runtime::Current()->IsShuttingDown(self)) { // Wait for any GCs currently running to finish. if (WaitForGcToComplete(kGcCauseBackground, self) == collector::kGcTypeNone) { // If the we can't run the GC type we wanted to run, find the next appropriate one and try that // instead. E.g. can't do partial, so do full instead. - if (CollectGarbageInternal(next_gc_type_, kGcCauseBackground, false) == + collector::GcType next_gc_type = next_gc_type_; + // If forcing full and next gc type is sticky, override with a non-sticky type. + if (force_full && next_gc_type == collector::kGcTypeSticky) { + next_gc_type = HasZygoteSpace() ? collector::kGcTypePartial : collector::kGcTypeFull; + } + if (CollectGarbageInternal(next_gc_type, kGcCauseBackground, false) == collector::kGcTypeNone) { for (collector::GcType gc_type : gc_plan_) { // Attempt to run the collector, if we succeed, we are done. 
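The force_full plumbing above boils down to one decision: a forced background collection must never run as a sticky collection. Extracted into a hypothetical free function that mirrors the logic inside ConcurrentGC:

enum GcType { kGcTypeSticky, kGcTypePartial, kGcTypeFull };

// If a full collection is forced and the planned type is sticky, promote it:
// partial when a zygote space exists, full otherwise.
static GcType PickGcType(GcType planned, bool force_full, bool has_zygote_space) {
  if (force_full && planned == kGcTypeSticky) {
    return has_zygote_space ? kGcTypePartial : kGcTypeFull;
  }
  return planned;
}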
- if (gc_type > next_gc_type_ && + if (gc_type > next_gc_type && CollectGarbageInternal(gc_type, kGcCauseBackground, false) != collector::kGcTypeNone) { break; @@ -3553,7 +3563,7 @@ void Heap::RegisterNativeAllocation(JNIEnv* env, size_t bytes) { UpdateMaxNativeFootprint(); } else if (!IsGCRequestPending()) { if (IsGcConcurrent()) { - RequestConcurrentGC(self); + RequestConcurrentGC(self, true); // Request non-sticky type. } else { CollectGarbageInternal(gc_type, kGcCauseForNativeAlloc, false); } diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h index 565687c..90249f9 100644 --- a/runtime/gc/heap.h +++ b/runtime/gc/heap.h @@ -288,7 +288,7 @@ class Heap { // Does a concurrent GC, should only be called by the GC daemon thread // through runtime. - void ConcurrentGC(Thread* self) LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_); + void ConcurrentGC(Thread* self, bool force_full) LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_); // Implements VMDebug.countInstancesOfClass and JDWP VM_InstanceCount. // The boolean decides whether to use IsAssignableFrom or == when comparing classes. @@ -664,7 +664,7 @@ class Heap { void RequestTrim(Thread* self) LOCKS_EXCLUDED(pending_task_lock_); // Request asynchronous GC. - void RequestConcurrentGC(Thread* self) LOCKS_EXCLUDED(pending_task_lock_); + void RequestConcurrentGC(Thread* self, bool force_full) LOCKS_EXCLUDED(pending_task_lock_); // Whether or not we may use a garbage collector, used so that we only create collectors we need. bool MayUseCollector(CollectorType type) const; @@ -786,7 +786,7 @@ class Heap { void RequestCollectorTransition(CollectorType desired_collector_type, uint64_t delta_time) LOCKS_EXCLUDED(pending_task_lock_); - void RequestConcurrentGCAndSaveObject(Thread* self, mirror::Object** obj) + void RequestConcurrentGCAndSaveObject(Thread* self, bool force_full, mirror::Object** obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); bool IsGCRequestPending() const; @@ -1201,41 +1201,23 @@ class Heap { friend class VerifyReferenceVisitor; friend class VerifyObjectVisitor; friend class ScopedHeapFill; - friend class ScopedHeapLock; friend class space::SpaceTest; class AllocationTimer { + public: + ALWAYS_INLINE AllocationTimer(Heap* heap, mirror::Object** allocated_obj_ptr); + ALWAYS_INLINE ~AllocationTimer(); private: - Heap* heap_; + Heap* const heap_; mirror::Object** allocated_obj_ptr_; - uint64_t allocation_start_time_; - public: - AllocationTimer(Heap* heap, mirror::Object** allocated_obj_ptr); - ~AllocationTimer(); + const uint64_t allocation_start_time_; + + DISALLOW_IMPLICIT_CONSTRUCTORS(AllocationTimer); }; DISALLOW_IMPLICIT_CONSTRUCTORS(Heap); }; -// ScopedHeapFill changes the bytes allocated counter to be equal to the growth limit. This -// causes the next allocation to perform a GC and possibly an OOM. It can be used to ensure that a -// GC happens in specific methods such as ThrowIllegalMonitorStateExceptionF in Monitor::Wait. 
-class ScopedHeapFill { - public: - explicit ScopedHeapFill(Heap* heap) - : heap_(heap), - delta_(heap_->GetMaxMemory() - heap_->GetBytesAllocated()) { - heap_->num_bytes_allocated_.FetchAndAddSequentiallyConsistent(delta_); - } - ~ScopedHeapFill() { - heap_->num_bytes_allocated_.FetchAndSubSequentiallyConsistent(delta_); - } - - private: - Heap* const heap_; - const int64_t delta_; -}; - } // namespace gc } // namespace art diff --git a/runtime/gc/reference_processor.h b/runtime/gc/reference_processor.h index c67fd98..a44319b 100644 --- a/runtime/gc/reference_processor.h +++ b/runtime/gc/reference_processor.h @@ -81,6 +81,9 @@ class ReferenceProcessor { IsHeapReferenceMarkedCallback* is_marked_callback_; MarkObjectCallback* mark_callback_; void* arg_; + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(ProcessReferencesArgs); }; bool SlowPathEnabled() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); // Called by ProcessReferences. @@ -105,6 +108,8 @@ class ReferenceProcessor { ReferenceQueue finalizer_reference_queue_; ReferenceQueue phantom_reference_queue_; ReferenceQueue cleared_references_; + + DISALLOW_COPY_AND_ASSIGN(ReferenceProcessor); }; } // namespace gc diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h index f7d89d0..c45be85 100644 --- a/runtime/gc/reference_queue.h +++ b/runtime/gc/reference_queue.h @@ -106,7 +106,7 @@ class ReferenceQueue { // GC types. mirror::Reference* list_; - DISALLOW_COPY_AND_ASSIGN(ReferenceQueue); + DISALLOW_IMPLICIT_CONSTRUCTORS(ReferenceQueue); }; } // namespace gc diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h index f2378d9..871ebac 100644 --- a/runtime/gc/space/space.h +++ b/runtime/gc/space/space.h @@ -187,7 +187,7 @@ class Space { private: friend class art::gc::Heap; - DISALLOW_COPY_AND_ASSIGN(Space); + DISALLOW_IMPLICIT_CONSTRUCTORS(Space); }; std::ostream& operator<<(std::ostream& os, const Space& space); @@ -337,7 +337,7 @@ class ContinuousSpace : public Space { uint8_t* limit_; private: - DISALLOW_COPY_AND_ASSIGN(ContinuousSpace); + DISALLOW_IMPLICIT_CONSTRUCTORS(ContinuousSpace); }; // A space where objects may be allocated higgledy-piggledy throughout virtual memory. Currently @@ -366,7 +366,7 @@ class DiscontinuousSpace : public Space { std::unique_ptr<accounting::LargeObjectBitmap> mark_bitmap_; private: - DISALLOW_COPY_AND_ASSIGN(DiscontinuousSpace); + DISALLOW_IMPLICIT_CONSTRUCTORS(DiscontinuousSpace); }; class MemMapSpace : public ContinuousSpace { @@ -400,7 +400,7 @@ class MemMapSpace : public ContinuousSpace { std::unique_ptr<MemMap> mem_map_; private: - DISALLOW_COPY_AND_ASSIGN(MemMapSpace); + DISALLOW_IMPLICIT_CONSTRUCTORS(MemMapSpace); }; // Used by the heap compaction interface to enable copying from one type of alloc space to another. @@ -453,7 +453,7 @@ class ContinuousMemMapAllocSpace : public MemMapSpace, public AllocSpace { private: friend class gc::Heap; - DISALLOW_COPY_AND_ASSIGN(ContinuousMemMapAllocSpace); + DISALLOW_IMPLICIT_CONSTRUCTORS(ContinuousMemMapAllocSpace); }; } // namespace space diff --git a/runtime/gc/task_processor.h b/runtime/gc/task_processor.h index 67e3a54..5f48619 100644 --- a/runtime/gc/task_processor.h +++ b/runtime/gc/task_processor.h @@ -46,6 +46,7 @@ class HeapTask : public SelfDeletingTask { uint64_t target_run_time_; friend class TaskProcessor; + DISALLOW_IMPLICIT_CONSTRUCTORS(HeapTask); }; // Used to process GC tasks (heap trim, heap transitions, concurrent GC). 
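The recurring DISALLOW_COPY_AND_ASSIGN -> DISALLOW_IMPLICIT_CONSTRUCTORS swaps throughout this change tighten the affected classes one step further: the latter macro also removes the default constructor. Roughly this shape (the authoritative definitions live in ART's base/macros.h; this is a sketch, not a verbatim copy):

#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
  TypeName(const TypeName&) = delete;      \
  TypeName& operator=(const TypeName&) = delete

#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
  TypeName() = delete;                           \
  DISALLOW_COPY_AND_ASSIGN(TypeName)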
@@ -78,6 +79,8 @@ class TaskProcessor { std::unique_ptr<ConditionVariable> cond_ GUARDED_BY(lock_); std::multiset<HeapTask*, CompareByTargetRunTime> tasks_ GUARDED_BY(lock_); Thread* running_thread_ GUARDED_BY(lock_); + + DISALLOW_COPY_AND_ASSIGN(TaskProcessor); }; } // namespace gc diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc index e2b9559..0ef58ea 100644 --- a/runtime/indirect_reference_table.cc +++ b/runtime/indirect_reference_table.cc @@ -175,10 +175,16 @@ bool IndirectReferenceTable::Remove(uint32_t cookie, IndirectRef iref) { DCHECK(table_ != nullptr); DCHECK_GE(segment_state_.parts.numHoles, prevState.parts.numHoles); - if (GetIndirectRefKind(iref) == kHandleScopeOrInvalid && - Thread::Current()->HandleScopeContains(reinterpret_cast<jobject>(iref))) { - LOG(WARNING) << "Attempt to remove local handle scope entry from IRT, ignoring"; - return true; + if (GetIndirectRefKind(iref) == kHandleScopeOrInvalid) { + auto* self = Thread::Current(); + if (self->HandleScopeContains(reinterpret_cast<jobject>(iref))) { + auto* env = self->GetJniEnv(); + DCHECK(env != nullptr); + if (env->check_jni) { + LOG(WARNING) << "Attempt to remove local handle scope entry from IRT, ignoring"; + } + return true; + } } const int idx = ExtractIndex(iref); if (idx < bottomIndex) { diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc index 423b952..a37aee5 100644 --- a/runtime/interpreter/interpreter.cc +++ b/runtime/interpreter/interpreter.cc @@ -423,7 +423,7 @@ void EnterInterpreterFromDeoptimize(Thread* self, ShadowFrame* shadow_frame, JVa } ShadowFrame* old_frame = shadow_frame; shadow_frame = shadow_frame->GetLink(); - delete old_frame; + ShadowFrame::DeleteDeoptimizedFrame(old_frame); } ret_val->SetJ(value.GetJ()); } diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc index ef3c6e2..ae67efb 100644 --- a/runtime/interpreter/interpreter_common.cc +++ b/runtime/interpreter/interpreter_common.cc @@ -501,6 +501,7 @@ bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame, uint16_t num_regs; if (LIKELY(code_item != nullptr)) { num_regs = code_item->registers_size_; + DCHECK_EQ(string_init ? num_ins - 1 : num_ins, code_item->ins_size_); } else { DCHECK(called_method->IsNative() || called_method->IsProxyMethod()); num_regs = num_ins; diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h index 3e80aef..f5ad8b8 100644 --- a/runtime/jit/jit.h +++ b/runtime/jit/jit.h @@ -86,6 +86,8 @@ class Jit { std::unique_ptr<jit::JitInstrumentationCache> instrumentation_cache_; std::unique_ptr<jit::JitCodeCache> code_cache_; CompilerCallbacks* compiler_callbacks_; // Owned by the jit compiler. + + DISALLOW_COPY_AND_ASSIGN(Jit); }; class JitOptions { @@ -114,8 +116,9 @@ class JitOptions { bool dump_info_on_shutdown_; JitOptions() : use_jit_(false), code_cache_capacity_(0), compile_threshold_(0), - dump_info_on_shutdown_(false) { - } + dump_info_on_shutdown_(false) { } + + DISALLOW_COPY_AND_ASSIGN(JitOptions); }; } // namespace jit diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h index da891fe..8b76647 100644 --- a/runtime/jit/jit_code_cache.h +++ b/runtime/jit/jit_code_cache.h @@ -130,7 +130,7 @@ class JitCodeCache { // required since we have to implement ClassLinker::GetQuickOatCodeFor for walking stacks. 
SafeMap<mirror::ArtMethod*, const void*> method_code_map_ GUARDED_BY(lock_); - DISALLOW_COPY_AND_ASSIGN(JitCodeCache); + DISALLOW_IMPLICIT_CONSTRUCTORS(JitCodeCache); }; diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc index 160e678..e2f9cec 100644 --- a/runtime/jit/jit_instrumentation.cc +++ b/runtime/jit/jit_instrumentation.cc @@ -47,6 +47,8 @@ class JitCompileTask : public Task { private: mirror::ArtMethod* const method_; JitInstrumentationCache* const cache_; + + DISALLOW_IMPLICIT_CONSTRUCTORS(JitCompileTask); }; JitInstrumentationCache::JitInstrumentationCache(size_t hot_method_threshold) diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h index 9d5d74f..72acaef 100644 --- a/runtime/jit/jit_instrumentation.h +++ b/runtime/jit/jit_instrumentation.h @@ -58,6 +58,8 @@ class JitInstrumentationCache { std::unordered_map<jmethodID, size_t> samples_; size_t hot_method_threshold_; std::unique_ptr<ThreadPool> thread_pool_; + + DISALLOW_IMPLICIT_CONSTRUCTORS(JitInstrumentationCache); }; class JitInstrumentationListener : public instrumentation::InstrumentationListener { @@ -97,6 +99,8 @@ class JitInstrumentationListener : public instrumentation::InstrumentationListen private: JitInstrumentationCache* const instrumentation_cache_; + + DISALLOW_IMPLICIT_CONSTRUCTORS(JitInstrumentationListener); }; } // namespace jit diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc index fc3826b..9bb08a2 100644 --- a/runtime/jni_internal.cc +++ b/runtime/jni_internal.cc @@ -2109,10 +2109,12 @@ class JNI { m = c->FindVirtualMethod(name, sig); } if (m == nullptr) { - c->DumpClass(LOG(ERROR), mirror::Class::kDumpClassFullDetail); - LOG(return_errors ? ERROR : FATAL) << "Failed to register native method " + LOG(return_errors ? ERROR : INTERNAL_FATAL) << "Failed to register native method " << PrettyDescriptor(c) << "." << name << sig << " in " << c->GetDexCache()->GetLocation()->ToModifiedUtf8(); + // Safe to pass in LOG(FATAL) since the log object aborts in destructor and only goes + // out of scope after the DumpClass is done executing. + c->DumpClass(LOG(return_errors ? 
ERROR : FATAL), mirror::Class::kDumpClassFullDetail); ThrowNoSuchMethodError(soa, c, name, sig, "static or non-static"); return JNI_ERR; } else if (!m->IsNative()) { diff --git a/runtime/linear_alloc.h b/runtime/linear_alloc.h index 6d8eda6..fcabcc8 100644 --- a/runtime/linear_alloc.h +++ b/runtime/linear_alloc.h @@ -42,6 +42,8 @@ class LinearAlloc { private: mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; ArenaAllocator allocator_ GUARDED_BY(lock_); + + DISALLOW_IMPLICIT_CONSTRUCTORS(LinearAlloc); }; } // namespace art diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc index 53bb129..9736e15 100644 --- a/runtime/native/dalvik_system_VMRuntime.cc +++ b/runtime/native/dalvik_system_VMRuntime.cc @@ -223,7 +223,7 @@ static void VMRuntime_trimHeap(JNIEnv* env, jobject) { } static void VMRuntime_concurrentGC(JNIEnv* env, jobject) { - Runtime::Current()->GetHeap()->ConcurrentGC(ThreadForEnv(env)); + Runtime::Current()->GetHeap()->ConcurrentGC(ThreadForEnv(env), true); } static void VMRuntime_requestHeapTrim(JNIEnv* env, jobject) { @@ -231,7 +231,7 @@ static void VMRuntime_requestHeapTrim(JNIEnv* env, jobject) { } static void VMRuntime_requestConcurrentGC(JNIEnv* env, jobject) { - Runtime::Current()->GetHeap()->RequestConcurrentGC(ThreadForEnv(env)); + Runtime::Current()->GetHeap()->RequestConcurrentGC(ThreadForEnv(env), true); } static void VMRuntime_startHeapTaskProcessor(JNIEnv* env, jobject) { diff --git a/runtime/quick/inline_method_analyser.cc b/runtime/quick/inline_method_analyser.cc index 9cf4b16..1c404ff 100644 --- a/runtime/quick/inline_method_analyser.cc +++ b/runtime/quick/inline_method_analyser.cc @@ -134,7 +134,10 @@ bool InlineMethodAnalyser::AnalyseMethodCode(verifier::MethodVerifier* verifier, bool InlineMethodAnalyser::IsSyntheticAccessor(MethodReference ref) { const DexFile::MethodId& method_id = ref.dex_file->GetMethodId(ref.dex_method_index); const char* method_name = ref.dex_file->GetMethodName(method_id); - return strncmp(method_name, "access$", strlen("access$")) == 0; + // javac names synthetic accessors "access$nnn", + // jack names them "-getN", "-putN", "-wrapN". + return strncmp(method_name, "access$", strlen("access$")) == 0 || + strncmp(method_name, "-", strlen("-")) == 0; } bool InlineMethodAnalyser::AnalyseReturnMethod(const DexFile::CodeItem* code_item, diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc index 2432603..a80eed6 100644 --- a/runtime/quick_exception_handler.cc +++ b/runtime/quick_exception_handler.cc @@ -202,7 +202,8 @@ class DeoptimizeStackVisitor FINAL : public StackVisitor { h_method, m->GetAccessFlags(), true, true, true, true); bool verifier_success = verifier.Verify(); CHECK(verifier_success) << PrettyMethod(h_method.Get()); - ShadowFrame* new_frame = ShadowFrame::Create(num_regs, nullptr, h_method.Get(), dex_pc); + ShadowFrame* new_frame = ShadowFrame::CreateDeoptimizedFrame( + num_regs, nullptr, h_method.Get(), dex_pc); self_->SetShadowFrameUnderConstruction(new_frame); const std::vector<int32_t> kinds(verifier.DescribeVRegs(dex_pc)); diff --git a/runtime/runtime.cc b/runtime/runtime.cc index eb60318..2633898 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -1566,14 +1566,15 @@ void Runtime::AbortTransactionAndThrowAbortError(Thread* self, const std::string // Throwing an exception may cause its class initialization. If we mark the transaction // aborted before that, we may warn with a false alarm. 
Throwing the exception before // marking the transaction aborted avoids that. - preinitialization_transaction_->ThrowAbortError(self, false); + preinitialization_transaction_->ThrowAbortError(self, &abort_message); preinitialization_transaction_->Abort(abort_message); } void Runtime::ThrowTransactionAbortError(Thread* self) { DCHECK(IsAotCompiler()); DCHECK(IsActiveTransaction()); - preinitialization_transaction_->ThrowAbortError(self, true); + // Passing nullptr means we rethrow an exception with the earlier transaction abort message. + preinitialization_transaction_->ThrowAbortError(self, nullptr); } void Runtime::RecordWriteFieldBoolean(mirror::Object* obj, MemberOffset field_offset, diff --git a/runtime/scoped_thread_state_change.h b/runtime/scoped_thread_state_change.h index b93fcb4..99750a1 100644 --- a/runtime/scoped_thread_state_change.h +++ b/runtime/scoped_thread_state_change.h @@ -133,11 +133,7 @@ class ScopedObjectAccessAlreadyRunnable { T AddLocalReference(mirror::Object* obj) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { Locks::mutator_lock_->AssertSharedHeld(Self()); DCHECK(IsRunnable()); // Don't work with raw objects in non-runnable states. - if (obj == nullptr) { - return nullptr; - } - DCHECK_NE((reinterpret_cast<uintptr_t>(obj) & 0xffff0000), 0xebad0000); - return Env()->AddLocalReference<T>(obj); + return obj == nullptr ? nullptr : Env()->AddLocalReference<T>(obj); } template<typename T> diff --git a/runtime/stack.h b/runtime/stack.h index e2af5ee..3f1bff8 100644 --- a/runtime/stack.h +++ b/runtime/stack.h @@ -74,12 +74,18 @@ class ShadowFrame { } // Create ShadowFrame in heap for deoptimization. - static ShadowFrame* Create(uint32_t num_vregs, ShadowFrame* link, - mirror::ArtMethod* method, uint32_t dex_pc) { + static ShadowFrame* CreateDeoptimizedFrame(uint32_t num_vregs, ShadowFrame* link, + mirror::ArtMethod* method, uint32_t dex_pc) { uint8_t* memory = new uint8_t[ComputeSize(num_vregs)]; return Create(num_vregs, link, method, dex_pc, memory); } + // Delete a ShadowFrame allocated on the heap for deoptimization. + static void DeleteDeoptimizedFrame(ShadowFrame* sf) { + uint8_t* memory = reinterpret_cast<uint8_t*>(sf); + delete[] memory; + } + // Create ShadowFrame for interpreter using provided memory. static ShadowFrame* Create(uint32_t num_vregs, ShadowFrame* link, mirror::ArtMethod* method, uint32_t dex_pc, void* memory) { diff --git a/runtime/trace.h b/runtime/trace.h index 06824b8..df6d5e7 100644 --- a/runtime/trace.h +++ b/runtime/trace.h @@ -189,7 +189,7 @@ class Trace FINAL : public instrumentation::InstrumentationListener { std::unique_ptr<File> trace_file_; // Buffer to store trace data. - std::unique_ptr<uint8_t> buf_; + std::unique_ptr<uint8_t[]> buf_; // Flags enabling extra tracing of things such as alloc counts. 
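The buf_ fix just above is subtle but real: std::unique_ptr<uint8_t> destroys its target with scalar delete, while the trace buffer is allocated with new uint8_t[...], which requires delete[]; the mismatch is undefined behavior. The array specialization pairs the deleter correctly, as this standalone snippet shows:

#include <cstdint>
#include <memory>

int main() {
  // Array form: destruction uses delete[], matching the new uint8_t[n] allocation.
  std::unique_ptr<uint8_t[]> buf(new uint8_t[4096]);
  buf[0] = 42;  // operator[] exists only on the array specialization
  return 0;
}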
const int flags_; diff --git a/runtime/transaction.cc b/runtime/transaction.cc index cc0f15f..ab821d7 100644 --- a/runtime/transaction.cc +++ b/runtime/transaction.cc @@ -70,13 +70,21 @@ void Transaction::Abort(const std::string& abort_message) { } } -void Transaction::ThrowAbortError(Thread* self, bool rethrow) { +void Transaction::ThrowAbortError(Thread* self, const std::string* abort_message) { + const bool rethrow = (abort_message == nullptr); if (kIsDebugBuild && rethrow) { CHECK(IsAborted()) << "Rethrow " << Transaction::kAbortExceptionDescriptor << " while transaction is not aborted"; } - std::string abort_msg(GetAbortMessage()); - self->ThrowNewWrappedException(Transaction::kAbortExceptionSignature, abort_msg.c_str()); + if (rethrow) { + // Rethrow an exception with the earlier abort message stored in the transaction. + self->ThrowNewWrappedException(Transaction::kAbortExceptionSignature, + GetAbortMessage().c_str()); + } else { + // Throw an exception with the given abort message. + self->ThrowNewWrappedException(Transaction::kAbortExceptionSignature, + abort_message->c_str()); + } } bool Transaction::IsAborted() { diff --git a/runtime/transaction.h b/runtime/transaction.h index 4d85662..030478c 100644 --- a/runtime/transaction.h +++ b/runtime/transaction.h @@ -48,7 +48,7 @@ class Transaction FINAL { void Abort(const std::string& abort_message) LOCKS_EXCLUDED(log_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - void ThrowAbortError(Thread* self, bool rethrow) + void ThrowAbortError(Thread* self, const std::string* abort_message) LOCKS_EXCLUDED(log_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); bool IsAborted() LOCKS_EXCLUDED(log_lock_); diff --git a/runtime/utils.cc b/runtime/utils.cc index ec7131d..650214f 100644 --- a/runtime/utils.cc +++ b/runtime/utils.cc @@ -262,8 +262,8 @@ uint64_t ThreadCpuNanoTime() { void NanoSleep(uint64_t ns) { timespec tm; - tm.tv_sec = 0; - tm.tv_nsec = ns; + tm.tv_sec = ns / MsToNs(1000); + tm.tv_nsec = ns - static_cast<uint64_t>(tm.tv_sec) * MsToNs(1000); nanosleep(&tm, nullptr); } @@ -1298,7 +1298,7 @@ void DumpNativeStack(std::ostream& os, pid_t tid, const char* prefix, if (!BacktraceMap::IsValid(it->map)) { os << StringPrintf("%08" PRIxPTR " ???", it->pc); } else { - os << StringPrintf("%08" PRIxPTR " ", it->pc - it->map.start); + os << StringPrintf("%08" PRIxPTR " ", BacktraceMap::GetRelativePc(it->map, it->pc)); os << it->map.name; os << " ("; if (!it->func_name.empty()) { diff --git a/runtime/utils_test.cc b/runtime/utils_test.cc index 259fe33..195de0c 100644 --- a/runtime/utils_test.cc +++ b/runtime/utils_test.cc @@ -515,4 +515,10 @@ TEST_F(UtilsTest, IsAbsoluteUint) { EXPECT_FALSE(IsAbsoluteUint<32>(UINT_MAX_plus1)); } +TEST_F(UtilsTest, TestSleep) { + auto start = NanoTime(); + NanoSleep(MsToNs(1500)); + EXPECT_GT(NanoTime() - start, MsToNs(1000)); +} + } // namespace art diff --git a/runtime/verifier/register_line.cc b/runtime/verifier/register_line.cc index 8445751..2838681 100644 --- a/runtime/verifier/register_line.cc +++ b/runtime/verifier/register_line.cc @@ -137,7 +137,7 @@ void RegisterLine::MarkRefsAsInitialized(MethodVerifier* verifier, const RegType if (GetRegisterType(verifier, i).Equals(uninit_type)) { line_[i] = init_type.GetId(); changed++; - if (i != this_reg && is_string) { + if (is_string && i != this_reg) { auto it = verifier->GetStringInitPcRegMap().find(dex_pc); if (it != verifier->GetStringInitPcRegMap().end()) { it->second.insert(i); diff --git a/test/138-duplicate-classes-check/build 
b/test/138-duplicate-classes-check/build deleted file mode 100755 index 7ddc81d..0000000 --- a/test/138-duplicate-classes-check/build +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash -# -# Copyright (C) 2015 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Stop if something fails. -set -e - -mkdir classes -${JAVAC} -d classes `find src -name '*.java'` - -mkdir classes-ex -${JAVAC} -d classes-ex `find src-ex -name '*.java'` - -if [ ${NEED_DEX} = "true" ]; then - ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex --dump-width=1000 classes - zip $TEST_NAME.jar classes.dex - ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes.dex --dump-width=1000 classes-ex - zip ${TEST_NAME}-ex.jar classes.dex -fi diff --git a/test/480-checker-dead-blocks/src/Main.java b/test/480-checker-dead-blocks/src/Main.java index 560ce95..83dbb26 100644 --- a/test/480-checker-dead-blocks/src/Main.java +++ b/test/480-checker-dead-blocks/src/Main.java @@ -128,7 +128,7 @@ public class Main { // CHECK-DAG: [[Arg:i\d+]] ParameterValue // CHECK-DAG: Return [ [[Arg]] ] - // CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination_final (after) + // CHECK-START: int Main.testDeadLoop(int) dead_code_elimination_final (after) // CHECK-NOT: If // CHECK-NOT: Add @@ -139,9 +139,56 @@ public class Main { return x; } + // CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination_final (before) + // CHECK-DAG: If + // CHECK-DAG: If + // CHECK-DAG: Add + + // CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination_final (after) + // CHECK-DAG: [[Arg:i\d+]] ParameterValue + // CHECK-DAG: Return [ [[Arg]] ] + + // CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination_final (after) + // CHECK-NOT: If + // CHECK-NOT: Add + + public static int testUpdateLoopInformation(int x) { + // Use of Or in the condition generates a dead loop where not all of its + // blocks are removed. This forces DCE to update their loop information. + while (inlineFalse() || !inlineTrue()) { + x++; + } + return x; + } + + // CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination_final (before) + // CHECK: SuspendCheck + // CHECK: SuspendCheck + // CHECK: SuspendCheck + // CHECK-NOT: SuspendCheck + + // CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination_final (after) + // CHECK: SuspendCheck + // CHECK: SuspendCheck + // CHECK-NOT: SuspendCheck + + public static int testRemoveSuspendCheck(int x, int y) { + // Inner loop will leave behind the header with its SuspendCheck. DCE must + // remove it, otherwise the outer loop would end up with two. 
+ while (y > 0) { + while (inlineFalse() || !inlineTrue()) { + x++; + } + y--; + } + return x; + } + public static void main(String[] args) { assertIntEquals(7, testTrueBranch(4, 3)); assertIntEquals(1, testFalseBranch(4, 3)); assertIntEquals(42, testRemoveLoop(42)); + assertIntEquals(23, testUpdateLoopInformation(23)); + assertIntEquals(12, testRemoveSuspendCheck(12, 5)); } } diff --git a/test/482-checker-loop-back-edge-use/expected.txt b/test/482-checker-loop-back-edge-use/expected.txt new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/test/482-checker-loop-back-edge-use/expected.txt diff --git a/test/482-checker-loop-back-edge-use/info.txt b/test/482-checker-loop-back-edge-use/info.txt new file mode 100644 index 0000000..f7fdeff --- /dev/null +++ b/test/482-checker-loop-back-edge-use/info.txt @@ -0,0 +1,2 @@ +Tests the register allocator's optimization of adding synthesized uses +at back edges. diff --git a/test/482-checker-loop-back-edge-use/src/Main.java b/test/482-checker-loop-back-edge-use/src/Main.java new file mode 100644 index 0000000..74184e8 --- /dev/null +++ b/test/482-checker-loop-back-edge-use/src/Main.java @@ -0,0 +1,131 @@ +/* +* Copyright (C) 2015 The Android Open Source Project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + + +public class Main { + + // CHECK-START: void Main.loop1(boolean) liveness (after) + // CHECK: ParameterValue (liveness: 2 ranges: { [2, 22) }, uses: { 17 22 } + // CHECK: Goto (liveness: 20) + public static void loop1(boolean incoming) { + while (incoming) {} + } + + // CHECK-START: void Main.loop2(boolean) liveness (after) + // CHECK: ParameterValue (liveness: 2 ranges: { [2, 42) }, uses: { 33 38 42 } + // CHECK: Goto (liveness: 36) + // CHECK: Goto (liveness: 40) + public static void loop2(boolean incoming) { + while (true) { + System.out.println("foo"); + while (incoming) {} + } + } + + // CHECK-START: void Main.loop3(boolean) liveness (after) + // CHECK: ParameterValue (liveness: 2 ranges: { [2, 60) }, uses: { 56 60 } + // CHECK: Goto (liveness: 58) + + // CHECK-START: void Main.loop3(boolean) liveness (after) + // CHECK-NOT: Goto (liveness: 54) + public static void loop3(boolean incoming) { + // 'incoming' only needs a use at the outer loop's back edge. + while (System.currentTimeMillis() != 42) { + while (Runtime.getRuntime() != null) {} + System.out.println(incoming); + } + } + + // CHECK-START: void Main.loop4(boolean) liveness (after) + // CHECK: ParameterValue (liveness: 2 ranges: { [2, 22) }, uses: { 22 } + + // CHECK-START: void Main.loop4(boolean) liveness (after) + // CHECK-NOT: Goto (liveness: 20) + public static void loop4(boolean incoming) { + // 'incoming' has no loop use, so should not have back edge uses. 
+    System.out.println(incoming);
+    while (System.currentTimeMillis() != 42) {
+      while (Runtime.getRuntime() != null) {}
+    }
+  }
+
+  // CHECK-START: void Main.loop5(boolean) liveness (after)
+  // CHECK:         ParameterValue (liveness: 2 ranges: { [2, 50) }, uses: { 33 42 46 50 }
+  // CHECK:         Goto (liveness: 44)
+  // CHECK:         Goto (liveness: 48)
+  public static void loop5(boolean incoming) {
+    // 'incoming' must have a use at both back edges.
+    while (Runtime.getRuntime() != null) {
+      while (incoming) {
+        System.out.println(incoming);
+      }
+    }
+  }
+
+  // CHECK-START: void Main.loop6(boolean) liveness (after)
+  // CHECK:         ParameterValue (liveness: 2 ranges: { [2, 46) }, uses: { 24 46 }
+  // CHECK:         Goto (liveness: 44)
+
+  // CHECK-START: void Main.loop6(boolean) liveness (after)
+  // CHECK-NOT:     Goto (liveness: 22)
+  public static void loop6(boolean incoming) {
+    // 'incoming' must have a use only at the first loop's back edge.
+    while (true) {
+      System.out.println(incoming);
+      while (Runtime.getRuntime() != null) {}
+    }
+  }
+
+  // CHECK-START: void Main.loop7(boolean) liveness (after)
+  // CHECK:         ParameterValue (liveness: 2 ranges: { [2, 50) }, uses: { 32 41 46 50 }
+  // CHECK:         Goto (liveness: 44)
+  // CHECK:         Goto (liveness: 48)
+  public static void loop7(boolean incoming) {
+    // 'incoming' must have a use at both back edges.
+    while (Runtime.getRuntime() != null) {
+      System.out.println(incoming);
+      while (incoming) {}
+    }
+  }
+
+  // CHECK-START: void Main.loop8() liveness (after)
+  // CHECK:         StaticFieldGet (liveness: 12 ranges: { [12, 44) }, uses: { 35 40 44 }
+  // CHECK:         Goto (liveness: 38)
+  // CHECK:         Goto (liveness: 42)
+  public static void loop8() {
+    // 'incoming' must have a use at both back edges.
+    boolean incoming = field;
+    while (Runtime.getRuntime() != null) {
+      while (incoming) {}
+    }
+  }
+
+  // CHECK-START: void Main.loop9() liveness (after)
+  // CHECK:         StaticFieldGet (liveness: 22 ranges: { [22, 36) }, uses: { 31 36 }
+  // CHECK:         Goto (liveness: 38)
+  public static void loop9() {
+    while (Runtime.getRuntime() != null) {
+      // 'incoming' must only have a use in the inner loop.
+      boolean incoming = field;
+      while (incoming) {}
+    }
+  }
+
+  public static void main(String[] args) {
+  }
+
+  static boolean field;
+}
diff --git a/test/483-dce-block/expected.txt b/test/483-dce-block/expected.txt
new file mode 100644
index 0000000..ef48625
--- /dev/null
+++ b/test/483-dce-block/expected.txt
@@ -0,0 +1 @@
+class Main
diff --git a/test/483-dce-block/info.txt b/test/483-dce-block/info.txt
new file mode 100644
index 0000000..3db88ab
--- /dev/null
+++ b/test/483-dce-block/info.txt
@@ -0,0 +1,2 @@
+Regression test for the optimizing compiler, which used to
+crash when compiling the `foo` method.
diff --git a/test/483-dce-block/src/Main.java b/test/483-dce-block/src/Main.java
new file mode 100644
index 0000000..2f66a74
--- /dev/null
+++ b/test/483-dce-block/src/Main.java
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void foo(Object o, int a) {
+    Object result = null;
+    if (o instanceof Main) {
+      // The compiler optimizes the type of `o` by introducing
+      // a `HBoundType` in this block.
+      while (a != 3) {
+        if (a == 2) {
+          a++;
+          result = o;
+          continue;
+        } else if (willInline()) {
+          // This block will be detected as dead after inlining.
+          result = new Object();
+          continue;
+        }
+        result = new Object();
+      }
+      // The compiler produces a phi at the back edge for `result`.
+      // Before dead block elimination, the phi has three inputs:
+      // result = (new Object(), new Object(), HBoundType)
+      //
+      // After dead block elimination, the phi now has two inputs:
+      // result = (new Object(), HBoundType)
+      //
+      // Our internal data structure for linking users and inputs expects
+      // the input index stored in that data structure to be the index
+      // in the inputs array. So before dead block elimination, the index
+      // of the `HBoundType` input would be 2. Dead block elimination must
+      // update that index to 1.
+    }
+    System.out.println(result.getClass());
+  }
+
+  public static boolean willInline() {
+    return false;
+  }
+
+  public static void main(String[] args) {
+    foo(new Main(), 2);
+  }
+}
diff --git a/test/484-checker-register-hints/expected.txt b/test/484-checker-register-hints/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/484-checker-register-hints/expected.txt
diff --git a/test/484-checker-register-hints/info.txt b/test/484-checker-register-hints/info.txt
new file mode 100644
index 0000000..8923680
--- /dev/null
+++ b/test/484-checker-register-hints/info.txt
@@ -0,0 +1,4 @@
+Checks that the register allocator does not penalize other
+blocks because one block forced spilling. The block that
+forces the spilling should restore the registers at the merge
+point.
diff --git a/test/484-checker-register-hints/src/Main.java b/test/484-checker-register-hints/src/Main.java
new file mode 100644
index 0000000..33952d9
--- /dev/null
+++ b/test/484-checker-register-hints/src/Main.java
@@ -0,0 +1,138 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  // CHECK-START: void Main.test1(boolean, int, int, int, int, int) register (after)
+  // CHECK:       name "B0"
+  // CHECK-NOT:   ParallelMove
+  // CHECK:       name "B1"
+  // CHECK-NOT:   end_block
+  // CHECK:       If
+  // CHECK-NOT:   ParallelMove
+  // CHECK:       name "B3"
+  // CHECK-NOT:   end_block
+  // CHECK:       ArraySet
+  // We could check here that there is a parallel move, but it's only valid
+  // for some architectures (for example x86), as other architectures may
+  // not do a move at all.
+  // CHECK:       end_block
+  // CHECK-NOT:   ParallelMove
+
+  public static void test1(boolean z, int a, int b, int c, int d, int m) {
+    int e = live1;
+    int f = live2;
+    int g = live3;
+    if (z) {
+    } else {
+      // Create enough live instructions to force spilling on x86.
+      int h = live4;
+      int i = live5;
+      array[2] = e + i + h;
+      array[3] = f + i + h;
+      array[4] = g + i + h;
+      array[0] = h;
+      array[1] = i + h;
+
+    }
+    live1 = e + f + g;
+  }
+
+  // CHECK-START: void Main.test2(boolean, int, int, int, int, int) register (after)
+  // CHECK:       name "B0"
+  // CHECK-NOT:   ParallelMove
+  // CHECK:       name "B1"
+  // CHECK-NOT:   end_block
+  // CHECK:       If
+  // CHECK-NOT:   ParallelMove
+  // CHECK:       name "B3"
+  // CHECK-NOT:   end_block
+  // CHECK:       ArraySet
+  // We could check here that there is a parallel move, but it's only valid
+  // for some architectures (for example x86), as other architectures may
+  // not do a move at all.
+  // CHECK:       end_block
+  // CHECK-NOT:   ParallelMove
+
+  public static void test2(boolean z, int a, int b, int c, int d, int m) {
+    int e = live1;
+    int f = live2;
+    int g = live3;
+    if (z) {
+      if (y) {
+        int h = live4;
+        int i = live5;
+        array[2] = e + i + h;
+        array[3] = f + i + h;
+        array[4] = g + i + h;
+        array[0] = h;
+        array[1] = i + h;
+      }
+    }
+    live1 = e + f + g;
+  }
+
+  // CHECK-START: void Main.test3(boolean, int, int, int, int, int) register (after)
+  // CHECK:       name "B0"
+  // CHECK-NOT:   ParallelMove
+  // CHECK:       name "B1"
+  // CHECK-NOT:   end_block
+  // CHECK:       If
+  // CHECK-NOT:   ParallelMove
+  // CHECK:       name "B6"
+  // CHECK-NOT:   end_block
+  // CHECK:       ArraySet
+  // We could check here that there is a parallel move, but it's only valid
+  // for some architectures (for example x86), as other architectures may
+  // not do a move at all.
+  // CHECK:       end_block
+  // CHECK-NOT:   ParallelMove
+
+  public static void test3(boolean z, int a, int b, int c, int d, int m) {
+    // The same as test2, but with the branches reversed, to ensure that
+    // whatever linear order is computed, we get the same results.
+    int e = live1;
+    int f = live2;
+    int g = live3;
+    if (z) {
+      live1 = e;
+    } else {
+      if (y) {
+        live1 = e;
+      } else {
+        int h = live4;
+        int i = live5;
+        array[2] = e + i + h;
+        array[3] = f + i + h;
+        array[4] = g + i + h;
+        array[0] = h;
+        array[1] = i + h;
+      }
+    }
+    live1 = e + f + g;
+  }
+
+  public static void main(String[] args) {
+  }
+
+  static boolean y;
+  static int live1;
+  static int live2;
+  static int live3;
+  static int live4;
+  static int live5;
+  static int[] array;
+}
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index c7e6877..515b8af 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -428,6 +428,17 @@ endif
 
 TEST_ART_BROKEN_OPTIMIZING_DEBUGGABLE_RUN_TESTS :=
 
+# Tests that should fail in the read barrier configuration.
+TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS := \
+  098-ddmc # b/20720510
+
+ifeq ($(ART_USE_READ_BARRIER),true)
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+      $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+      $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+endif
+
+TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS :=
 
 # Clear variables ahead of appending to them when defining tests.
$(foreach target, $(TARGET_TYPES), $(eval ART_RUN_TEST_$(call name-to-var,$(target))_RULES :=))
@@ -95,6 +95,7 @@ ANDROID_DATA=$ANDROID_DATA \
   PATH=$ANDROID_ROOT/bin:$PATH \
   $invoke_with $ANDROID_ROOT/bin/$DALVIKVM $lib \
     -XXlib:$LIBART \
+    -Xnorelocate \
     -Ximage:$ANDROID_ROOT/framework/core.art \
     -Xcompiler-option --include-debug-symbols \
     "$@"
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index a007fa2..1dd443b 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -19,11 +19,6 @@ if [ ! -d libcore ]; then
   exit 1
 fi
 
-if [[ $ANDROID_SERIAL == HT4CTJT03670 ]] || [[ $ANDROID_SERIAL == HT49CJT00070 ]]; then
-  echo "Not running on buildbot because of failures on volantis. Investigating."
-  exit 0
-fi
-
 # Jar containing all the tests.
 test_jar=out/host/linux-x86/framework/apache-harmony-jdwp-tests-hostdex.jar
 junit_jar=out/host/linux-x86/framework/junit.jar
@@ -79,7 +74,7 @@ vogar $vm_command \
   $args \
   $device_dir \
   $image_compiler_option \
-  --timeout 600 \
+  --timeout 800 \
   --vm-arg -Djpda.settings.verbose=true \
   --vm-arg -Djpda.settings.syncPort=34016 \
   --vm-arg -Djpda.settings.transportAddress=127.0.0.1:55107 \
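Note on the test convention used above: the `// CHECK-*` lines in these Java files are assertions consumed by ART's Checker tool, which matches each group against the compiler's IR dump of the named method after the named pass; the file itself remains an ordinary runnable program. Below is a minimal sketch in the same style, mirroring the `testRemoveLoop` pattern from test 480. The class and method names here are invented for illustration and are not part of any patch above.

public class Example {
  public static boolean inlineFalse() { return false; }

  // Once inlineFalse() is inlined to the constant 'false', the loop is
  // provably dead, so the final DCE pass should leave neither the If
  // nor the Add behind.

  // CHECK-START: int Example.deadLoop(int) dead_code_elimination_final (after)
  // CHECK-NOT: If
  // CHECK-NOT: Add
  public static int deadLoop(int x) {
    while (inlineFalse()) {
      x++;
    }
    return x;
  }

  public static void main(String[] args) {
    // The method must still behave as plain Java: the dead loop never runs.
    if (deadLoop(42) != 42) {
      throw new Error("expected 42");
    }
  }
}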