/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "zip_archive.h"

// NOTE(review): the system header names below were reconstructed from the
// names this file uses (open/fcntl, read/lseek64/close, errno/strerror,
// va_list, memcpy/memset/memchr, std::vector) -- confirm against the build.
#include <vector>

#include <errno.h>
#include <fcntl.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include "base/stringprintf.h"
#include "base/unix_file/fd_file.h"
#include "UniquePtr.h"

namespace art {

// Size of the scratch buffers used when copying or inflating entry data.
static const size_t kBufSize = 32 * KB;

// Get 2 little-endian bytes.
static uint32_t Le16ToHost(const byte* src) {
  return (static_cast<uint32_t>(src[0]) << 0) |
         (static_cast<uint32_t>(src[1]) << 8);
}

// Get 4 little-endian bytes.
//
// Each byte is widened to uint32_t before shifting: shifting a promoted
// (signed) int holding a value >= 0x80 left by 24 overflows, which is
// undefined behavior.
static uint32_t Le32ToHost(const byte* src) {
  return (static_cast<uint32_t>(src[0]) << 0) |
         (static_cast<uint32_t>(src[1]) << 8) |
         (static_cast<uint32_t>(src[2]) << 16) |
         (static_cast<uint32_t>(src[3]) << 24);
}

// Returns the compression method field of this entry's central directory record.
uint16_t ZipEntry::GetCompressionMethod() {
  return Le16ToHost(ptr_ + ZipArchive::kCDEMethod);
}

// Returns the compressed length field of this entry's central directory record.
uint32_t ZipEntry::GetCompressedLength() {
  return Le32ToHost(ptr_ + ZipArchive::kCDECompLen);
}

// Returns the uncompressed length field of this entry's central directory record.
uint32_t ZipEntry::GetUncompressedLength() {
  return Le32ToHost(ptr_ + ZipArchive::kCDEUncompLen);
}

// Returns the CRC-32 field of this entry's central directory record.
uint32_t ZipEntry::GetCrc32() {
  return Le32ToHost(ptr_ + ZipArchive::kCDECRC);
}

// Returns the file offset at which this entry's data starts, or -1 (after
// logging a warning) if the Local File Header cannot be read or is
// inconsistent with the central directory.
off64_t ZipEntry::GetDataOffset() {
  // All we have is the offset to the Local File Header, which is
  // variable size, so we have to read the contents of the struct to
  // figure out where the actual data starts.

  // We also need to make sure that the lengths are not so large that
  // somebody trying to map the compressed or uncompressed data runs
  // off the end of the mapped region.

  off64_t dir_offset = zip_archive_->dir_offset_;
  int64_t lfh_offset = Le32ToHost(ptr_ + ZipArchive::kCDELocalOffset);
  if (lfh_offset + ZipArchive::kLFHLen >= dir_offset) {
    LOG(WARNING) << "Zip: bad LFH offset in zip";
    return -1;
  }

  if (lseek64(zip_archive_->fd_, lfh_offset, SEEK_SET) != lfh_offset) {
    PLOG(WARNING) << "Zip: failed seeking to LFH at offset " << lfh_offset;
    return -1;
  }

  uint8_t lfh_buf[ZipArchive::kLFHLen];
  ssize_t actual = TEMP_FAILURE_RETRY(read(zip_archive_->fd_, lfh_buf, sizeof(lfh_buf)));
  if (actual != sizeof(lfh_buf)) {
    LOG(WARNING) << "Zip: failed reading LFH from offset " << lfh_offset;
    return -1;
  }

  if (Le32ToHost(lfh_buf) != ZipArchive::kLFHSignature) {
    LOG(WARNING) << "Zip: didn't find signature at start of LFH, offset " << lfh_offset;
    return -1;
  }

  uint32_t gpbf = Le16ToHost(lfh_buf + ZipArchive::kLFHGPBFlags);
  if ((gpbf & ZipArchive::kGPFUnsupportedMask) != 0) {
    LOG(WARNING) << "Invalid General Purpose Bit Flag: " << gpbf;
    return -1;
  }

  off64_t data_offset = (lfh_offset + ZipArchive::kLFHLen
                         + Le16ToHost(lfh_buf + ZipArchive::kLFHNameLen)
                         + Le16ToHost(lfh_buf + ZipArchive::kLFHExtraLen));
  if (data_offset >= dir_offset) {
    LOG(WARNING) << "Zip: bad data offset " << data_offset << " in zip";
    return -1;
  }

  // check lengths
  if (static_cast<off64_t>(data_offset + GetCompressedLength()) > dir_offset) {
    LOG(WARNING) << "Zip: bad compressed length in zip "
                 << "(" << data_offset << " + " << GetCompressedLength()
                 << " > " << dir_offset << ")";
    return -1;
  }

  if (GetCompressionMethod() == kCompressStored
      && static_cast<off64_t>(data_offset + GetUncompressedLength()) > dir_offset) {
    LOG(WARNING) << "Zip: bad uncompressed length in zip "
                 << "(" << data_offset << " + " << GetUncompressedLength()
                 << " > " << dir_offset << ")";
    return -1;
  }

  return data_offset;
}

// Copies `count` bytes from file descriptor `in` (at its current position)
// into the `size`-byte buffer at `begin`. Returns false on a short read or
// if `count` does not fit in the destination buffer.
static bool CopyFdToMemory(uint8_t* begin, size_t size, int in, size_t count) {
  // The entry length comes from the (untrusted) archive; never write past
  // the caller's buffer.
  if (count > size) {
    LOG(WARNING) << "Zip: entry length " << count
                 << " exceeds destination buffer size " << size;
    return false;
  }

  uint8_t* dst = begin;
  std::vector<uint8_t> buf(kBufSize);
  while (count != 0) {
    size_t bytes_to_read = (count > kBufSize) ? kBufSize : count;
    ssize_t actual = TEMP_FAILURE_RETRY(read(in, &buf[0], bytes_to_read));
    if (actual != static_cast<ssize_t>(bytes_to_read)) {
      PLOG(WARNING) << "Zip: short read";
      return false;
    }
    memcpy(dst, &buf[0], bytes_to_read);
    dst += bytes_to_read;
    count -= bytes_to_read;
  }
  DCHECK_EQ(dst, begin + size);
  return true;
}

// Owns a z_stream whose output initially points at the supplied write
// buffer; inflateEnd() is called on it when the object is destroyed.
class ZStream {
 public:
  ZStream(byte* write_buf, size_t write_buf_size) {
    // Initialize the zlib stream struct.
    memset(&zstream_, 0, sizeof(zstream_));
    zstream_.zalloc = Z_NULL;
    zstream_.zfree = Z_NULL;
    zstream_.opaque = Z_NULL;
    zstream_.next_in = NULL;
    zstream_.avail_in = 0;
    zstream_.next_out = reinterpret_cast<Bytef*>(write_buf);
    zstream_.avail_out = write_buf_size;
    zstream_.data_type = Z_UNKNOWN;
  }

  z_stream& Get() {
    return zstream_;
  }

  ~ZStream() {
    inflateEnd(&zstream_);
  }

 private:
  z_stream zstream_;
};

// Inflates `compressed_length` bytes of raw deflate data read from `in` into
// the `size`-byte buffer at `begin`. Returns false if reading or inflating
// fails, if the output would overflow the buffer, or if the inflated size
// differs from `uncompressed_length`.
static bool InflateToMemory(uint8_t* begin, size_t size,
                            int in, size_t uncompressed_length, size_t compressed_length) {
  uint8_t* dst = begin;
  size_t space_left = size;  // Bytes still available in the destination.
  UniquePtr<uint8_t[]> read_buf(new uint8_t[kBufSize]);
  UniquePtr<uint8_t[]> write_buf(new uint8_t[kBufSize]);
  if (read_buf.get() == NULL || write_buf.get() == NULL) {
    LOG(WARNING) << "Zip: failed to allocate buffer to inflate";
    return false;
  }

  ZStream zstream(write_buf.get(), kBufSize);
  z_stream& zs = zstream.Get();

  // Use the undocumented "negative window bits" feature to tell zlib
  // that there's no zlib header waiting for it.
  int zerr = inflateInit2(&zs, -MAX_WBITS);
  if (zerr != Z_OK) {
    if (zerr == Z_VERSION_ERROR) {
      LOG(ERROR) << "Installed zlib is not compatible with linked version (" << ZLIB_VERSION << ")";
    } else {
      LOG(WARNING) << "Call to inflateInit2 failed (zerr=" << zerr << ")";
    }
    return false;
  }

  size_t remaining = compressed_length;
  do {
    // read as much as we can
    if (zs.avail_in == 0) {
      size_t bytes_to_read = (remaining > kBufSize) ? kBufSize : remaining;

      ssize_t actual = TEMP_FAILURE_RETRY(read(in, read_buf.get(), bytes_to_read));
      if (actual != static_cast<ssize_t>(bytes_to_read)) {
        LOG(WARNING) << "Zip: inflate read failed (" << actual << " vs " << bytes_to_read << ")";
        return false;
      }
      remaining -= bytes_to_read;
      zs.next_in = read_buf.get();
      zs.avail_in = bytes_to_read;
    }

    // uncompress the data
    zerr = inflate(&zs, Z_NO_FLUSH);
    if (zerr != Z_OK && zerr != Z_STREAM_END) {
      // The zlib pointers are unsigned char*; print them as addresses, not
      // as C strings.
      LOG(WARNING) << "Zip: inflate zerr=" << zerr
                   << " (next_in=" << static_cast<const void*>(zs.next_in)
                   << " avail_in=" << zs.avail_in
                   << " next_out=" << static_cast<const void*>(zs.next_out)
                   << " avail_out=" << zs.avail_out
                   << ")";
      return false;
    }

    // write when we're full or when we're done
    if (zs.avail_out == 0 ||
        (zerr == Z_STREAM_END && zs.avail_out != kBufSize)) {
      size_t bytes_to_write = zs.next_out - write_buf.get();
      // The stream may expand to more than the declared uncompressed
      // length; refuse to write past the caller's buffer.
      if (bytes_to_write > space_left) {
        LOG(WARNING) << "Zip: inflated data exceeds destination buffer size " << size;
        return false;
      }
      memcpy(dst, write_buf.get(), bytes_to_write);
      dst += bytes_to_write;
      space_left -= bytes_to_write;
      zs.next_out = write_buf.get();
      zs.avail_out = kBufSize;
    }
  } while (zerr == Z_OK);

  DCHECK_EQ(zerr, Z_STREAM_END);  // other errors should've been caught

  // paranoia
  if (zs.total_out != uncompressed_length) {
    LOG(WARNING) << "Zip: size mismatch on inflated file ("
                 << zs.total_out << " vs " << uncompressed_length << ")";
    return false;
  }

  DCHECK_EQ(dst, begin + size);
  return true;
}

// Extracts this entry into `file`: the file is truncated to the entry's
// uncompressed length, mapped, and filled via ExtractToMemory(). On failure
// returns false with `*error_msg` set.
bool ZipEntry::ExtractToFile(File& file, std::string* error_msg) {
  uint32_t length = GetUncompressedLength();
  int result = TEMP_FAILURE_RETRY(ftruncate(file.Fd(), length));
  if (result == -1) {
    *error_msg = StringPrintf("Zip: failed to ftruncate '%s' to length %u", file.GetPath().c_str(),
                              length);
    return false;
  }

  UniquePtr<MemMap> map(MemMap::MapFile(length, PROT_READ | PROT_WRITE, MAP_SHARED, file.Fd(), 0,
                                        file.GetPath().c_str(), error_msg));
  if (map.get() == NULL) {
    *error_msg = StringPrintf("Zip: failed to mmap space for '%s': %s", file.GetPath().c_str(),
                              error_msg->c_str());
    return false;
  }

  return ExtractToMemory(map->Begin(), map->Size(), error_msg);
}

// Extracts this entry into the `size`-byte buffer at `begin`. Returns true
// on success (including the trivial size == 0 case); on failure returns
// false with `*error_msg` set.
bool ZipEntry::ExtractToMemory(uint8_t* begin, size_t size, std::string* error_msg) {
  // If size is zero, data offset will be meaningless, so bail out early.
  if (size == 0) {
    return true;
  }
  off64_t data_offset = GetDataOffset();
  if (data_offset == -1) {
    *error_msg = StringPrintf("Zip: data_offset=%lld", static_cast<long long>(data_offset));
    return false;
  }
  if (lseek64(zip_archive_->fd_, data_offset, SEEK_SET) != data_offset) {
    *error_msg = StringPrintf("Zip: lseek to data at %lld failed",
                              static_cast<long long>(data_offset));
    return false;
  }

  // TODO: this doesn't verify the data's CRC, but probably should (especially
  // for uncompressed data).
  switch (GetCompressionMethod()) {
    case kCompressStored:
      if (!CopyFdToMemory(begin, size, zip_archive_->fd_, GetUncompressedLength())) {
        *error_msg = StringPrintf("Zip: failed to copy stored entry data at %lld",
                                  static_cast<long long>(data_offset));
        return false;
      }
      return true;
    case kCompressDeflated:
      if (!InflateToMemory(begin, size, zip_archive_->fd_,
                           GetUncompressedLength(), GetCompressedLength())) {
        *error_msg = StringPrintf("Zip: failed to inflate entry data at %lld",
                                  static_cast<long long>(data_offset));
        return false;
      }
      return true;
    default:
      *error_msg = StringPrintf("Zip: unknown compression method 0x%x", GetCompressionMethod());
      return false;
  }
}

// Extracts this entry into a freshly created anonymous mapping and returns
// it (the caller takes ownership), or NULL on failure.
MemMap* ZipEntry::ExtractToMemMap(const char* entry_filename, std::string* error_msg) {
  // Name the mapping after both the entry and the archive it came from.
  std::string name(entry_filename);
  name += " extracted in memory from ";
  name += zip_archive_->filename_;
  UniquePtr<MemMap> map(MemMap::MapAnonymous(name.c_str(),
                                             NULL,
                                             GetUncompressedLength(),
                                             PROT_READ | PROT_WRITE, error_msg));
  if (map.get() == nullptr) {
    DCHECK(!error_msg->empty());
    return NULL;
  }

  bool success = ExtractToMemory(map->Begin(), map->Size(), error_msg);
  if (!success) {
    LOG(ERROR) << "Zip: Failed to extract '" << entry_filename << "' to memory";
    return NULL;
  }

  return map.release();
}

// Sets FD_CLOEXEC on `fd`; failures are logged and otherwise ignored.
static void SetCloseOnExec(int fd) {
  // This dance is more portable than Linux's O_CLOEXEC open(2) flag.
  int flags = fcntl(fd, F_GETFD);
  if (flags == -1) {
    PLOG(WARNING) << "fcntl(" << fd << ", F_GETFD) failed";
    return;
  }
  int rc = fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
  if (rc == -1) {
    PLOG(WARNING) << "fcntl(" << fd << ", F_SETFD, " << flags << ") failed";
    return;
  }
}

// Opens `filename` read-only and parses it as a zip archive. Returns NULL
// with `*error_msg` set on failure.
ZipArchive* ZipArchive::Open(const char* filename, std::string* error_msg) {
  DCHECK(filename != nullptr);
  int fd = open(filename, O_RDONLY, 0);
  if (fd == -1) {
    *error_msg = StringPrintf("Zip: unable to open '%s': %s", filename, strerror(errno));
    return NULL;
  }
  return OpenFromFd(fd, filename, error_msg);
}

// Wraps an already-open descriptor in a ZipArchive, mapping and parsing its
// central directory. On failure the archive is closed and NULL is returned
// with `*error_msg` set.
ZipArchive* ZipArchive::OpenFromFd(int fd, const char* filename, std::string* error_msg) {
  SetCloseOnExec(fd);
  UniquePtr<ZipArchive> zip_archive(new ZipArchive(fd, filename));
  CHECK(zip_archive.get() != nullptr);
  if (!zip_archive->MapCentralDirectory(error_msg)) {
    zip_archive->Close();
    return NULL;
  }
  if (!zip_archive->Parse(error_msg)) {
    zip_archive->Close();
    return NULL;
  }
  return zip_archive.release();
}

// Looks up `name` in the parsed directory. Returns a newly allocated
// ZipEntry (owned by the caller), or NULL if there is no such entry.
ZipEntry* ZipArchive::Find(const char* name) const {
  DCHECK(name != nullptr);
  DirEntries::const_iterator it = dir_entries_.find(name);
  if (it == dir_entries_.end()) {
    return NULL;
  }
  return new ZipEntry(this, (*it).second);
}

// Closes the underlying descriptor (if open) and resets the archive fields.
void ZipArchive::Close() {
  if (fd_ != -1) {
    close(fd_);
  }
  fd_ = -1;
  num_entries_ = 0;
  dir_offset_ = 0;
}

// Formats a printf-style message prefixed with "Zip '<filename>' : ".
std::string ZipArchive::ErrorStringPrintf(const char* fmt, ...) {
  va_list ap;
  va_start(ap, fmt);
  std::string result(StringPrintf("Zip '%s' : ", filename_.c_str()));
  StringAppendV(&result, fmt, ap);
  va_end(ap);
  return result;
}

// Find the zip Central Directory and memory-map it.
//
// On success, returns true after populating fields from the EOCD area:
//   num_entries_
//   dir_offset_
//   dir_map_
bool ZipArchive::MapCentralDirectory(std::string* error_msg) {
  /*
   * Get and test file length.
   */
  off64_t file_length = lseek64(fd_, 0, SEEK_END);
  if (file_length < kEOCDLen) {
    *error_msg = ErrorStringPrintf("length %lld is too small to be zip",
                                   static_cast<long long>(file_length));
    return false;
  }

  size_t read_amount = kMaxEOCDSearch;
  if (file_length < static_cast<off64_t>(read_amount)) {
    read_amount = file_length;
  }

  UniquePtr<uint8_t[]> scan_buf(new uint8_t[read_amount]);
  CHECK(scan_buf.get() != nullptr);

  /*
   * Make sure this is a Zip archive.
   */
  if (lseek64(fd_, 0, SEEK_SET) != 0) {
    *error_msg = ErrorStringPrintf("seek to start failed: %s", strerror(errno));
    return false;
  }

  ssize_t actual = TEMP_FAILURE_RETRY(read(fd_, scan_buf.get(), sizeof(int32_t)));
  if (actual != static_cast<ssize_t>(sizeof(int32_t))) {
    *error_msg = ErrorStringPrintf("couldn\'t read first signature from zip archive: %s",
                                   strerror(errno));
    return false;
  }

  unsigned int header = Le32ToHost(scan_buf.get());
  if (header != kLFHSignature) {
    *error_msg = ErrorStringPrintf("not a zip archive (found 0x%x)", header);
    return false;
  }

  // Perform the traditional EOCD snipe hunt.
  //
  // We're searching for the End of Central Directory magic number,
  // which appears at the start of the EOCD block.  It's followed by
  // 18 bytes of EOCD stuff and up to 64KB of archive comment.  We
  // need to read the last part of the file into a buffer, dig through
  // it to find the magic number, parse some values out, and use those
  // to determine the extent of the CD.
  //
  // We start by pulling in the last part of the file.
  off64_t search_start = file_length - read_amount;

  if (lseek64(fd_, search_start, SEEK_SET) != search_start) {
    *error_msg = ErrorStringPrintf("seek %lld failed: %s",
                                   static_cast<long long>(search_start), strerror(errno));
    return false;
  }
  actual = TEMP_FAILURE_RETRY(read(fd_, scan_buf.get(), read_amount));
  if (actual != static_cast<ssize_t>(read_amount)) {
    *error_msg = ErrorStringPrintf("read %zd bytes at offset %lld, expected %zu: %s",
                                   actual, static_cast<long long>(search_start), read_amount,
                                   strerror(errno));
    return false;
  }

  // Scan backward for the EOCD magic.  In an archive without a trailing
  // comment, we'll find it on the first try.  (We may want to consider
  // doing an initial minimal read; if we don't find it, retry with a
  // second read as above.)
  int i;
  for (i = static_cast<int>(read_amount) - kEOCDLen; i >= 0; i--) {
    // Cheap first-byte test ('P') before decoding the full 4-byte signature.
    if (scan_buf.get()[i] == 0x50 && Le32ToHost(&(scan_buf.get())[i]) == kEOCDSignature) {
      break;
    }
  }
  if (i < 0) {
    *error_msg = ErrorStringPrintf("EOCD not found, not a zip file");
    return false;
  }

  off64_t eocd_offset = search_start + i;
  const byte* eocd_ptr = scan_buf.get() + i;

  CHECK(eocd_offset < file_length);

  // Grab the CD offset and size, and the number of entries in the
  // archive.  Verify that they look reasonable.
  uint16_t disk_number = Le16ToHost(eocd_ptr + kEOCDDiskNumber);
  uint16_t disk_with_central_dir = Le16ToHost(eocd_ptr + kEOCDDiskNumberForCD);
  uint16_t num_entries = Le16ToHost(eocd_ptr + kEOCDNumEntries);
  uint16_t total_num_entries = Le16ToHost(eocd_ptr + kEOCDTotalNumEntries);
  uint32_t dir_size = Le32ToHost(eocd_ptr + kEOCDSize);
  uint32_t dir_offset = Le32ToHost(eocd_ptr + kEOCDFileOffset);
  uint16_t comment_size = Le16ToHost(eocd_ptr + kEOCDCommentSize);

  // Sum in 64 bits so two large 32-bit fields cannot wrap around.
  if (static_cast<uint64_t>(dir_offset) + static_cast<uint64_t>(dir_size) >
      static_cast<uint64_t>(eocd_offset)) {
    *error_msg = ErrorStringPrintf("bad offsets (dir=%u, size=%u, eocd=%lld)",
                                   dir_offset, dir_size, static_cast<long long>(eocd_offset));
    return false;
  }
  if (num_entries == 0) {
    *error_msg = ErrorStringPrintf("empty archive?");
    return false;
  } else if (num_entries != total_num_entries || disk_number != 0 || disk_with_central_dir != 0) {
    *error_msg = ErrorStringPrintf("spanned archives not supported");
    return false;
  }

  // Check to see if comment is a sane size
  if ((comment_size > (file_length - kEOCDLen))
      || (eocd_offset > (file_length - kEOCDLen) - comment_size)) {
    *error_msg = ErrorStringPrintf("comment size runs off end of file");
    return false;
  }

  // It all looks good.  Create a mapping for the CD.
  dir_map_.reset(MemMap::MapFile(dir_size, PROT_READ, MAP_SHARED, fd_, dir_offset,
                                 filename_.c_str(), error_msg));
  if (dir_map_.get() == NULL) {
    return false;
  }

  num_entries_ = num_entries;
  dir_offset_ = dir_offset;
  return true;
}

// Walks the mapped central directory, validating each record and adding its
// name to dir_entries_. Returns false with `*error_msg` set on the first
// malformed record.
bool ZipArchive::Parse(std::string* error_msg) {
  const byte* cd_ptr = dir_map_->Begin();
  size_t cd_length = dir_map_->Size();
  const byte* cd_end = cd_ptr + cd_length;

  // Walk through the central directory, adding entries to the hash
  // table and verifying values.
  const byte* ptr = cd_ptr;
  for (int i = 0; i < num_entries_; i++) {
    // Make sure the fixed-size part of the record is inside the mapping
    // before reading any of its fields (including the signature).
    size_t bytes_left = static_cast<size_t>(cd_end - ptr);
    if (bytes_left < static_cast<size_t>(kCDELen)) {
      *error_msg = ErrorStringPrintf("ran off the end (at %d)", i);
      return false;
    }
    if (Le32ToHost(ptr) != kCDESignature) {
      *error_msg = ErrorStringPrintf("missed a central dir sig (at %d)", i);
      return false;
    }

    int64_t local_hdr_offset = Le32ToHost(ptr + kCDELocalOffset);
    if (local_hdr_offset >= dir_offset_) {
      *error_msg = ErrorStringPrintf("bad LFH offset %lld at entry %d",
                                     static_cast<long long>(local_hdr_offset), i);
      return false;
    }

    uint16_t gpbf = Le16ToHost(ptr + kCDEGPBFlags);
    if ((gpbf & kGPFUnsupportedMask) != 0) {
      *error_msg = ErrorStringPrintf("invalid general purpose bit flag %x", gpbf);
      return false;
    }

    uint16_t name_len = Le16ToHost(ptr + kCDENameLen);
    uint16_t extra_len = Le16ToHost(ptr + kCDEExtraLen);
    uint16_t comment_len = Le16ToHost(ptr + kCDECommentLen);

    // Verify that the variable-length tail (name, extra, comment) is inside
    // the mapping before touching the name bytes.
    size_t entry_len = static_cast<size_t>(kCDELen) + name_len + extra_len + comment_len;
    if (entry_len > bytes_left) {
      *error_msg = ErrorStringPrintf("bad CD advance (%zu vs %zu bytes left) at entry %d",
                                     entry_len, bytes_left, i);
      return false;
    }

    // add the CDE filename to the hash table
    const char* name = reinterpret_cast<const char*>(ptr + kCDELen);

    // Check name for NULL characters
    if (memchr(name, 0, name_len) != NULL) {
      *error_msg = ErrorStringPrintf("filename contains NUL byte");
      return false;
    }

    dir_entries_.Put(StringPiece(name, name_len), ptr);
    ptr += entry_len;
  }
  return true;
}

}  // namespace art