diff options
author | The Android Open Source Project <initial-contribution@android.com> | 2008-10-21 07:00:00 -0700 |
---|---|---|
committer | The Android Open Source Project <initial-contribution@android.com> | 2008-10-21 07:00:00 -0700 |
commit | 7c1b96a165f970a09ed239bb4fb3f1b0d8f2a407 (patch) | |
tree | df5a6539447324de36e95b057d6b9f0361b7a250 /libs/utils/ZipFileRO.cpp | |
download | frameworks_native-7c1b96a165f970a09ed239bb4fb3f1b0d8f2a407.zip frameworks_native-7c1b96a165f970a09ed239bb4fb3f1b0d8f2a407.tar.gz frameworks_native-7c1b96a165f970a09ed239bb4fb3f1b0d8f2a407.tar.bz2 |
Initial Contribution
Diffstat (limited to 'libs/utils/ZipFileRO.cpp')
-rw-r--r-- | libs/utils/ZipFileRO.cpp | 724 |
1 files changed, 724 insertions, 0 deletions
diff --git a/libs/utils/ZipFileRO.cpp b/libs/utils/ZipFileRO.cpp new file mode 100644 index 0000000..ae8c719 --- /dev/null +++ b/libs/utils/ZipFileRO.cpp @@ -0,0 +1,724 @@ +/* + * Copyright (C) 2007 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// Read-only access to Zip archives, with minimal heap allocation. +// +#define LOG_TAG "zipro" +//#define LOG_NDEBUG 0 +#include "utils/ZipFileRO.h" +#include "utils/Log.h" +#include "utils/misc.h" + +#include <zlib.h> + +#include <string.h> +#include <fcntl.h> +#include <errno.h> +#include <assert.h> + +using namespace android; + +/* + * Zip file constants. + */ +#define kEOCDSignature 0x06054b50 +#define kEOCDLen 22 +#define kEOCDNumEntries 8 // offset to #of entries in file +#define kEOCDFileOffset 16 // offset to central directory + +#define kMaxCommentLen 65535 // longest possible in ushort +#define kMaxEOCDSearch (kMaxCommentLen + kEOCDLen) + +#define kLFHSignature 0x04034b50 +#define kLFHLen 30 // excluding variable-len fields +#define kLFHNameLen 26 // offset to filename length +#define kLFHExtraLen 28 // offset to extra length + +#define kCDESignature 0x02014b50 +#define kCDELen 46 // excluding variable-len fields +#define kCDEMethod 10 // offset to compression method +#define kCDEModWhen 12 // offset to modification timestamp +#define kCDECRC 16 // offset to entry CRC +#define kCDECompLen 20 // offset to compressed length +#define kCDEUncompLen 24 // offset to uncompressed length +#define kCDENameLen 28 // offset to filename length +#define kCDEExtraLen 30 // offset to extra length +#define kCDECommentLen 32 // offset to comment length +#define kCDELocalOffset 42 // offset to local hdr + +/* + * The values we return for ZipEntryRO use 0 as an invalid value, so we + * want to adjust the hash table index by a fixed amount. Using a large + * value helps insure that people don't mix & match arguments, e.g. to + * findEntryByIndex(). + */ +#define kZipEntryAdj 10000 + +/* + * Convert a ZipEntryRO to a hash table index, verifying that it's in a + * valid range. + */ +int ZipFileRO::entryToIndex(const ZipEntryRO entry) const +{ + long ent = ((long) entry) - kZipEntryAdj; + if (ent < 0 || ent >= mHashTableSize || mHashTable[ent].name == NULL) { + LOGW("Invalid ZipEntryRO %p (%ld)\n", entry, ent); + return -1; + } + return ent; +} + + +/* + * Open the specified file read-only. We memory-map the entire thing and + * close the file before returning. + */ +status_t ZipFileRO::open(const char* zipFileName) +{ + int fd = -1; + off_t length; + + assert(mFileMap == NULL); + + /* + * Open and map the specified file. + */ + fd = ::open(zipFileName, O_RDONLY); + if (fd < 0) { + LOGW("Unable to open zip '%s': %s\n", zipFileName, strerror(errno)); + return NAME_NOT_FOUND; + } + + length = lseek(fd, 0, SEEK_END); + if (length < 0) { + close(fd); + return UNKNOWN_ERROR; + } + + mFileMap = new FileMap(); + if (mFileMap == NULL) { + close(fd); + return NO_MEMORY; + } + if (!mFileMap->create(zipFileName, fd, 0, length, true)) { + LOGW("Unable to map '%s': %s\n", zipFileName, strerror(errno)); + close(fd); + return UNKNOWN_ERROR; + } + + mFd = fd; + + /* + * Got it mapped, verify it and create data structures for fast access. + */ + if (!parseZipArchive()) { + mFileMap->release(); + mFileMap = NULL; + return UNKNOWN_ERROR; + } + + return OK; +} + +/* + * Parse the Zip archive, verifying its contents and initializing internal + * data structures. + */ +bool ZipFileRO::parseZipArchive(void) +{ +#define CHECK_OFFSET(_off) { \ + if ((unsigned int) (_off) >= maxOffset) { \ + LOGE("ERROR: bad offset %u (max %d): %s\n", \ + (unsigned int) (_off), maxOffset, #_off); \ + goto bail; \ + } \ + } + const unsigned char* basePtr = (const unsigned char*)mFileMap->getDataPtr(); + const unsigned char* ptr; + size_t length = mFileMap->getDataLength(); + bool result = false; + unsigned int i, numEntries, cdOffset; + unsigned int val; + + /* + * The first 4 bytes of the file will either be the local header + * signature for the first file (kLFHSignature) or, if the archive doesn't + * have any files in it, the end-of-central-directory signature + * (kEOCDSignature). + */ + val = get4LE(basePtr); + if (val == kEOCDSignature) { + LOGI("Found Zip archive, but it looks empty\n"); + goto bail; + } else if (val != kLFHSignature) { + LOGV("Not a Zip archive (found 0x%08x)\n", val); + goto bail; + } + + /* + * Find the EOCD. We'll find it immediately unless they have a file + * comment. + */ + ptr = basePtr + length - kEOCDLen; + + while (ptr >= basePtr) { + if (*ptr == (kEOCDSignature & 0xff) && get4LE(ptr) == kEOCDSignature) + break; + ptr--; + } + if (ptr < basePtr) { + LOGI("Could not find end-of-central-directory in Zip\n"); + goto bail; + } + + /* + * There are two interesting items in the EOCD block: the number of + * entries in the file, and the file offset of the start of the + * central directory. + * + * (There's actually a count of the #of entries in this file, and for + * all files which comprise a spanned archive, but for our purposes + * we're only interested in the current file. Besides, we expect the + * two to be equivalent for our stuff.) + */ + numEntries = get2LE(ptr + kEOCDNumEntries); + cdOffset = get4LE(ptr + kEOCDFileOffset); + + /* valid offsets are [0,EOCD] */ + unsigned int maxOffset; + maxOffset = (ptr - basePtr) +1; + + LOGV("+++ numEntries=%d cdOffset=%d\n", numEntries, cdOffset); + if (numEntries == 0 || cdOffset >= length) { + LOGW("Invalid entries=%d offset=%d (len=%zd)\n", + numEntries, cdOffset, length); + goto bail; + } + + /* + * Create hash table. We have a minimum 75% load factor, possibly as + * low as 50% after we round off to a power of 2. + */ + mNumEntries = numEntries; + mHashTableSize = roundUpPower2(1 + ((numEntries * 4) / 3)); + mHashTable = (HashEntry*) calloc(1, sizeof(HashEntry) * mHashTableSize); + + /* + * Walk through the central directory, adding entries to the hash + * table. + */ + ptr = basePtr + cdOffset; + for (i = 0; i < numEntries; i++) { + unsigned int fileNameLen, extraLen, commentLen, localHdrOffset; + const unsigned char* localHdr; + unsigned int hash; + + if (get4LE(ptr) != kCDESignature) { + LOGW("Missed a central dir sig (at %d)\n", i); + goto bail; + } + if (ptr + kCDELen > basePtr + length) { + LOGW("Ran off the end (at %d)\n", i); + goto bail; + } + + localHdrOffset = get4LE(ptr + kCDELocalOffset); + CHECK_OFFSET(localHdrOffset); + fileNameLen = get2LE(ptr + kCDENameLen); + extraLen = get2LE(ptr + kCDEExtraLen); + commentLen = get2LE(ptr + kCDECommentLen); + + //LOGV("+++ %d: localHdr=%d fnl=%d el=%d cl=%d\n", + // i, localHdrOffset, fileNameLen, extraLen, commentLen); + //LOGV(" '%.*s'\n", fileNameLen, ptr + kCDELen); + + /* add the CDE filename to the hash table */ + hash = computeHash((const char*)ptr + kCDELen, fileNameLen); + addToHash((const char*)ptr + kCDELen, fileNameLen, hash); + + localHdr = basePtr + localHdrOffset; + if (get4LE(localHdr) != kLFHSignature) { + LOGW("Bad offset to local header: %d (at %d)\n", + localHdrOffset, i); + goto bail; + } + + ptr += kCDELen + fileNameLen + extraLen + commentLen; + CHECK_OFFSET(ptr - basePtr); + } + + result = true; + +bail: + return result; +#undef CHECK_OFFSET +} + + +/* + * Simple string hash function for non-null-terminated strings. + */ +/*static*/ unsigned int ZipFileRO::computeHash(const char* str, int len) +{ + unsigned int hash = 0; + + while (len--) + hash = hash * 31 + *str++; + + return hash; +} + +/* + * Add a new entry to the hash table. + */ +void ZipFileRO::addToHash(const char* str, int strLen, unsigned int hash) +{ + int ent = hash & (mHashTableSize-1); + + /* + * We over-allocate the table, so we're guaranteed to find an empty slot. + */ + while (mHashTable[ent].name != NULL) + ent = (ent + 1) & (mHashTableSize-1); + + mHashTable[ent].name = str; + mHashTable[ent].nameLen = strLen; +} + +/* + * Find a matching entry. + * + * Returns 0 if not found. + */ +ZipEntryRO ZipFileRO::findEntryByName(const char* fileName) const +{ + int nameLen = strlen(fileName); + unsigned int hash = computeHash(fileName, nameLen); + int ent = hash & (mHashTableSize-1); + + while (mHashTable[ent].name != NULL) { + if (mHashTable[ent].nameLen == nameLen && + memcmp(mHashTable[ent].name, fileName, nameLen) == 0) + { + /* match */ + return (ZipEntryRO) (ent + kZipEntryAdj); + } + + ent = (ent + 1) & (mHashTableSize-1); + } + + return NULL; +} + +/* + * Find the Nth entry. + * + * This currently involves walking through the sparse hash table, counting + * non-empty entries. If we need to speed this up we can either allocate + * a parallel lookup table or (perhaps better) provide an iterator interface. + */ +ZipEntryRO ZipFileRO::findEntryByIndex(int idx) const +{ + if (idx < 0 || idx >= mNumEntries) { + LOGW("Invalid index %d\n", idx); + return NULL; + } + + for (int ent = 0; ent < mHashTableSize; ent++) { + if (mHashTable[ent].name != NULL) { + if (idx-- == 0) + return (ZipEntryRO) (ent + kZipEntryAdj); + } + } + + return NULL; +} + +/* + * Get the useful fields from the zip entry. + * + * Returns "false" if the offsets to the fields or the contents of the fields + * appear to be bogus. + */ +bool ZipFileRO::getEntryInfo(ZipEntryRO entry, int* pMethod, long* pUncompLen, + long* pCompLen, off_t* pOffset, long* pModWhen, long* pCrc32) const +{ + int ent = entryToIndex(entry); + if (ent < 0) + return false; + + /* + * Recover the start of the central directory entry from the filename + * pointer. + */ + const unsigned char* basePtr = (const unsigned char*)mFileMap->getDataPtr(); + const unsigned char* ptr = (const unsigned char*) mHashTable[ent].name; + size_t zipLength = mFileMap->getDataLength(); + + ptr -= kCDELen; + + int method = get2LE(ptr + kCDEMethod); + if (pMethod != NULL) + *pMethod = method; + + if (pModWhen != NULL) + *pModWhen = get4LE(ptr + kCDEModWhen); + if (pCrc32 != NULL) + *pCrc32 = get4LE(ptr + kCDECRC); + + /* + * We need to make sure that the lengths are not so large that somebody + * trying to map the compressed or uncompressed data runs off the end + * of the mapped region. + */ + unsigned long localHdrOffset = get4LE(ptr + kCDELocalOffset); + if (localHdrOffset + kLFHLen >= zipLength) { + LOGE("ERROR: bad local hdr offset in zip\n"); + return false; + } + const unsigned char* localHdr = basePtr + localHdrOffset; + off_t dataOffset = localHdrOffset + kLFHLen + + get2LE(localHdr + kLFHNameLen) + get2LE(localHdr + kLFHExtraLen); + if ((unsigned long) dataOffset >= zipLength) { + LOGE("ERROR: bad data offset in zip\n"); + return false; + } + + if (pCompLen != NULL) { + *pCompLen = get4LE(ptr + kCDECompLen); + if (*pCompLen < 0 || (size_t)(dataOffset + *pCompLen) >= zipLength) { + LOGE("ERROR: bad compressed length in zip\n"); + return false; + } + } + if (pUncompLen != NULL) { + *pUncompLen = get4LE(ptr + kCDEUncompLen); + if (*pUncompLen < 0) { + LOGE("ERROR: negative uncompressed length in zip\n"); + return false; + } + if (method == kCompressStored && + (size_t)(dataOffset + *pUncompLen) >= zipLength) + { + LOGE("ERROR: bad uncompressed length in zip\n"); + return false; + } + } + + if (pOffset != NULL) { + *pOffset = dataOffset; + } + return true; +} + +/* + * Copy the entry's filename to the buffer. + */ +int ZipFileRO::getEntryFileName(ZipEntryRO entry, char* buffer, int bufLen) + const +{ + int ent = entryToIndex(entry); + if (ent < 0) + return -1; + + int nameLen = mHashTable[ent].nameLen; + if (bufLen < nameLen+1) + return nameLen+1; + + memcpy(buffer, mHashTable[ent].name, nameLen); + buffer[nameLen] = '\0'; + return 0; +} + +/* + * Create a new FileMap object that spans the data in "entry". + */ +FileMap* ZipFileRO::createEntryFileMap(ZipEntryRO entry) const +{ + /* + * TODO: the efficient way to do this is to modify FileMap to allow + * sub-regions of a file to be mapped. A reference-counting scheme + * can manage the base memory mapping. For now, we just create a brand + * new mapping off of the Zip archive file descriptor. + */ + + FileMap* newMap; + long compLen; + off_t offset; + + if (!getEntryInfo(entry, NULL, NULL, &compLen, &offset, NULL, NULL)) + return NULL; + + newMap = new FileMap(); + if (!newMap->create(mFileMap->getFileName(), mFd, offset, compLen, true)) { + newMap->release(); + return NULL; + } + + return newMap; +} + +/* + * Uncompress an entry, in its entirety, into the provided output buffer. + * + * This doesn't verify the data's CRC, which might be useful for + * uncompressed data. The caller should be able to manage it. + */ +bool ZipFileRO::uncompressEntry(ZipEntryRO entry, void* buffer) const +{ + const int kSequentialMin = 32768; + bool result = false; + int ent = entryToIndex(entry); + if (ent < 0) + return -1; + + const unsigned char* basePtr = (const unsigned char*)mFileMap->getDataPtr(); + int method; + long uncompLen, compLen; + off_t offset; + + getEntryInfo(entry, &method, &uncompLen, &compLen, &offset, NULL, NULL); + + /* + * Experiment with madvise hint. When we want to uncompress a file, + * we pull some stuff out of the central dir entry and then hit a + * bunch of compressed or uncompressed data sequentially. The CDE + * visit will cause a limited amount of read-ahead because it's at + * the end of the file. We could end up doing lots of extra disk + * access if the file we're prying open is small. Bottom line is we + * probably don't want to turn MADV_SEQUENTIAL on and leave it on. + * + * So, if the compressed size of the file is above a certain minimum + * size, temporarily boost the read-ahead in the hope that the extra + * pair of system calls are negated by a reduction in page faults. + */ + if (compLen > kSequentialMin) + mFileMap->advise(FileMap::SEQUENTIAL); + + if (method == kCompressStored) { + memcpy(buffer, basePtr + offset, uncompLen); + } else { + if (!inflateBuffer(buffer, basePtr + offset, uncompLen, compLen)) + goto bail; + } + + if (compLen > kSequentialMin) + mFileMap->advise(FileMap::NORMAL); + + result = true; + +bail: + return result; +} + +/* + * Uncompress an entry, in its entirety, to an open file descriptor. + * + * This doesn't verify the data's CRC, but probably should. + */ +bool ZipFileRO::uncompressEntry(ZipEntryRO entry, int fd) const +{ + bool result = false; + int ent = entryToIndex(entry); + if (ent < 0) + return -1; + + const unsigned char* basePtr = (const unsigned char*)mFileMap->getDataPtr(); + int method; + long uncompLen, compLen; + off_t offset; + + getEntryInfo(entry, &method, &uncompLen, &compLen, &offset, NULL, NULL); + + if (method == kCompressStored) { + ssize_t actual; + + actual = write(fd, basePtr + offset, uncompLen); + if (actual < 0) { + LOGE("Write failed: %s\n", strerror(errno)); + goto bail; + } else if (actual != uncompLen) { + LOGE("Partial write during uncompress (%d of %ld)\n", + (int)actual, uncompLen); + goto bail; + } else { + LOGI("+++ successful write\n"); + } + } else { + if (!inflateBuffer(fd, basePtr+offset, uncompLen, compLen)) + goto bail; + } + + result = true; + +bail: + return result; +} + +/* + * Uncompress "deflate" data from one buffer to another. + */ +/*static*/ bool ZipFileRO::inflateBuffer(void* outBuf, const void* inBuf, + long uncompLen, long compLen) +{ + bool result = false; + z_stream zstream; + int zerr; + + /* + * Initialize the zlib stream struct. + */ + memset(&zstream, 0, sizeof(zstream)); + zstream.zalloc = Z_NULL; + zstream.zfree = Z_NULL; + zstream.opaque = Z_NULL; + zstream.next_in = (Bytef*)inBuf; + zstream.avail_in = compLen; + zstream.next_out = (Bytef*) outBuf; + zstream.avail_out = uncompLen; + zstream.data_type = Z_UNKNOWN; + + /* + * Use the undocumented "negative window bits" feature to tell zlib + * that there's no zlib header waiting for it. + */ + zerr = inflateInit2(&zstream, -MAX_WBITS); + if (zerr != Z_OK) { + if (zerr == Z_VERSION_ERROR) { + LOGE("Installed zlib is not compatible with linked version (%s)\n", + ZLIB_VERSION); + } else { + LOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr); + } + goto bail; + } + + /* + * Expand data. + */ + zerr = inflate(&zstream, Z_FINISH); + if (zerr != Z_STREAM_END) { + LOGW("Zip inflate failed, zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)\n", + zerr, zstream.next_in, zstream.avail_in, + zstream.next_out, zstream.avail_out); + goto z_bail; + } + + /* paranoia */ + if ((long) zstream.total_out != uncompLen) { + LOGW("Size mismatch on inflated file (%ld vs %ld)\n", + zstream.total_out, uncompLen); + goto z_bail; + } + + result = true; + +z_bail: + inflateEnd(&zstream); /* free up any allocated structures */ + +bail: + return result; +} + +/* + * Uncompress "deflate" data from one buffer to an open file descriptor. + */ +/*static*/ bool ZipFileRO::inflateBuffer(int fd, const void* inBuf, + long uncompLen, long compLen) +{ + bool result = false; + const int kWriteBufSize = 32768; + unsigned char writeBuf[kWriteBufSize]; + z_stream zstream; + int zerr; + + /* + * Initialize the zlib stream struct. + */ + memset(&zstream, 0, sizeof(zstream)); + zstream.zalloc = Z_NULL; + zstream.zfree = Z_NULL; + zstream.opaque = Z_NULL; + zstream.next_in = (Bytef*)inBuf; + zstream.avail_in = compLen; + zstream.next_out = (Bytef*) writeBuf; + zstream.avail_out = sizeof(writeBuf); + zstream.data_type = Z_UNKNOWN; + + /* + * Use the undocumented "negative window bits" feature to tell zlib + * that there's no zlib header waiting for it. + */ + zerr = inflateInit2(&zstream, -MAX_WBITS); + if (zerr != Z_OK) { + if (zerr == Z_VERSION_ERROR) { + LOGE("Installed zlib is not compatible with linked version (%s)\n", + ZLIB_VERSION); + } else { + LOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr); + } + goto bail; + } + + /* + * Loop while we have more to do. + */ + do { + /* + * Expand data. + */ + zerr = inflate(&zstream, Z_NO_FLUSH); + if (zerr != Z_OK && zerr != Z_STREAM_END) { + LOGW("zlib inflate: zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)\n", + zerr, zstream.next_in, zstream.avail_in, + zstream.next_out, zstream.avail_out); + goto z_bail; + } + + /* write when we're full or when we're done */ + if (zstream.avail_out == 0 || + (zerr == Z_STREAM_END && zstream.avail_out != sizeof(writeBuf))) + { + long writeSize = zstream.next_out - writeBuf; + int cc = write(fd, writeBuf, writeSize); + if (cc != (int) writeSize) { + LOGW("write failed in inflate (%d vs %ld)\n", cc, writeSize); + goto z_bail; + } + + zstream.next_out = writeBuf; + zstream.avail_out = sizeof(writeBuf); + } + } while (zerr == Z_OK); + + assert(zerr == Z_STREAM_END); /* other errors should've been caught */ + + /* paranoia */ + if ((long) zstream.total_out != uncompLen) { + LOGW("Size mismatch on inflated file (%ld vs %ld)\n", + zstream.total_out, uncompLen); + goto z_bail; + } + + result = true; + +z_bail: + inflateEnd(&zstream); /* free up any allocated structures */ + +bail: + return result; +} |