From b99096ba5f1c5cd8dc680561a7e1a3fc899cd1f9 Mon Sep 17 00:00:00 2001 From: rsesek Date: Wed, 19 Aug 2015 12:07:42 -0700 Subject: Add HFSIterator to iteratively access files and data on HFS+ and HFSX volumes. BUG=496898 Review URL: https://codereview.chromium.org/1274353002 Cr-Commit-Position: refs/heads/master@{#344293} --- chrome/chrome_tests_unit.gypi | 20 + chrome/chrome_utility.gypi | 3 + .../data/safe_browsing/dmg/generate_test_data.sh | 33 ++ chrome/test/data/safe_browsing/dmg/make_hfs.sh | 79 +++ chrome/unit_tests.isolate | 1 + .../utility/safe_browsing/mac/convert_big_endian.h | 39 ++ chrome/utility/safe_browsing/mac/dmg_test_utils.cc | 16 + chrome/utility/safe_browsing/mac/dmg_test_utils.h | 5 + chrome/utility/safe_browsing/mac/hfs.cc | 648 +++++++++++++++++++++ chrome/utility/safe_browsing/mac/hfs.h | 96 +++ chrome/utility/safe_browsing/mac/hfs_unittest.cc | 228 ++++++++ 11 files changed, 1168 insertions(+) create mode 100755 chrome/test/data/safe_browsing/dmg/generate_test_data.sh create mode 100755 chrome/test/data/safe_browsing/dmg/make_hfs.sh create mode 100644 chrome/utility/safe_browsing/mac/convert_big_endian.h create mode 100644 chrome/utility/safe_browsing/mac/hfs.cc create mode 100644 chrome/utility/safe_browsing/mac/hfs.h create mode 100644 chrome/utility/safe_browsing/mac/hfs_unittest.cc diff --git a/chrome/chrome_tests_unit.gypi b/chrome/chrome_tests_unit.gypi index c8339f5..86837d1 100644 --- a/chrome/chrome_tests_unit.gypi +++ b/chrome/chrome_tests_unit.gypi @@ -1029,6 +1029,7 @@ 'renderer/safe_browsing/phishing_url_feature_extractor_unittest.cc', 'renderer/safe_browsing/scorer_unittest.cc', 'utility/safe_browsing/mac/dmg_test_utils.cc', + 'utility/safe_browsing/mac/hfs_unittest.cc', 'utility/safe_browsing/mac/read_stream_unittest.cc', ], # These are the enable_autofill_dialog = 1 sources. Some autofill tests @@ -2719,6 +2720,25 @@ 'browser/plugins/plugin_installer_unittest.cc', ], }], + ['safe_browsing==1 and OS=="mac"', { + 'actions': [ + { + 'action_name': 'Generate safe_browsing DMG test data', + 'variables': { + 'generate_test_data': 'test/data/safe_browsing/dmg/generate_test_data.sh', + }, + 'inputs': [ + '<(generate_test_data)', + 'test/data/safe_browsing/dmg/make_hfs.sh', + ], + 'outputs': [ + '<(PRODUCT_DIR)/test_data/chrome/safe_browsing_dmg/hfs_plus.img', + '<(PRODUCT_DIR)/test_data/chrome/safe_browsing_dmg/hfsx_case_sensitive.img', + ], + 'action': [ '<(generate_test_data)', '<(PRODUCT_DIR)/test_data/chrome/safe_browsing_dmg' ], + }, + ], + }], # Adding more conditions? Don't forget to update the GN build. ], }, diff --git a/chrome/chrome_utility.gypi b/chrome/chrome_utility.gypi index 92627bf..f2bb71e 100644 --- a/chrome/chrome_utility.gypi +++ b/chrome/chrome_utility.gypi @@ -68,6 +68,9 @@ 'utility/profile_import_handler.h', ], 'chrome_utility_safe_browsing_sources': [ + 'utility/safe_browsing/mac/convert_big_endian.h', + 'utility/safe_browsing/mac/hfs.cc', + 'utility/safe_browsing/mac/hfs.h', 'utility/safe_browsing/mac/read_stream.cc', 'utility/safe_browsing/mac/read_stream.h', ], diff --git a/chrome/test/data/safe_browsing/dmg/generate_test_data.sh b/chrome/test/data/safe_browsing/dmg/generate_test_data.sh new file mode 100755 index 0000000..f538c62 --- /dev/null +++ b/chrome/test/data/safe_browsing/dmg/generate_test_data.sh @@ -0,0 +1,33 @@ +#!/bin/sh + +# Copyright 2015 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
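# (Editor's note, not part of the original script: the gyp action above
# invokes this script as
#   ./generate_test_data.sh "$PRODUCT_DIR/test_data/chrome/safe_browsing_dmg"
# and it writes hfs_plus.img and hfsx_case_sensitive.img into that
# directory. It must run on macOS, since make_hfs.sh shells out to
# hdiutil and diskutil.)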
+ +THIS_DIR=$(dirname "$0") + +OUT_DIR="$1" + +if [[ ! "$1" ]]; then + echo "Usage: $(basename "$0") [output_dir]" + exit 1 +fi + +if [[ -e "$1" && ! -d "$1" ]]; then + echo "Output directory \`$1' exists but is not a directory." + exit 1 +fi +if [[ ! -d "$1" ]]; then + mkdir -p "$1" +fi + +generate_test_data() { + # HFS Raw Images ############################################################# + + MAKE_HFS="${THIS_DIR}/make_hfs.sh" + "${MAKE_HFS}" HFS+ 1024 "${OUT_DIR}/hfs_plus.img" + "${MAKE_HFS}" hfsx $((8 * 1024)) "${OUT_DIR}/hfsx_case_sensitive.img" +} + +# Silence any log output. +generate_test_data &> /dev/null diff --git a/chrome/test/data/safe_browsing/dmg/make_hfs.sh b/chrome/test/data/safe_browsing/dmg/make_hfs.sh new file mode 100755 index 0000000..df08184 --- /dev/null +++ b/chrome/test/data/safe_browsing/dmg/make_hfs.sh @@ -0,0 +1,79 @@ +#!/bin/sh + +# Copyright 2015 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +# This script is used to generate an HFS file system with several types of +# files of different sizes. + +set -e + +FILESYSTEM_TYPE="$1" +RAMDISK_SIZE="$2" +OUT_FILE="$3" + +VOLUME_NAME="SafeBrowsingDMG" +UNICODE_FILENAME="Tĕsẗ 🐐 " + +if [[ ! $FILESYSTEM_TYPE ]]; then + echo "Need to specify a filesystem type. See \`diskutil listfilesystems'." + exit 1 +fi + +if [[ ! $RAMDISK_SIZE ]]; then + echo "Need to specify a volume size in bytes." + exit 1 +fi + +if [[ ! $OUT_FILE ]]; then + echo "Need to specify a destination filename." + exit 1 +fi + +RAMDISK_VOLUME=$(hdiutil attach -nomount ram://$RAMDISK_SIZE) +diskutil erasevolume "${FILESYSTEM_TYPE}" "${VOLUME_NAME}" ${RAMDISK_VOLUME} + +pushd "/Volumes/${VOLUME_NAME}" + +touch .metadata_never_index + +mkdir -p first/second/third/fourth/fifth + +pushd first +pushd second +pushd third +pushd fourth +pushd fifth + +dd if=/dev/random of=random bs=1 count=768 + +popd # fourth + +touch "Hello World" +touch "hEllo wOrld" # No-op on case-insensitive filesystem. + +popd # third + +ln -s fourth/fifth/random symlink-random + +popd # second + +echo "Poop" > "${UNICODE_FILENAME}" +ditto --hfsCompression "${UNICODE_FILENAME}" goat-output.txt + +popd # first + +ln "second/${UNICODE_FILENAME}" unicode_name + +popd # volume root + +echo "This is a test HFS+ filesystem generated by" \ + "chrome/test/data/safe_browsing/dmg/make_hfs.sh." > README.txt + +popd # Original PWD + +# Unmount the volume, copy the raw device to a file, and then destroy it. +diskutil unmount ${RAMDISK_VOLUME} +dd if=${RAMDISK_VOLUME} of="${OUT_FILE}" +diskutil eject ${RAMDISK_VOLUME} diff --git a/chrome/unit_tests.isolate b/chrome/unit_tests.isolate index 24ffcc3..ad2aa01 100644 --- a/chrome/unit_tests.isolate +++ b/chrome/unit_tests.isolate @@ -103,6 +103,7 @@ '<(PRODUCT_DIR)/<(mac_product_name) Framework.framework/', '<(PRODUCT_DIR)/exif.so', '<(PRODUCT_DIR)/osmesa.so', + '<(PRODUCT_DIR)/test_data/chrome/safe_browsing_dmg/', ], }, }], diff --git a/chrome/utility/safe_browsing/mac/convert_big_endian.h b/chrome/utility/safe_browsing/mac/convert_big_endian.h new file mode 100644 index 0000000..00a648e --- /dev/null +++ b/chrome/utility/safe_browsing/mac/convert_big_endian.h @@ -0,0 +1,39 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
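// (Editor's note: an illustrative sketch, not part of this CL. The scalar
// overloads declared below compose into struct swappers written
// field-by-field; `Example` here is a hypothetical struct:
//
//   struct Example { uint16_t a; uint32_t b; };
//   inline void ConvertBigEndian(Example* e) {
//     ConvertBigEndian(&e->a);
//     ConvertBigEndian(&e->b);
//   }
//
// hfs.cc applies exactly this pattern to the HFS+ on-disk structures.)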
+
+#ifndef CHROME_UTILITY_SAFE_BROWSING_MAC_CONVERT_BIG_ENDIAN_H_
+#define CHROME_UTILITY_SAFE_BROWSING_MAC_CONVERT_BIG_ENDIAN_H_
+
+#include <libkern/OSByteOrder.h>
+#include <stdint.h>
+
+// This file contains byte swapping routines for use in safe_browsing::dmg. The
+// pattern is to use type-based overloading of the form ConvertBigEndian(T*) to
+// swap all structures from big-endian to host-endian. This file provides the
+// implementations for scalars, which are inlined since the OSSwap functions
+// themselves are macros that call compiler intrinsics.
+
+namespace safe_browsing {
+namespace dmg {
+
+inline void ConvertBigEndian(uint16_t* x) {
+  *x = OSSwapBigToHostInt16(*x);
+}
+
+inline void ConvertBigEndian(int16_t* x) {
+  *x = OSSwapBigToHostInt16(*x);
+}
+
+inline void ConvertBigEndian(uint32_t* x) {
+  *x = OSSwapBigToHostInt32(*x);
+}
+
+inline void ConvertBigEndian(uint64_t* x) {
+  *x = OSSwapBigToHostInt64(*x);
+}
+
+}  // namespace dmg
+}  // namespace safe_browsing
+
+#endif  // CHROME_UTILITY_SAFE_BROWSING_MAC_CONVERT_BIG_ENDIAN_H_
diff --git a/chrome/utility/safe_browsing/mac/dmg_test_utils.cc b/chrome/utility/safe_browsing/mac/dmg_test_utils.cc
index cb6cfcf..cf6c5a2 100644
--- a/chrome/utility/safe_browsing/mac/dmg_test_utils.cc
+++ b/chrome/utility/safe_browsing/mac/dmg_test_utils.cc
@@ -4,12 +4,28 @@
 
 #include "chrome/utility/safe_browsing/mac/dmg_test_utils.h"
 
+#include "base/files/file_path.h"
 #include "base/logging.h"
+#include "base/path_service.h"
+#include "chrome/common/chrome_paths.h"
+#include "testing/gtest/include/gtest/gtest.h"
 
 namespace safe_browsing {
 namespace dmg {
 namespace test {
 
+void GetTestFile(const char* file_name, base::File* file) {
+  base::FilePath test_data;
+  ASSERT_TRUE(PathService::Get(chrome::DIR_GEN_TEST_DATA, &test_data));
+
+  base::FilePath path = test_data.AppendASCII("chrome")
+                                 .AppendASCII("safe_browsing_dmg")
+                                 .AppendASCII(file_name);
+
+  *file = base::File(path, base::File::FLAG_OPEN | base::File::FLAG_READ);
+  ASSERT_TRUE(file->IsValid());
+}
+
 bool ReadEntireStream(ReadStream* stream, std::vector<uint8_t>* data) {
   DCHECK(data->empty());
   uint8_t buffer[1024];
diff --git a/chrome/utility/safe_browsing/mac/dmg_test_utils.h b/chrome/utility/safe_browsing/mac/dmg_test_utils.h
index d3834a7..87d34de 100644
--- a/chrome/utility/safe_browsing/mac/dmg_test_utils.h
+++ b/chrome/utility/safe_browsing/mac/dmg_test_utils.h
@@ -7,12 +7,17 @@
 
 #include <vector>
 
+#include "base/files/file.h"
 #include "chrome/utility/safe_browsing/mac/read_stream.h"
 
 namespace safe_browsing {
 namespace dmg {
 namespace test {
 
+// Opens a generated test data file. Uses gtest assertions to verify success,
+// so this should be called with ASSERT_NO_FATAL_FAILURE().
+void GetTestFile(const char* file_name, base::File* file);
+
 // Reads the given |stream| until end-of-stream is reached, storing the read
 // bytes into |data|. Returns true on success and false on error.
 bool ReadEntireStream(ReadStream* stream, std::vector<uint8_t>* data);
diff --git a/chrome/utility/safe_browsing/mac/hfs.cc b/chrome/utility/safe_browsing/mac/hfs.cc
new file mode 100644
index 0000000..da9f5d7
--- /dev/null
+++ b/chrome/utility/safe_browsing/mac/hfs.cc
@@ -0,0 +1,648 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
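// (Editor's note, background summarized from Apple TN1150: the HFS+ volume
// header sits 1024 bytes into the volume and is big-endian on disk, which
// is why Open() below seeks to offset 1024 and byte-swaps everything it
// reads. The catalog file named by that header is a B-tree: node 0 holds a
// BTHeaderRec giving the node size and the first leaf node, and the leaf
// nodes form a linked list via each node's fLink, which is how
// HFSBTreeIterator visits every catalog record in order.)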
+ +#include "chrome/utility/safe_browsing/mac/hfs.h" + +#include +#include + +#include +#include +#include + +#include "base/logging.h" +#include "base/numerics/safe_math.h" +#include "base/strings/utf_string_conversions.h" +#include "chrome/utility/safe_browsing/mac/convert_big_endian.h" +#include "chrome/utility/safe_browsing/mac/read_stream.h" + +namespace safe_browsing { +namespace dmg { + +// UTF-16 character for file path seprator. +static const uint16_t kFilePathSeparator = '/'; + +static void ConvertBigEndian(HFSPlusForkData* fork) { + ConvertBigEndian(&fork->logicalSize); + ConvertBigEndian(&fork->clumpSize); + ConvertBigEndian(&fork->totalBlocks); + for (size_t i = 0; i < arraysize(fork->extents); ++i) { + ConvertBigEndian(&fork->extents[i].startBlock); + ConvertBigEndian(&fork->extents[i].blockCount); + } +} + +static void ConvertBigEndian(HFSPlusVolumeHeader* header) { + ConvertBigEndian(&header->signature); + ConvertBigEndian(&header->version); + ConvertBigEndian(&header->attributes); + ConvertBigEndian(&header->lastMountedVersion); + ConvertBigEndian(&header->journalInfoBlock); + ConvertBigEndian(&header->createDate); + ConvertBigEndian(&header->modifyDate); + ConvertBigEndian(&header->backupDate); + ConvertBigEndian(&header->checkedDate); + ConvertBigEndian(&header->fileCount); + ConvertBigEndian(&header->folderCount); + ConvertBigEndian(&header->blockSize); + ConvertBigEndian(&header->totalBlocks); + ConvertBigEndian(&header->freeBlocks); + ConvertBigEndian(&header->nextAllocation); + ConvertBigEndian(&header->rsrcClumpSize); + ConvertBigEndian(&header->dataClumpSize); + ConvertBigEndian(&header->nextCatalogID); + ConvertBigEndian(&header->writeCount); + ConvertBigEndian(&header->encodingsBitmap); + ConvertBigEndian(&header->allocationFile); + ConvertBigEndian(&header->extentsFile); + ConvertBigEndian(&header->catalogFile); + ConvertBigEndian(&header->attributesFile); + ConvertBigEndian(&header->startupFile); +} + +static void ConvertBigEndian(BTHeaderRec* header) { + ConvertBigEndian(&header->treeDepth); + ConvertBigEndian(&header->rootNode); + ConvertBigEndian(&header->leafRecords); + ConvertBigEndian(&header->firstLeafNode); + ConvertBigEndian(&header->lastLeafNode); + ConvertBigEndian(&header->nodeSize); + ConvertBigEndian(&header->maxKeyLength); + ConvertBigEndian(&header->totalNodes); + ConvertBigEndian(&header->freeNodes); + ConvertBigEndian(&header->reserved1); + ConvertBigEndian(&header->clumpSize); + ConvertBigEndian(&header->attributes); +} + +static void ConvertBigEndian(BTNodeDescriptor* node) { + ConvertBigEndian(&node->fLink); + ConvertBigEndian(&node->bLink); + ConvertBigEndian(&node->numRecords); +} + +static void ConvertBigEndian(HFSPlusCatalogFolder* folder) { + ConvertBigEndian(&folder->recordType); + ConvertBigEndian(&folder->flags); + ConvertBigEndian(&folder->valence); + ConvertBigEndian(&folder->folderID); + ConvertBigEndian(&folder->createDate); + ConvertBigEndian(&folder->contentModDate); + ConvertBigEndian(&folder->attributeModDate); + ConvertBigEndian(&folder->accessDate); + ConvertBigEndian(&folder->backupDate); + ConvertBigEndian(&folder->bsdInfo.ownerID); + ConvertBigEndian(&folder->bsdInfo.groupID); + ConvertBigEndian(&folder->bsdInfo.fileMode); + ConvertBigEndian(&folder->textEncoding); + ConvertBigEndian(&folder->folderCount); +} + +static void ConvertBigEndian(HFSPlusCatalogFile* file) { + ConvertBigEndian(&file->recordType); + ConvertBigEndian(&file->flags); + ConvertBigEndian(&file->reserved1); + ConvertBigEndian(&file->fileID); + 
ConvertBigEndian(&file->createDate); + ConvertBigEndian(&file->contentModDate); + ConvertBigEndian(&file->attributeModDate); + ConvertBigEndian(&file->accessDate); + ConvertBigEndian(&file->backupDate); + ConvertBigEndian(&file->bsdInfo.ownerID); + ConvertBigEndian(&file->bsdInfo.groupID); + ConvertBigEndian(&file->bsdInfo.fileMode); + ConvertBigEndian(&file->userInfo.fdType); + ConvertBigEndian(&file->userInfo.fdCreator); + ConvertBigEndian(&file->userInfo.fdFlags); + ConvertBigEndian(&file->textEncoding); + ConvertBigEndian(&file->reserved2); + ConvertBigEndian(&file->dataFork); + ConvertBigEndian(&file->resourceFork); +} + +// A ReadStream implementation for an HFS+ fork. This only consults the eight +// fork extents. This does not consult the extent overflow file. +class HFSForkReadStream : public ReadStream { + public: + HFSForkReadStream(HFSIterator* hfs, const HFSPlusForkData& fork); + ~HFSForkReadStream() override; + + bool Read(uint8_t* buffer, size_t buffer_size, size_t* bytes_read) override; + // Seek only supports SEEK_SET. + off_t Seek(off_t offset, int whence) override; + + private: + HFSIterator* const hfs_; // The HFS+ iterator. + const HFSPlusForkData fork_; // The fork to be read. + uint8_t current_extent_; // The current extent index in the fork. + bool read_current_extent_; // Whether the current_extent_ has been read. + std::vector current_extent_data_; // Data for |current_extent_|. + size_t fork_logical_offset_; // The logical offset into the fork. + + DISALLOW_COPY_AND_ASSIGN(HFSForkReadStream); +}; + +// HFSBTreeIterator iterates over the HFS+ catalog file. +class HFSBTreeIterator { + public: + struct Entry { + uint16_t record_type; // Catalog folder item type. + base::string16 path; // Full path to the item. + bool unexported; // Whether this is HFS+ private data. + union { + HFSPlusCatalogFile* file; + HFSPlusCatalogFolder* folder; + }; + }; + + HFSBTreeIterator(); + ~HFSBTreeIterator(); + + bool Init(ReadStream* stream); + + bool HasNext(); + bool Next(); + + const Entry* current_record() const { return ¤t_record_; } + + private: + // Seeks |stream_| to the catalog node ID. + bool SeekToNode(uint32_t node_id); + + // If required, reads the current leaf into |leaf_data_| and updates the + // buffer offsets. + bool ReadCurrentLeaf(); + + // Returns a pointer to data at |current_leaf_offset_| in |leaf_data_|. This + // then advances the offset by the size of the object being returned. + template T* GetLeafData(); + + // Checks if the HFS+ catalog key is a Mac OS X reserved key that should not + // have it or its contents iterated over. + bool IsKeyUnexported(const base::string16& path); + + ReadStream* stream_; // The stream backing the catalog file. + BTHeaderRec header_; // The header B-tree node. + + // Maps CNIDs to their full path. This is used to construct full paths for + // items that descend from the folders in this map. + std::map folder_cnid_map_; + + // CNIDs of the non-exported folders reserved by OS X. If an item has this + // CNID as a parent, it should be skipped. + std::set unexported_parents_; + + // The total number of leaf records read from all the leaf nodes. + uint32_t leaf_records_read_; + + // The number of records read from the current leaf node. + uint32_t current_leaf_records_read_; + uint32_t current_leaf_number_; // The node ID of the leaf being read. + // Whether the |current_leaf_number_|'s data has been read into the + // |leaf_data_| buffer. 
+  bool read_current_leaf_;
+  // The node data for |current_leaf_number_| copied from |stream_|.
+  std::vector<uint8_t> leaf_data_;
+  size_t current_leaf_offset_;  // The offset in |leaf_data_|.
+
+  // Pointer to |leaf_data_| as a BTNodeDescriptor.
+  const BTNodeDescriptor* current_leaf_;
+  Entry current_record_;  // The record read at |current_leaf_offset_|.
+
+  // Constant, string16 versions of the __APPLE_API_PRIVATE values.
+  const base::string16 kHFSMetadataFolder =
+      base::UTF8ToUTF16(base::StringPiece("\x0\x0\x0\x0HFS+ Private Data", 21));
+  const base::string16 kHFSDirMetadataFolder =
+      base::UTF8ToUTF16(".HFS+ Private Directory Data\xd");
+
+  DISALLOW_COPY_AND_ASSIGN(HFSBTreeIterator);
+};
+
+HFSIterator::HFSIterator(ReadStream* stream)
+    : stream_(stream),
+      volume_header_() {
+}
+
+HFSIterator::~HFSIterator() {}
+
+bool HFSIterator::Open() {
+  if (stream_->Seek(1024, SEEK_SET) != 1024)
+    return false;
+
+  if (!stream_->ReadType(&volume_header_)) {
+    DLOG(ERROR) << "Failed to read volume header";
+    return false;
+  }
+  ConvertBigEndian(&volume_header_);
+
+  if (volume_header_.signature != kHFSPlusSigWord &&
+      volume_header_.signature != kHFSXSigWord) {
+    DLOG(ERROR) << "Unrecognized volume header signature "
+                << volume_header_.signature;
+    return false;
+  }
+
+  if (!ReadCatalogFile())
+    return false;
+
+  return true;
+}
+
+bool HFSIterator::Next() {
+  if (!catalog_->HasNext())
+    return false;
+
+  // The iterator should only stop on files and folders, skipping over "thread
+  // records". In addition, unexported private files and directories should be
+  // skipped as well.
+  bool keep_going = false;
+  do {
+    keep_going = catalog_->Next();
+    if (keep_going) {
+      if (!catalog_->current_record()->unexported &&
+          (catalog_->current_record()->record_type == kHFSPlusFolderRecord ||
+           catalog_->current_record()->record_type == kHFSPlusFileRecord)) {
+        return true;
+      }
+      keep_going = catalog_->HasNext();
+    }
+  } while (keep_going);
+
+  return keep_going;
+}
+
+bool HFSIterator::IsDirectory() {
+  return catalog_->current_record()->record_type == kHFSPlusFolderRecord;
+}
+
+bool HFSIterator::IsSymbolicLink() {
+  if (IsDirectory())
+    return S_ISLNK(catalog_->current_record()->folder->bsdInfo.fileMode);
+  else
+    return S_ISLNK(catalog_->current_record()->file->bsdInfo.fileMode);
+}
+
+bool HFSIterator::IsHardLink() {
+  if (IsDirectory())
+    return false;
+  const HFSPlusCatalogFile* file = catalog_->current_record()->file;
+  return file->userInfo.fdType == kHardLinkFileType &&
+         file->userInfo.fdCreator == kHFSPlusCreator;
+}
+
+bool HFSIterator::IsDecmpfsCompressed() {
+  if (IsDirectory())
+    return false;
+  const HFSPlusCatalogFile* file = catalog_->current_record()->file;
+  return file->bsdInfo.ownerFlags & UF_COMPRESSED;
+}
+
+base::string16 HFSIterator::GetPath() {
+  return catalog_->current_record()->path;
+}
+
+scoped_ptr<ReadStream> HFSIterator::GetReadStream() {
+  if (IsDirectory() || IsHardLink())
+    return nullptr;
+
+  DCHECK_EQ(kHFSPlusFileRecord, catalog_->current_record()->record_type);
+  return make_scoped_ptr(
+      new HFSForkReadStream(this, catalog_->current_record()->file->dataFork));
+}
+
+bool HFSIterator::SeekToBlock(uint64_t block) {
+  uint64_t offset = block * volume_header_.blockSize;
+  off_t rv = stream_->Seek(offset, SEEK_SET);
+  return rv >= 0 && static_cast<uint64_t>(rv) == offset;
+}
+
+bool HFSIterator::ReadCatalogFile() {
+  catalog_file_.reset(new HFSForkReadStream(this, volume_header_.catalogFile));
+  catalog_.reset(new HFSBTreeIterator());
+  return catalog_->Init(catalog_file_.get());
+}
+
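// (Editor's note: an illustrative usage sketch, not part of this CL; it
// mirrors what the unit tests below do with a FileReadStream over an open
// image file:
//
//   FileReadStream stream(file.GetPlatformFile());
//   HFSIterator iterator(&stream);
//   if (iterator.Open()) {
//     while (iterator.Next()) {
//       if (!iterator.IsDirectory() && !iterator.IsHardLink()) {
//         scoped_ptr<ReadStream> data = iterator.GetReadStream();
//         // ... read the file's data fork via data->Read() ...
//       }
//     }
//   }
// )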
+HFSForkReadStream::HFSForkReadStream(HFSIterator* hfs,
+                                     const HFSPlusForkData& fork)
+    : hfs_(hfs),
+      fork_(fork),
+      current_extent_(0),
+      read_current_extent_(false),
+      current_extent_data_(),
+      fork_logical_offset_(0) {
+}
+
+HFSForkReadStream::~HFSForkReadStream() {}
+
+bool HFSForkReadStream::Read(uint8_t* buffer,
+                             size_t buffer_size,
+                             size_t* bytes_read) {
+  size_t buffer_space_remaining = buffer_size;
+  *bytes_read = 0;
+
+  if (fork_logical_offset_ == fork_.logicalSize)
+    return true;
+
+  for (; current_extent_ < arraysize(fork_.extents); ++current_extent_) {
+    // If the buffer is out of space, do not attempt any reads. Check this
+    // here, so that current_extent_ is advanced by the loop if the last
+    // extent was fully read.
+    if (buffer_space_remaining == 0)
+      break;
+
+    const HFSPlusExtentDescriptor* extent = &fork_.extents[current_extent_];
+
+    // A zero-length extent means end-of-fork.
+    if (extent->startBlock == 0 && extent->blockCount == 0)
+      break;
+
+    auto extent_size =
+        base::CheckedNumeric<size_t>(extent->blockCount) * hfs_->block_size();
+    if (!extent_size.IsValid()) {
+      DLOG(ERROR) << "Extent blockCount overflows";
+      return false;
+    }
+
+    // Read the entire extent now, to avoid excessive seeking and re-reading.
+    if (!read_current_extent_) {
+      hfs_->SeekToBlock(extent->startBlock);
+      current_extent_data_.resize(extent_size.ValueOrDie());
+      if (!hfs_->stream()->ReadExact(&current_extent_data_[0],
+                                     extent_size.ValueOrDie())) {
+        DLOG(ERROR) << "Failed to read extent " << current_extent_;
+        return false;
+      }
+
+      read_current_extent_ = true;
+    }
+
+    size_t extent_offset = fork_logical_offset_ % extent_size.ValueOrDie();
+    size_t bytes_to_copy =
+        std::min(std::min(static_cast<size_t>(fork_.logicalSize) -
+                              fork_logical_offset_,
+                          extent_size.ValueOrDie() - extent_offset),
+                 buffer_space_remaining);
+
+    memcpy(&buffer[buffer_size - buffer_space_remaining],
+           &current_extent_data_[extent_offset],
+           bytes_to_copy);
+
+    buffer_space_remaining -= bytes_to_copy;
+    *bytes_read += bytes_to_copy;
+    fork_logical_offset_ += bytes_to_copy;
+
+    // If the fork's data have been read, then end the loop.
+    if (fork_logical_offset_ == fork_.logicalSize)
+      return true;
+
+    // If this extent still has data to be copied out, then the read was
+    // partial and the buffer is full. Do not advance to the next extent.
+    if (extent_offset < current_extent_data_.size())
+      break;
+
+    // Advance to the next extent, so reset the state.
+    read_current_extent_ = false;
+  }
+
+  return true;
+}
+
+off_t HFSForkReadStream::Seek(off_t offset, int whence) {
+  DCHECK_EQ(SEEK_SET, whence);
+  DCHECK_GE(offset, 0);
+  DCHECK_LT(static_cast<uint64_t>(offset), fork_.logicalSize);
+  size_t target_block = offset / hfs_->block_size();
+  size_t block_count = 0;
+  for (size_t i = 0; i < arraysize(fork_.extents); ++i) {
+    const HFSPlusExtentDescriptor* extent = &fork_.extents[i];
+
+    // An empty extent indicates end-of-fork.
+    if (extent->startBlock == 0 && extent->blockCount == 0)
+      break;
+
+    base::CheckedNumeric<size_t> new_block_count(block_count);
+    new_block_count += extent->blockCount;
+    if (!new_block_count.IsValid()) {
+      DLOG(ERROR) << "Seek offset block count overflows";
+      return -1;
+    }
+
+    if (target_block < new_block_count.ValueOrDie()) {
+      if (current_extent_ != i) {
+        read_current_extent_ = false;
+        current_extent_ = i;
+      }
+      auto iterator_block_offset =
+          base::CheckedNumeric<size_t>(block_count) * hfs_->block_size();
+      if (!iterator_block_offset.IsValid()) {
+        DLOG(ERROR) << "Seek block offset overflows";
+        return -1;
+      }
+      fork_logical_offset_ = offset;
+      return offset;
+    }
+
+    block_count = new_block_count.ValueOrDie();
+  }
+  return -1;
+}
+
+HFSBTreeIterator::HFSBTreeIterator()
+    : stream_(),
+      header_(),
+      leaf_records_read_(0),
+      current_leaf_records_read_(0),
+      current_leaf_number_(0),
+      read_current_leaf_(false),
+      leaf_data_(),
+      current_leaf_offset_(0),
+      current_leaf_() {
+}
+
+HFSBTreeIterator::~HFSBTreeIterator() {}
+
+bool HFSBTreeIterator::Init(ReadStream* stream) {
+  DCHECK(!stream_);
+  stream_ = stream;
+
+  if (stream_->Seek(0, SEEK_SET) != 0) {
+    DLOG(ERROR) << "Failed to seek to header node";
+    return false;
+  }
+
+  BTNodeDescriptor node;
+  if (!stream_->ReadType(&node)) {
+    DLOG(ERROR) << "Failed to read BTNodeDescriptor";
+    return false;
+  }
+  ConvertBigEndian(&node);
+
+  if (node.kind != kBTHeaderNode) {
+    DLOG(ERROR) << "Initial node is not a header node";
+    return false;
+  }
+
+  if (!stream_->ReadType(&header_)) {
+    DLOG(ERROR) << "Failed to read BTHeaderRec";
+    return false;
+  }
+  ConvertBigEndian(&header_);
+
+  current_leaf_number_ = header_.firstLeafNode;
+  leaf_data_.resize(header_.nodeSize);
+
+  return true;
+}
+
+bool HFSBTreeIterator::HasNext() {
+  return leaf_records_read_ < header_.leafRecords;
+}
+
+bool HFSBTreeIterator::Next() {
+  if (!ReadCurrentLeaf())
+    return false;
+
+  GetLeafData<uint16_t>();  // keyLength
+  auto parent_id = OSSwapBigToHostInt32(*GetLeafData<uint32_t>());
+  auto key_string_length = OSSwapBigToHostInt16(*GetLeafData<uint16_t>());
+  auto key_string =
+      reinterpret_cast<uint16_t*>(&leaf_data_[current_leaf_offset_]);
+  for (uint16_t i = 0;
+       i < key_string_length;
+       ++i, current_leaf_offset_ += sizeof(uint16_t)) {
+    key_string[i] = OSSwapBigToHostInt16(key_string[i]);
+  }
+  base::string16 key(key_string, key_string_length);
+
+  // Read the record type and then rewind as the field is part of the catalog
+  // structure that is read next.
+  current_record_.record_type = OSSwapBigToHostInt16(*GetLeafData<int16_t>());
+  current_record_.unexported = false;
+  current_leaf_offset_ -= sizeof(int16_t);
+  switch (current_record_.record_type) {
+    case kHFSPlusFolderRecord: {
+      auto folder = GetLeafData<HFSPlusCatalogFolder>();
+      ConvertBigEndian(folder);
+      ++leaf_records_read_;
+      ++current_leaf_records_read_;
+
+      // If this key is unexported, or the parent folder is, then mark the
+      // record as such.
+      if (IsKeyUnexported(key) ||
+          unexported_parents_.find(parent_id) != unexported_parents_.end()) {
+        unexported_parents_.insert(folder->folderID);
+        current_record_.unexported = true;
+      }
+
+      // Update the CNID map to construct the path tree.
+      if (parent_id != 0) {
+        auto parent_name = folder_cnid_map_.find(parent_id);
+        if (parent_name != folder_cnid_map_.end())
+          key = parent_name->second + kFilePathSeparator + key;
+      }
+      folder_cnid_map_[folder->folderID] = key;
+
+      current_record_.path = key;
+      current_record_.folder = folder;
+      break;
+    }
+    case kHFSPlusFileRecord: {
+      auto file = GetLeafData<HFSPlusCatalogFile>();
+      ConvertBigEndian(file);
+      ++leaf_records_read_;
+      ++current_leaf_records_read_;
+
+      base::string16 path =
+          folder_cnid_map_[parent_id] + kFilePathSeparator + key;
+      current_record_.path = path;
+      current_record_.file = file;
+      current_record_.unexported =
+          unexported_parents_.find(parent_id) != unexported_parents_.end();
+      break;
+    }
+    case kHFSPlusFolderThreadRecord:
+    case kHFSPlusFileThreadRecord: {
+      // Thread records are used to quickly locate a file or folder just by
+      // CNID. As these are not necessary for the iterator, skip past the data.
+      GetLeafData<int16_t>();  // recordType
+      GetLeafData<int16_t>();  // reserved
+      GetLeafData<uint32_t>();  // parentID
+      auto string_length = OSSwapBigToHostInt16(*GetLeafData<uint16_t>());
+      for (uint16_t i = 0; i < string_length; ++i)
+        GetLeafData<uint16_t>();
+      ++leaf_records_read_;
+      ++current_leaf_records_read_;
+      break;
+    }
+    default:
+      DLOG(ERROR) << "Unknown record type " << current_record_.record_type;
+      return false;
+  }
+
+  // If all the records from this leaf have been read, follow the forward link
+  // to the next B-Tree leaf node.
+  if (current_leaf_records_read_ >= current_leaf_->numRecords) {
+    current_leaf_number_ = current_leaf_->fLink;
+    read_current_leaf_ = false;
+  }
+
+  return true;
+}
+
+bool HFSBTreeIterator::SeekToNode(uint32_t node_id) {
+  if (node_id >= header_.totalNodes)
+    return false;
+  size_t offset = node_id * header_.nodeSize;
+  if (stream_->Seek(offset, SEEK_SET) != -1) {
+    current_leaf_number_ = node_id;
+    return true;
+  }
+  return false;
+}
+
+bool HFSBTreeIterator::ReadCurrentLeaf() {
+  if (read_current_leaf_)
+    return true;
+
+  if (!SeekToNode(current_leaf_number_)) {
+    DLOG(ERROR) << "Failed to seek to node " << current_leaf_number_;
+    return false;
+  }
+
+  if (!stream_->ReadExact(&leaf_data_[0], header_.nodeSize)) {
+    DLOG(ERROR) << "Failed to read node " << current_leaf_number_;
+    return false;
+  }
+
+  auto leaf = reinterpret_cast<BTNodeDescriptor*>(&leaf_data_[0]);
+  ConvertBigEndian(leaf);
+  if (leaf->kind != kBTLeafNode) {
+    DLOG(ERROR) << "Node " << current_leaf_number_ << " is not a leaf";
+    return false;
+  }
+  current_leaf_ = leaf;
+  current_leaf_offset_ = sizeof(BTNodeDescriptor);
+  current_leaf_records_read_ = 0;
+  read_current_leaf_ = true;
+  return true;
+}
+
+template <typename T>
+T* HFSBTreeIterator::GetLeafData() {
+  base::CheckedNumeric<size_t> size = sizeof(T);
+  auto new_offset = size + current_leaf_offset_;
+  if (!new_offset.IsValid() || new_offset.ValueOrDie() >= leaf_data_.size())
+    return nullptr;
+  T* object = reinterpret_cast<T*>(&leaf_data_[current_leaf_offset_]);
+  current_leaf_offset_ = new_offset.ValueOrDie();
+  return object;
+}
+
+bool HFSBTreeIterator::IsKeyUnexported(const base::string16& key) {
+  return key == kHFSDirMetadataFolder ||
+         key == kHFSMetadataFolder;
+}
+
+}  // namespace dmg
+}  // namespace safe_browsing
diff --git a/chrome/utility/safe_browsing/mac/hfs.h b/chrome/utility/safe_browsing/mac/hfs.h
new file mode 100644
index 0000000..8a4a6d7
--- /dev/null
+++ b/chrome/utility/safe_browsing/mac/hfs.h
@@ -0,0 +1,96 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
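// (Editor's note, background for the Is*() predicates declared below,
// summarized from TN1150 and the hfs_format.h constants: an HFS+ hard link
// is a catalog file whose Finder type/creator pair is kHardLinkFileType /
// kHFSPlusCreator ('hlnk' / 'hfs+'), and a decmpfs-compressed file carries
// UF_COMPRESSED in its BSD flags, with its actual content stored in a
// com.apple.decmpfs extended attribute that this iterator does not read;
// hence the "no content" limitation in the class comment.)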
+
+#ifndef CHROME_UTILITY_SAFE_BROWSING_MAC_HFS_H_
+#define CHROME_UTILITY_SAFE_BROWSING_MAC_HFS_H_
+
+#include <hfs/hfs_format.h>
+#include <stdint.h>
+
+#include "base/macros.h"
+#include "base/memory/scoped_ptr.h"
+#include "base/strings/string16.h"
+
+namespace safe_browsing {
+namespace dmg {
+
+class ReadStream;
+class HFSBTreeIterator;
+class HFSForkReadStream;
+
+// HFSIterator is a read-only iterator over an HFS+ file system. It provides
+// access to the data fork of all files on the system, as well as the path.
+// This implementation has several deliberate limitations:
+//   - Only HFS+ and HFSX are supported.
+//   - The journal file is ignored. As this is intended to be used for HFS+ in
+//     a DMG, replaying the journal should not typically be required.
+//   - The extents overflow file is not consulted. In a DMG, the file system
+//     should not be fragmented, and so consulting this should not typically
+//     be required.
+//   - No access is provided to resource forks.
+//   - Getting the ReadStream for hard linked files is not supported.
+//   - Files in hard linked directories are ignored.
+//   - No content will be returned for files that are decmpfs compressed.
+// For information on the HFS format, see
+// <https://developer.apple.com/legacy/library/technotes/tn/tn1150.html>.
+class HFSIterator {
+ public:
+  // Constructs an iterator from a stream.
+  explicit HFSIterator(ReadStream* stream);
+  ~HFSIterator();
+
+  // Opens the filesystem and initializes the iterator. The iterator is
+  // initialized to an invalid item before the first entry. Use Next() to
+  // advance the iterator. This method must be called before any other
+  // method. If this returns false, it is not legal to call any other methods.
+  bool Open();
+
+  // Advances the iterator to the next item. If this returns false, then it
+  // is not legal to call any other methods.
+  bool Next();
+
+  // Returns true if the current iterator item is a directory and false if it
+  // is a file.
+  bool IsDirectory();
+
+  // Returns true if the current iterator item is a symbolic link.
+  bool IsSymbolicLink();
+
+  // Returns true if the current iterator item is a hard link.
+  bool IsHardLink();
+
+  // Returns true if the current iterator item is decmpfs-compressed.
+  bool IsDecmpfsCompressed();
+
+  // Returns the full filesystem path of the current iterator item.
+  base::string16 GetPath();
+
+  // Returns a stream for the data fork of the current iterator item. This may
+  // only be called if IsDirectory() and IsHardLink() both return false.
+  scoped_ptr<ReadStream> GetReadStream();
+
+ private:
+  friend class HFSForkReadStream;
+
+  // Moves the |stream_| position to a specific HFS+ |block|.
+  bool SeekToBlock(uint64_t block);
+
+  // Reads the catalog file to initialize the iterator.
+  bool ReadCatalogFile();
+
+  uint32_t block_size() const { return volume_header_.blockSize; }
+  ReadStream* stream() const { return stream_; }
+
+  ReadStream* const stream_;  // The stream backing the filesystem.
+  HFSPlusVolumeHeader volume_header_;
+  scoped_ptr<HFSForkReadStream> catalog_file_;  // Data of the catalog file.
+  scoped_ptr<HFSBTreeIterator> catalog_;  // Iterator over the catalog file.
+
+  DISALLOW_COPY_AND_ASSIGN(HFSIterator);
+};
+
+}  // namespace dmg
+}  // namespace safe_browsing
+
+#endif  // CHROME_UTILITY_SAFE_BROWSING_MAC_HFS_H_
diff --git a/chrome/utility/safe_browsing/mac/hfs_unittest.cc b/chrome/utility/safe_browsing/mac/hfs_unittest.cc
new file mode 100644
index 0000000..4ef9316
--- /dev/null
+++ b/chrome/utility/safe_browsing/mac/hfs_unittest.cc
@@ -0,0 +1,228 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/utility/safe_browsing/mac/hfs.h"
+
+#include "base/files/file.h"
+#include "base/logging.h"
+#include "base/strings/string_piece.h"
+#include "base/strings/string_util.h"
+#include "base/strings/utf_string_conversions.h"
+#include "chrome/utility/safe_browsing/mac/dmg_test_utils.h"
+#include "chrome/utility/safe_browsing/mac/read_stream.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace safe_browsing {
+namespace dmg {
+namespace {
+
+class HFSIteratorTest : public testing::Test {
+ public:
+  void GetTargetFiles(bool case_sensitive,
+                      std::set<base::string16>* files,
+                      std::set<base::string16>* dirs) {
+    const char* kBaseFiles[] = {
+      "first/second/third/fourth/fifth/random",
+      "first/second/third/fourth/Hello World",
+      "first/second/third/symlink-random",
+      "first/second/goat-output.txt",
+      "first/unicode_name",
+      "README.txt",
+      ".metadata_never_index",
+    };
+
+    const char* kBaseDirs[] = {
+      "first/second/third/fourth/fifth",
+      "first/second/third/fourth",
+      "first/second/third",
+      "first/second",
+      "first",
+      ".Trashes",
+    };
+
+    const base::string16 dmg_name = base::ASCIIToUTF16("SafeBrowsingDMG/");
+
+    for (size_t i = 0; i < arraysize(kBaseFiles); ++i)
+      files->insert(dmg_name + base::ASCIIToUTF16(kBaseFiles[i]));
+
+    files->insert(dmg_name + base::ASCIIToUTF16("first/second/") +
+        base::UTF8ToUTF16("Te\xCC\x86st\xCC\x88 \xF0\x9F\x90\x90 "));
+
+    dirs->insert(dmg_name.substr(0, dmg_name.size() - 1));
+    for (size_t i = 0; i < arraysize(kBaseDirs); ++i)
+      dirs->insert(dmg_name + base::ASCIIToUTF16(kBaseDirs[i]));
+
+    if (case_sensitive) {
+      files->insert(base::ASCIIToUTF16(
+          "SafeBrowsingDMG/first/second/third/fourth/hEllo wOrld"));
+    }
+  }
+
+  void TestTargetFiles(safe_browsing::dmg::HFSIterator* hfs_reader,
+                       bool case_sensitive) {
+    std::set<base::string16> files, dirs;
+    GetTargetFiles(case_sensitive, &files, &dirs);
+
+    ASSERT_TRUE(hfs_reader->Open());
+    while (hfs_reader->Next()) {
+      base::string16 path = hfs_reader->GetPath();
+      // Skip over .fseventsd files.
+      if (path.find(base::ASCIIToUTF16("SafeBrowsingDMG/.fseventsd")) !=
+          base::string16::npos) {
+        continue;
+      }
+      if (hfs_reader->IsDirectory())
+        EXPECT_TRUE(dirs.erase(path)) << path;
+      else
+        EXPECT_TRUE(files.erase(path)) << path;
+    }
+
+    EXPECT_EQ(0u, files.size());
+    for (const auto& file : files) {
+      ADD_FAILURE() << "Unexpected missing file " << file;
+    }
+  }
+};
+
+TEST_F(HFSIteratorTest, HFSPlus) {
+  base::File file;
+  ASSERT_NO_FATAL_FAILURE(test::GetTestFile("hfs_plus.img", &file));
+
+  FileReadStream stream(file.GetPlatformFile());
+  HFSIterator hfs_reader(&stream);
+  TestTargetFiles(&hfs_reader, false);
+}
+
+TEST_F(HFSIteratorTest, HFSXCaseSensitive) {
+  base::File file;
+  ASSERT_NO_FATAL_FAILURE(test::GetTestFile("hfsx_case_sensitive.img", &file));
+
+  FileReadStream stream(file.GetPlatformFile());
+  HFSIterator hfs_reader(&stream);
+  TestTargetFiles(&hfs_reader, true);
+}
+
+class HFSFileReadTest : public testing::TestWithParam<const char*> {
+ protected:
+  void SetUp() override {
+    ASSERT_NO_FATAL_FAILURE(test::GetTestFile(GetParam(), &hfs_file_));
+
+    hfs_stream_.reset(new FileReadStream(hfs_file_.GetPlatformFile()));
+    hfs_reader_.reset(new HFSIterator(hfs_stream_.get()));
+    ASSERT_TRUE(hfs_reader_->Open());
+  }
+
+  bool GoToFile(const char* name) {
+    while (hfs_reader_->Next()) {
+      if (base::EndsWith(hfs_reader_->GetPath(), base::ASCIIToUTF16(name),
+                         base::CompareCase::SENSITIVE)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  HFSIterator* hfs_reader() { return hfs_reader_.get(); }
+
+ private:
+  base::File hfs_file_;
+  scoped_ptr<FileReadStream> hfs_stream_;
+  scoped_ptr<HFSIterator> hfs_reader_;
+};
+
+TEST_P(HFSFileReadTest, ReadReadme) {
+  ASSERT_TRUE(GoToFile("README.txt"));
+
+  scoped_ptr<ReadStream> stream = hfs_reader()->GetReadStream();
+  ASSERT_TRUE(stream.get());
+
+  EXPECT_FALSE(hfs_reader()->IsSymbolicLink());
+  EXPECT_FALSE(hfs_reader()->IsHardLink());
+  EXPECT_FALSE(hfs_reader()->IsDecmpfsCompressed());
+
+  std::vector<uint8_t> buffer(4, 0);
+
+  // Read the first four bytes.
+  EXPECT_TRUE(stream->ReadExact(&buffer[0], buffer.size()));
+  const uint8_t expected[] = { 'T', 'h', 'i', 's' };
+  EXPECT_EQ(0, memcmp(expected, &buffer[0], sizeof(expected)));
+  buffer.clear();
+
+  // Rewind back to the start.
+  EXPECT_EQ(0, stream->Seek(0, SEEK_SET));
+
+  // Read the entire file now.
+  EXPECT_TRUE(test::ReadEntireStream(stream.get(), &buffer));
+  EXPECT_EQ("This is a test HFS+ filesystem generated by "
+            "chrome/test/data/safe_browsing/dmg/make_hfs.sh.\n",
+            base::StringPiece(reinterpret_cast<const char*>(&buffer[0]),
+                              buffer.size()));
+  EXPECT_EQ(92u, buffer.size());
+}
+
+TEST_P(HFSFileReadTest, ReadRandom) {
+  ASSERT_TRUE(GoToFile("fifth/random"));
+
+  scoped_ptr<ReadStream> stream = hfs_reader()->GetReadStream();
+  ASSERT_TRUE(stream.get());
+
+  EXPECT_FALSE(hfs_reader()->IsSymbolicLink());
+  EXPECT_FALSE(hfs_reader()->IsHardLink());
+  EXPECT_FALSE(hfs_reader()->IsDecmpfsCompressed());
+
+  std::vector<uint8_t> buffer;
+  EXPECT_TRUE(test::ReadEntireStream(stream.get(), &buffer));
+  EXPECT_EQ(768u, buffer.size());
+}
+
+TEST_P(HFSFileReadTest, Symlink) {
+  ASSERT_TRUE(GoToFile("symlink-random"));
+
+  scoped_ptr<ReadStream> stream = hfs_reader()->GetReadStream();
+  ASSERT_TRUE(stream.get());
+
+  EXPECT_TRUE(hfs_reader()->IsSymbolicLink());
+  EXPECT_FALSE(hfs_reader()->IsHardLink());
+  EXPECT_FALSE(hfs_reader()->IsDecmpfsCompressed());
+
+  std::vector<uint8_t> buffer;
+  EXPECT_TRUE(test::ReadEntireStream(stream.get(), &buffer));
+
+  EXPECT_EQ("fourth/fifth/random",
+            base::StringPiece(reinterpret_cast<const char*>(&buffer[0]),
+                              buffer.size()));
+}
+
+TEST_P(HFSFileReadTest, HardLink) {
+  ASSERT_TRUE(GoToFile("unicode_name"));
+
+  EXPECT_FALSE(hfs_reader()->IsSymbolicLink());
+  EXPECT_TRUE(hfs_reader()->IsHardLink());
+  EXPECT_FALSE(hfs_reader()->IsDecmpfsCompressed());
+}
+
+TEST_P(HFSFileReadTest, DecmpfsFile) {
+  ASSERT_TRUE(GoToFile("first/second/goat-output.txt"));
+
+  scoped_ptr<ReadStream> stream = hfs_reader()->GetReadStream();
+  ASSERT_TRUE(stream.get());
+
+  EXPECT_FALSE(hfs_reader()->IsSymbolicLink());
+  EXPECT_FALSE(hfs_reader()->IsHardLink());
+  EXPECT_TRUE(hfs_reader()->IsDecmpfsCompressed());
+
+  std::vector<uint8_t> buffer;
+  EXPECT_TRUE(test::ReadEntireStream(stream.get(), &buffer));
+  EXPECT_EQ(0u, buffer.size());
+}
+
+INSTANTIATE_TEST_CASE_P(HFSIteratorTest,
+                        HFSFileReadTest,
+                        testing::Values(
+                            "hfs_plus.img",
+                            "hfsx_case_sensitive.img"));
+
+}  // namespace
+}  // namespace dmg
+}  // namespace safe_browsing
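Editor's note: the patch ends here. As a closing illustration, the volume
header check that HFSIterator::Open() performs can be reproduced against one
of the generated .img files with nothing but the C++ standard library. This
is a minimal, dependency-free sketch, not code from this CL; the constants
0x482B ('H+', kHFSPlusSigWord) and 0x4858 ('HX', kHFSXSigWord) come from
<hfs/hfs_format.h>:

    #include <cstdint>
    #include <cstdio>

    // Reads a big-endian uint16_t, the equivalent of OSSwapBigToHostInt16
    // on a little-endian host.
    static uint16_t ReadBigU16(const unsigned char* p) {
      return static_cast<uint16_t>((p[0] << 8) | p[1]);
    }

    int main(int argc, char* argv[]) {
      if (argc != 2) {
        std::fprintf(stderr, "usage: %s <image-file>\n", argv[0]);
        return 1;
      }
      std::FILE* file = std::fopen(argv[1], "rb");
      if (!file) {
        std::perror("fopen");
        return 1;
      }
      // The HFS+/HFSX volume header lives 1024 bytes into the volume, which
      // is why HFSIterator::Open() seeks to offset 1024 before reading it.
      unsigned char sig[2];
      bool ok = std::fseek(file, 1024, SEEK_SET) == 0 &&
                std::fread(sig, 1, sizeof(sig), file) == sizeof(sig);
      std::fclose(file);
      if (!ok) {
        std::fprintf(stderr, "could not read volume header\n");
        return 1;
      }
      switch (ReadBigU16(sig)) {
        case 0x482B:  // kHFSPlusSigWord, 'H+'
          std::puts("HFS+ volume");
          return 0;
        case 0x4858:  // kHFSXSigWord, 'HX'
          std::puts("HFSX volume");
          return 0;
        default:
          std::puts("not an HFS+/HFSX image");
          return 1;
      }
    }

Run against hfs_plus.img this should print "HFS+ volume", and against
hfsx_case_sensitive.img it should report HFSX, since make_hfs.sh formats the
second image with a case-sensitive filesystem type.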