// Copyright (c) 2012 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "content/browser/storage_partition_impl_map.h" #include "base/bind.h" #include "base/callback.h" #include "base/files/file_enumerator.h" #include "base/files/file_path.h" #include "base/files/file_util.h" #include "base/stl_util.h" #include "base/strings/string_number_conversions.h" #include "base/strings/string_util.h" #include "base/strings/stringprintf.h" #include "base/threading/sequenced_worker_pool.h" #include "content/browser/appcache/chrome_appcache_service.h" #include "content/browser/fileapi/browser_file_system_helper.h" #include "content/browser/fileapi/chrome_blob_storage_context.h" #include "content/browser/loader/resource_request_info_impl.h" #include "content/browser/resource_context_impl.h" #include "content/browser/service_worker/service_worker_request_handler.h" #include "content/browser/storage_partition_impl.h" #include "content/browser/streams/stream.h" #include "content/browser/streams/stream_context.h" #include "content/browser/streams/stream_registry.h" #include "content/browser/streams/stream_url_request_job.h" #include "content/browser/webui/url_data_manager_backend.h" #include "content/public/browser/browser_context.h" #include "content/public/browser/browser_thread.h" #include "content/public/browser/content_browser_client.h" #include "content/public/browser/storage_partition.h" #include "content/public/common/content_constants.h" #include "content/public/common/url_constants.h" #include "crypto/sha2.h" #include "net/url_request/url_request_context.h" #include "net/url_request/url_request_context_getter.h" #include "storage/browser/blob/blob_storage_context.h" #include "storage/browser/blob/blob_url_request_job_factory.h" #include "storage/browser/fileapi/file_system_url_request_job_factory.h" #include "storage/common/blob/blob_data.h" using storage::FileSystemContext; using storage::BlobStorageContext; namespace content { namespace { // A derivative that knows about Streams too. class BlobProtocolHandler : public net::URLRequestJobFactory::ProtocolHandler { public: BlobProtocolHandler(ChromeBlobStorageContext* blob_storage_context, StreamContext* stream_context, storage::FileSystemContext* file_system_context) : blob_storage_context_(blob_storage_context), stream_context_(stream_context), file_system_context_(file_system_context) {} virtual ~BlobProtocolHandler() { } virtual net::URLRequestJob* MaybeCreateJob( net::URLRequest* request, net::NetworkDelegate* network_delegate) const OVERRIDE { scoped_refptr stream = stream_context_->registry()->GetStream(request->url()); if (stream.get()) return new StreamURLRequestJob(request, network_delegate, stream); if (!blob_protocol_handler_) { // Construction is deferred because 'this' is constructed on // the main thread but we want blob_protocol_handler_ constructed // on the IO thread. blob_protocol_handler_.reset(new storage::BlobProtocolHandler( blob_storage_context_->context(), file_system_context_.get(), BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE) .get())); } return blob_protocol_handler_->MaybeCreateJob(request, network_delegate); } private: const scoped_refptr blob_storage_context_; const scoped_refptr stream_context_; const scoped_refptr file_system_context_; mutable scoped_ptr blob_protocol_handler_; DISALLOW_COPY_AND_ASSIGN(BlobProtocolHandler); }; // These constants are used to create the directory structure under the profile // where renderers with a non-default storage partition keep their persistent // state. This will contain a set of directories that partially mirror the // directory structure of BrowserContext::GetPath(). // // The kStoragePartitionDirname contains an extensions directory which is // further partitioned by extension id, followed by another level of directories // for the "default" extension storage partition and one directory for each // persistent partition used by a webview tag. Example: // // Storage/ext/ABCDEF/def // Storage/ext/ABCDEF/hash(partition name) // // The code in GetStoragePartitionPath() constructs these path names. // // TODO(nasko): Move extension related path code out of content. const base::FilePath::CharType kStoragePartitionDirname[] = FILE_PATH_LITERAL("Storage"); const base::FilePath::CharType kExtensionsDirname[] = FILE_PATH_LITERAL("ext"); const base::FilePath::CharType kDefaultPartitionDirname[] = FILE_PATH_LITERAL("def"); const base::FilePath::CharType kTrashDirname[] = FILE_PATH_LITERAL("trash"); // Because partition names are user specified, they can be arbitrarily long // which makes them unsuitable for paths names. We use a truncation of a // SHA256 hash to perform a deterministic shortening of the string. The // kPartitionNameHashBytes constant controls the length of the truncation. // We use 6 bytes, which gives us 99.999% reliability against collisions over // 1 million partition domains. // // Analysis: // We assume that all partition names within one partition domain are // controlled by the the same entity. Thus there is no chance for adverserial // attack and all we care about is accidental collision. To get 5 9s over // 1 million domains, we need the probability of a collision in any one domain // to be // // p < nroot(1000000, .99999) ~= 10^-11 // // We use the following birthday attack approximation to calculate the max // number of unique names for this probability: // // n(p,H) = sqrt(2*H * ln(1/(1-p))) // // For a 6-byte hash, H = 2^(6*8). n(10^-11, H) ~= 75 // // An average partition domain is likely to have less than 10 unique // partition names which is far lower than 75. // // Note, that for 4 9s of reliability, the limit is 237 partition names per // partition domain. const int kPartitionNameHashBytes = 6; // Needed for selecting all files in ObliterateOneDirectory() below. #if defined(OS_POSIX) const int kAllFileTypes = base::FileEnumerator::FILES | base::FileEnumerator::DIRECTORIES | base::FileEnumerator::SHOW_SYM_LINKS; #else const int kAllFileTypes = base::FileEnumerator::FILES | base::FileEnumerator::DIRECTORIES; #endif base::FilePath GetStoragePartitionDomainPath( const std::string& partition_domain) { CHECK(base::IsStringUTF8(partition_domain)); return base::FilePath(kStoragePartitionDirname).Append(kExtensionsDirname) .Append(base::FilePath::FromUTF8Unsafe(partition_domain)); } // Helper function for doing a depth-first deletion of the data on disk. // Examines paths directly in |current_dir| (no recursion) and tries to // delete from disk anything that is in, or isn't a parent of something in // |paths_to_keep|. Paths that need further expansion are added to // |paths_to_consider|. void ObliterateOneDirectory(const base::FilePath& current_dir, const std::vector& paths_to_keep, std::vector* paths_to_consider) { CHECK(current_dir.IsAbsolute()); base::FileEnumerator enumerator(current_dir, false, kAllFileTypes); for (base::FilePath to_delete = enumerator.Next(); !to_delete.empty(); to_delete = enumerator.Next()) { // Enum tracking which of the 3 possible actions to take for |to_delete|. enum { kSkip, kEnqueue, kDelete } action = kDelete; for (std::vector::const_iterator to_keep = paths_to_keep.begin(); to_keep != paths_to_keep.end(); ++to_keep) { if (to_delete == *to_keep) { action = kSkip; break; } else if (to_delete.IsParent(*to_keep)) { // |to_delete| contains a path to keep. Add to stack for further // processing. action = kEnqueue; break; } } switch (action) { case kDelete: base::DeleteFile(to_delete, true); break; case kEnqueue: paths_to_consider->push_back(to_delete); break; case kSkip: break; } } } // Synchronously attempts to delete |unnormalized_root|, preserving only // entries in |paths_to_keep|. If there are no entries in |paths_to_keep| on // disk, then it completely removes |unnormalized_root|. All paths must be // absolute paths. void BlockingObliteratePath( const base::FilePath& unnormalized_browser_context_root, const base::FilePath& unnormalized_root, const std::vector& paths_to_keep, const scoped_refptr& closure_runner, const base::Closure& on_gc_required) { // Early exit required because MakeAbsoluteFilePath() will fail on POSIX // if |unnormalized_root| does not exist. This is safe because there is // nothing to do in this situation anwyays. if (!base::PathExists(unnormalized_root)) { return; } // Never try to obliterate things outside of the browser context root or the // browser context root itself. Die hard. base::FilePath root = base::MakeAbsoluteFilePath(unnormalized_root); base::FilePath browser_context_root = base::MakeAbsoluteFilePath(unnormalized_browser_context_root); CHECK(!root.empty()); CHECK(!browser_context_root.empty()); CHECK(browser_context_root.IsParent(root) && browser_context_root != root); // Reduce |paths_to_keep| set to those under the root and actually on disk. std::vector valid_paths_to_keep; for (std::vector::const_iterator it = paths_to_keep.begin(); it != paths_to_keep.end(); ++it) { if (root.IsParent(*it) && base::PathExists(*it)) valid_paths_to_keep.push_back(*it); } // If none of the |paths_to_keep| are valid anymore then we just whack the // root and be done with it. Otherwise, signal garbage collection and do // a best-effort delete of the on-disk structures. if (valid_paths_to_keep.empty()) { base::DeleteFile(root, true); return; } closure_runner->PostTask(FROM_HERE, on_gc_required); // Otherwise, start at the root and delete everything that is not in // |valid_paths_to_keep|. std::vector paths_to_consider; paths_to_consider.push_back(root); while(!paths_to_consider.empty()) { base::FilePath path = paths_to_consider.back(); paths_to_consider.pop_back(); ObliterateOneDirectory(path, valid_paths_to_keep, &paths_to_consider); } } // Ensures each path in |active_paths| is a direct child of storage_root. void NormalizeActivePaths(const base::FilePath& storage_root, base::hash_set* active_paths) { base::hash_set normalized_active_paths; for (base::hash_set::iterator iter = active_paths->begin(); iter != active_paths->end(); ++iter) { base::FilePath relative_path; if (!storage_root.AppendRelativePath(*iter, &relative_path)) continue; std::vector components; relative_path.GetComponents(&components); DCHECK(!relative_path.empty()); normalized_active_paths.insert(storage_root.Append(components.front())); } active_paths->swap(normalized_active_paths); } // Deletes all entries inside the |storage_root| that are not in the // |active_paths|. Deletion is done in 2 steps: // // (1) Moving all garbage collected paths into a trash directory. // (2) Asynchronously deleting the trash directory. // // The deletion is asynchronous because after (1) completes, calling code can // safely continue to use the paths that had just been garbage collected // without fear of race conditions. // // This code also ignores failed moves rather than attempting a smarter retry. // Moves shouldn't fail here unless there is some out-of-band error (eg., // FS corruption). Retry logic is dangerous in the general case because // there is not necessarily a guaranteed case where the logic may succeed. // // This function is still named BlockingGarbageCollect() because it does // execute a few filesystem operations synchronously. void BlockingGarbageCollect( const base::FilePath& storage_root, const scoped_refptr& file_access_runner, scoped_ptr > active_paths) { CHECK(storage_root.IsAbsolute()); NormalizeActivePaths(storage_root, active_paths.get()); base::FileEnumerator enumerator(storage_root, false, kAllFileTypes); base::FilePath trash_directory; if (!base::CreateTemporaryDirInDir(storage_root, kTrashDirname, &trash_directory)) { // Unable to continue without creating the trash directory so give up. return; } for (base::FilePath path = enumerator.Next(); !path.empty(); path = enumerator.Next()) { if (active_paths->find(path) == active_paths->end() && path != trash_directory) { // Since |trash_directory| is unique for each run of this function there // can be no colllisions on the move. base::Move(path, trash_directory.Append(path.BaseName())); } } file_access_runner->PostTask( FROM_HERE, base::Bind(base::IgnoreResult(&base::DeleteFile), trash_directory, true)); } } // namespace // static base::FilePath StoragePartitionImplMap::GetStoragePartitionPath( const std::string& partition_domain, const std::string& partition_name) { if (partition_domain.empty()) return base::FilePath(); base::FilePath path = GetStoragePartitionDomainPath(partition_domain); // TODO(ajwong): Mangle in-memory into this somehow, either by putting // it into the partition_name, or by manually adding another path component // here. Otherwise, it's possible to have an in-memory StoragePartition and // a persistent one that return the same FilePath for GetPath(). if (!partition_name.empty()) { // For analysis of why we can ignore collisions, see the comment above // kPartitionNameHashBytes. char buffer[kPartitionNameHashBytes]; crypto::SHA256HashString(partition_name, &buffer[0], sizeof(buffer)); return path.AppendASCII(base::HexEncode(buffer, sizeof(buffer))); } return path.Append(kDefaultPartitionDirname); } StoragePartitionImplMap::StoragePartitionImplMap( BrowserContext* browser_context) : browser_context_(browser_context), resource_context_initialized_(false) { // Doing here instead of initializer list cause it's just too ugly to read. base::SequencedWorkerPool* blocking_pool = BrowserThread::GetBlockingPool(); file_access_runner_ = blocking_pool->GetSequencedTaskRunner(blocking_pool->GetSequenceToken()); } StoragePartitionImplMap::~StoragePartitionImplMap() { STLDeleteContainerPairSecondPointers(partitions_.begin(), partitions_.end()); } StoragePartitionImpl* StoragePartitionImplMap::Get( const std::string& partition_domain, const std::string& partition_name, bool in_memory) { // Find the previously created partition if it's available. StoragePartitionConfig partition_config( partition_domain, partition_name, in_memory); PartitionMap::const_iterator it = partitions_.find(partition_config); if (it != partitions_.end()) return it->second; base::FilePath partition_path = browser_context_->GetPath().Append( GetStoragePartitionPath(partition_domain, partition_name)); StoragePartitionImpl* partition = StoragePartitionImpl::Create(browser_context_, in_memory, partition_path); partitions_[partition_config] = partition; ChromeBlobStorageContext* blob_storage_context = ChromeBlobStorageContext::GetFor(browser_context_); StreamContext* stream_context = StreamContext::GetFor(browser_context_); ProtocolHandlerMap protocol_handlers; protocol_handlers[url::kBlobScheme] = linked_ptr( new BlobProtocolHandler(blob_storage_context, stream_context, partition->GetFileSystemContext())); protocol_handlers[url::kFileSystemScheme] = linked_ptr( CreateFileSystemProtocolHandler(partition_domain, partition->GetFileSystemContext())); protocol_handlers[kChromeUIScheme] = linked_ptr( URLDataManagerBackend::CreateProtocolHandler( browser_context_->GetResourceContext(), browser_context_->IsOffTheRecord(), partition->GetAppCacheService(), blob_storage_context)); std::vector additional_webui_schemes; GetContentClient()->browser()->GetAdditionalWebUISchemes( &additional_webui_schemes); for (std::vector::const_iterator it = additional_webui_schemes.begin(); it != additional_webui_schemes.end(); ++it) { protocol_handlers[*it] = linked_ptr( URLDataManagerBackend::CreateProtocolHandler( browser_context_->GetResourceContext(), browser_context_->IsOffTheRecord(), partition->GetAppCacheService(), blob_storage_context)); } protocol_handlers[kChromeDevToolsScheme] = linked_ptr( CreateDevToolsProtocolHandler(browser_context_->GetResourceContext(), browser_context_->IsOffTheRecord())); URLRequestInterceptorScopedVector request_interceptors; request_interceptors.push_back( ServiceWorkerRequestHandler::CreateInterceptor().release()); // These calls must happen after StoragePartitionImpl::Create(). if (partition_domain.empty()) { partition->SetURLRequestContext( GetContentClient()->browser()->CreateRequestContext( browser_context_, &protocol_handlers, request_interceptors.Pass())); } else { partition->SetURLRequestContext( GetContentClient()->browser()->CreateRequestContextForStoragePartition( browser_context_, partition->GetPath(), in_memory, &protocol_handlers, request_interceptors.Pass())); } partition->SetMediaURLRequestContext( partition_domain.empty() ? browser_context_->GetMediaRequestContext() : browser_context_->GetMediaRequestContextForStoragePartition( partition->GetPath(), in_memory)); PostCreateInitialization(partition, in_memory); return partition; } void StoragePartitionImplMap::AsyncObliterate( const GURL& site, const base::Closure& on_gc_required) { // This method should avoid creating any StoragePartition (which would // create more open file handles) so that it can delete as much of the // data off disk as possible. std::string partition_domain; std::string partition_name; bool in_memory = false; GetContentClient()->browser()->GetStoragePartitionConfigForSite( browser_context_, site, false, &partition_domain, &partition_name, &in_memory); // Find the active partitions for the domain. Because these partitions are // active, it is not possible to just delete the directories that contain // the backing data structures without causing the browser to crash. Instead, // of deleteing the directory, we tell each storage context later to // remove any data they have saved. This will leave the directory structure // intact but it will only contain empty databases. std::vector active_partitions; std::vector paths_to_keep; for (PartitionMap::const_iterator it = partitions_.begin(); it != partitions_.end(); ++it) { const StoragePartitionConfig& config = it->first; if (config.partition_domain == partition_domain) { it->second->ClearData( // All except shader cache. ~StoragePartition::REMOVE_DATA_MASK_SHADER_CACHE, StoragePartition::QUOTA_MANAGED_STORAGE_MASK_ALL, GURL(), StoragePartition::OriginMatcherFunction(), base::Time(), base::Time::Max(), base::Bind(&base::DoNothing)); if (!config.in_memory) { paths_to_keep.push_back(it->second->GetPath()); } } } // Start a best-effort delete of the on-disk storage excluding paths that are // known to still be in use. This is to delete any previously created // StoragePartition state that just happens to not have been used during this // run of the browser. base::FilePath domain_root = browser_context_->GetPath().Append( GetStoragePartitionDomainPath(partition_domain)); BrowserThread::PostBlockingPoolTask( FROM_HERE, base::Bind(&BlockingObliteratePath, browser_context_->GetPath(), domain_root, paths_to_keep, base::MessageLoopProxy::current(), on_gc_required)); } void StoragePartitionImplMap::GarbageCollect( scoped_ptr > active_paths, const base::Closure& done) { // Include all paths for current StoragePartitions in the active_paths since // they cannot be deleted safely. for (PartitionMap::const_iterator it = partitions_.begin(); it != partitions_.end(); ++it) { const StoragePartitionConfig& config = it->first; if (!config.in_memory) active_paths->insert(it->second->GetPath()); } // Find the directory holding the StoragePartitions and delete everything in // there that isn't considered active. base::FilePath storage_root = browser_context_->GetPath().Append( GetStoragePartitionDomainPath(std::string())); file_access_runner_->PostTaskAndReply( FROM_HERE, base::Bind(&BlockingGarbageCollect, storage_root, file_access_runner_, base::Passed(&active_paths)), done); } void StoragePartitionImplMap::ForEach( const BrowserContext::StoragePartitionCallback& callback) { for (PartitionMap::const_iterator it = partitions_.begin(); it != partitions_.end(); ++it) { callback.Run(it->second); } } void StoragePartitionImplMap::PostCreateInitialization( StoragePartitionImpl* partition, bool in_memory) { // TODO(ajwong): ResourceContexts no longer have any storage related state. // We should move this into a place where it is called once per // BrowserContext creation rather than piggybacking off the default context // creation. // Note: moving this into Get() before partitions_[] is set causes reentrency. if (!resource_context_initialized_) { resource_context_initialized_ = true; InitializeResourceContext(browser_context_); } // Check first to avoid memory leak in unittests. if (BrowserThread::IsMessageLoopValid(BrowserThread::IO)) { BrowserThread::PostTask( BrowserThread::IO, FROM_HERE, base::Bind(&ChromeAppCacheService::InitializeOnIOThread, partition->GetAppCacheService(), in_memory ? base::FilePath() : partition->GetPath().Append(kAppCacheDirname), browser_context_->GetResourceContext(), make_scoped_refptr(partition->GetURLRequestContext()), make_scoped_refptr( browser_context_->GetSpecialStoragePolicy()))); BrowserThread::PostTask( BrowserThread::IO, FROM_HERE, base::Bind(&ServiceWorkerContextWrapper::SetBlobParametersForCache, partition->GetServiceWorkerContext(), make_scoped_refptr(partition->GetURLRequestContext()), make_scoped_refptr( ChromeBlobStorageContext::GetFor(browser_context_)))); // We do not call InitializeURLRequestContext() for media contexts because, // other than the HTTP cache, the media contexts share the same backing // objects as their associated "normal" request context. Thus, the previous // call serves to initialize the media request context for this storage // partition as well. } } } // namespace content