diff options
author | rvargas@google.com <rvargas@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-06-12 19:51:17 +0000 |
---|---|---|
committer | rvargas@google.com <rvargas@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-06-12 19:51:17 +0000 |
commit | 8d9718392f63dbd2e464988aa61ac15aaf1d55ad (patch) | |
tree | d0c82a8ca04021b59ed1c21925f711dd256c874a /net | |
parent | 0787e1075eba1c3a8ddbf603298f3f773e07a4f3 (diff) | |
download | chromium_src-8d9718392f63dbd2e464988aa61ac15aaf1d55ad.zip chromium_src-8d9718392f63dbd2e464988aa61ac15aaf1d55ad.tar.gz chromium_src-8d9718392f63dbd2e464988aa61ac15aaf1d55ad.tar.bz2 |
Disk cache: Main definition of the file format version 3.
BUG=241277
TEST=none
R=gavinp@chromium.org
Review URL: https://codereview.chromium.org/16583006
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@205902 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net')
-rw-r--r-- | net/disk_cache/v3/disk_format_v3.cc | 15 | ||||
-rw-r--r-- | net/disk_cache/v3/disk_format_v3.h | 219 | ||||
-rw-r--r-- | net/net.gyp | 1 |
3 files changed, 139 insertions, 96 deletions
diff --git a/net/disk_cache/v3/disk_format_v3.cc b/net/disk_cache/v3/disk_format_v3.cc deleted file mode 100644 index 5b08954..0000000 --- a/net/disk_cache/v3/disk_format_v3.cc +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "net/disk_cache/disk_format.h" - -namespace disk_cache { - -IndexHeader::IndexHeader() { - memset(this, 0, sizeof(*this)); - magic = kIndexMagic; - version = kCurrentVersion; -} - -} // namespace disk_cache diff --git a/net/disk_cache/v3/disk_format_v3.h b/net/disk_cache/v3/disk_format_v3.h index 688ff58..5616377 100644 --- a/net/disk_cache/v3/disk_format_v3.h +++ b/net/disk_cache/v3/disk_format_v3.h @@ -3,97 +3,148 @@ // found in the LICENSE file. // The cache is stored on disk as a collection of block-files, plus an index -// file plus a collection of external files. +// plus a collection of external files. // -// Any data blob bigger than kMaxBlockSize (net/addr.h) will be stored on a -// separate file named f_xxx where x is a hexadecimal number. Shorter data will -// be stored as a series of blocks on a block-file. In any case, CacheAddr +// Any data blob bigger than kMaxBlockSize (disk_cache/addr.h) will be stored in +// a separate file named f_xxx where x is a hexadecimal number. Shorter data +// will be stored as a series of blocks on a block-file. In any case, CacheAddr // represents the address of the data inside the cache. // -// The index file is just a simple hash table that maps a particular entry to -// a CacheAddr value. Linking for a given hash bucket is handled internally -// by the cache entry. +// The index is actually a collection of four files that store a hash table with +// allocation bitmaps and backup data. 
Hash collisions are handled directly by +// the table, which from some point of view behaves like a 4-way associative +// cache with overflow buckets (so not really open addressing). +// +// Basically the hash table is a collection of buckets. The first part of the +// table has a fixed number of buckets and it is directly addressed by the hash, +// while the second part of the table (stored on a second file) has a variable +// number of buckets. Each bucket stores up to four cells (each cell represents +// a possible entry). The index bitmap tracks the state of individual cells. // // The last element of the cache is the block-file. A block file is a file -// designed to store blocks of data of a given size. It is able to store data -// that spans from one to four consecutive "blocks", and it grows as needed to -// store up to approximately 65000 blocks. It has a fixed size header used for -// book keeping such as tracking free of blocks on the file. For example, a -// block-file for 1KB blocks will grow from 8KB when totally empty to about 64MB -// when completely full. At that point, data blocks of 1KB will be stored on a -// second block file that will store the next set of 65000 blocks. The first -// file contains the number of the second file, and the second file contains the -// number of a third file, created when the second file reaches its limit. It is -// important to remember that no matter how long the chain of files is, any -// given block can be located directly by its address, which contains the file -// number and starting block inside the file. +// designed to store blocks of data of a given size. For more details see +// disk_cache/disk_format_base.h // -// A new cache is initialized with four block files (named data_0 through -// data_3), each one dedicated to store blocks of a given size. The number at -// the end of the file name is the block file number (in decimal).
+// A new cache is initialized with a set of block files (named data_0 through +// data_6), each one dedicated to store blocks of a given size or function. The +// number at the end of the file name is the block file number (in decimal). // -// There are two "special" types of blocks: an entry and a rankings node. An -// entry keeps track of all the information related to the same cache entry, -// such as the key, hash value, data pointers etc. A rankings node keeps track -// of the information that is updated frequently for a given entry, such as its -// location on the LRU lists, last access time etc. +// There are three "special" types of blocks: normal entries, evicted entries +// and control data for external files. // // The files that store internal information for the cache (blocks and index) -// are at least partially memory mapped. They have a location that is signaled -// every time the internal structures are modified, so it is possible to detect -// (most of the time) when the process dies in the middle of an update. -// -// In order to prevent dirty data to be used as valid (after a crash), every -// cache entry has a dirty identifier. Each running instance of the cache keeps -// a separate identifier (maintained on the "this_id" header field) that is used -// to mark every entry that is created or modified. When the entry is closed, -// and all the data can be trusted, the dirty flag is cleared from the entry. -// When the cache encounters an entry whose identifier is different than the one -// being currently used, it means that the entry was not properly closed on a -// previous run, so it is discarded. - -#ifndef NET_DISK_CACHE_DISK_FORMAT_H_ -#define NET_DISK_CACHE_DISK_FORMAT_H_ +// are memory mapped. They have a location that is signaled every time the +// internal structures are modified, so it is possible to detect (most of the +// time) when the process dies in the middle of an update. 
There are dedicated +// backup files for cache bitmaps, used to detect entries out of date. + +#ifndef NET_DISK_CACHE_V3_DISK_FORMAT_V3_H_ +#define NET_DISK_CACHE_V3_DISK_FORMAT_V3_H_ #include "base/basictypes.h" -#include "net/base/net_export.h" +#include "net/disk_cache/disk_format_base.h" namespace disk_cache { -const uint32 kCurrentVersion = 0x20000; // Version 2.0. +const int kBaseTableLen = 0x10000; +const uint32 kIndexMagicV3 = 0xC103CAC3; +const uint32 kVersion3 = 0x30000; // Version 3.0. -// Header for the master index file. -struct NET_EXPORT_PRIVATE IndexHeader { - IndexHeader(); +// Flags for a given cache. +enum CacheFlags { + CACHE_EVICTION_2 = 1, // Keep multiple lists for eviction. + CACHE_EVICTED = 1 << 1 // Already evicted at least one entry. +}; +// Header for the master index file. +struct IndexHeaderV3 { uint32 magic; uint32 version; int32 num_entries; // Number of entries currently stored. int32 num_bytes; // Total size of the stored data. int32 last_file; // Last external file created. - int32 this_id; // Id for all entries being changed (dirty flag). + int32 reserved1; CacheAddr stats; // Storage for usage data. - int32 table_len; // Actual size of the table (0 == kIndexTablesize). + int32 table_len; // Actual size of the table. int32 crash; // Signals a previous crash. int32 experiment; // Id of an ongoing test. + int32 max_bytes; // Total maximum size of the stored data. + uint32 flags; + int32 used_cells; + int32 max_bucket; uint64 create_time; // Creation time for this set of files. - int32 pad[52]; - LruData lru; // Eviction control data. + uint64 base_time; // Current base for timestamps. + uint64 old_time; // Previous time used for timestamps. + int32 max_block_file; + int32 num_no_use_entries; + int32 num_low_use_entries; + int32 num_high_use_entries; + int32 reserved; + int32 num_evicted_entries; + int32 pad[6]; }; -// The structure of the whole index file. 
-struct Index { - IndexHeader header; - CacheAddr table[kIndexTablesize]; // Default size. Actual size controlled - // by header.table_len. +const int kBaseBitmapBytes = 3968; +// The IndexBitmap is directly saved to a file named index. The file grows in +// page increments (4096 bytes), but all bits don't have to be in use at any +// given time. The required file size can be computed from header.table_len. +struct IndexBitmap { + IndexHeaderV3 header; + uint32 bitmap[kBaseBitmapBytes / 4]; // First page of the bitmap. }; +COMPILE_ASSERT(sizeof(IndexBitmap) == 4096, bad_IndexHeader); // Possible states for a given entry. enum EntryState { - ENTRY_NORMAL = 0, - ENTRY_EVICTED, // The entry was recently evicted from the cache. - ENTRY_DOOMED // The entry was doomed. + ENTRY_FREE = 0, // Available slot. + ENTRY_NEW, // The entry is being created. + ENTRY_OPEN, // The entry is being accessed. + ENTRY_MODIFIED, // The entry is being modified. + ENTRY_DELETED, // The entry is being deleted. + ENTRY_FIXING, // Inconsistent state. The entry is being verified. + ENTRY_USED // The slot is in use (entry is present). +}; +COMPILE_ASSERT(ENTRY_USED <= 7, state_uses_3_bits); + +enum EntryGroup { + ENTRY_NO_USE = 0, // The entry has not been reused. + ENTRY_LOW_USE, // The entry has low reuse. + ENTRY_HIGH_USE, // The entry has high reuse. + ENTRY_RESERVED, // Reserved for future use. + ENTRY_EVICTED // The entry was deleted. }; +COMPILE_ASSERT(ENTRY_USED <= 7, group_uses_3_bits); + +#pragma pack(push, 1) +struct IndexCell { + void Clear() { memset(this, 0, sizeof(*this)); } + + uint64 address : 22; + uint64 hash : 18; + uint64 timestamp : 20; + uint64 reuse : 4; + uint8 state : 3; + uint8 group : 3; + uint8 sum : 2; +}; +COMPILE_ASSERT(sizeof(IndexCell) == 9, bad_IndexCell); + +struct IndexBucket { + IndexCell cells[4]; + int32 next; + uint32 hash : 24; // The last byte is only defined for buckets of + uint32 reserved : 8; // the extra table. 
+}; +COMPILE_ASSERT(sizeof(IndexBucket) == 44, bad_IndexBucket); +const int kBytesPerCell = 44 / 4; + +// The main cache index. Backed by a file named index_tb1. +// The extra table (index_tb2) has a similar format, but different size. +struct Index { + // Default size. Actual size controlled by header.table_len. + IndexBucket table[kBaseTableLen / 4]; +}; +#pragma pack(pop) // Flags that can be applied to an entry. enum EntryFlags { @@ -101,33 +152,39 @@ enum EntryFlags { CHILD_ENTRY = 1 << 1 // Child entry that stores sparse data. }; -// Main structure for an entry on the backing storage. If the key is longer than -// what can be stored on this structure, it will be extended on consecutive -// blocks (adding 256 bytes each time), up to 4 blocks (1024 - 32 - 1 chars). -// After that point, the whole key will be stored as a data block or external -// file. -struct EntryStore { - uint32 hash; // Full hash of the key. - CacheAddr next; // Next entry with the same hash or bucket. - CacheAddr rankings_node; // Rankings node for this entry. - int32 reuse_count; // How often is this entry used. - int32 refetch_count; // How often is this fetched from the net. - int32 state; // Current state. - uint64 creation_time; +struct EntryRecord { + uint32 hash; + uint32 pad1; + uint8 reuse_count; + uint8 refetch_count; + int8 state; // Current EntryState. + uint8 flags; // Any combination of EntryFlags. int32 key_len; - CacheAddr long_key; // Optional address of a long key. int32 data_size[4]; // We can store up to 4 data streams for each CacheAddr data_addr[4]; // entry. - uint32 flags; // Any combination of EntryFlags. - int32 pad[4]; - uint32 self_hash; // The hash of EntryStore up to this point. 
- char key[256 - 24 * 4]; // null terminated + uint32 data_hash[4]; + uint64 creation_time; + uint64 last_modified_time; + uint64 last_access_time; + int32 pad[3]; + uint32 self_hash; }; +COMPILE_ASSERT(sizeof(EntryRecord) == 104, bad_EntryRecord); -COMPILE_ASSERT(sizeof(EntryStore) == 256, bad_EntyStore); -const int kMaxInternalKeyLength = 4 * sizeof(EntryStore) - - offsetof(EntryStore, key) - 1; +struct ShortEntryRecord { + uint32 hash; + uint32 pad1; + uint8 reuse_count; + uint8 refetch_count; + int8 state; // Current EntryState. + uint8 flags; + int32 key_len; + uint64 last_access_time; + uint32 long_hash[5]; + uint32 self_hash; +}; +COMPILE_ASSERT(sizeof(ShortEntryRecord) == 48, bad_ShortEntryRecord); } // namespace disk_cache -#endif // NET_DISK_CACHE_DISK_FORMAT_H_ +#endif // NET_DISK_CACHE_V3_DISK_FORMAT_V3_H_ diff --git a/net/net.gyp b/net/net.gyp index 096dab0..e8f36ce 100644 --- a/net/net.gyp +++ b/net/net.gyp @@ -409,6 +409,7 @@ 'disk_cache/flash/segment.h', 'disk_cache/flash/storage.cc', 'disk_cache/flash/storage.h', + 'disk_cache/v3/disk_format_v3.h', 'dns/address_sorter.h', 'dns/address_sorter_posix.cc', 'dns/address_sorter_posix.h', |