diff options
author | sra@chromium.org <sra@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-02-23 21:18:48 +0000 |
---|---|---|
committer | sra@chromium.org <sra@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-02-23 21:18:48 +0000 |
commit | 6db3cd4ebccf65c9bbe1bb6d6239ec103caaf518 (patch) | |
tree | c2ab9d415c80ae62ff06fdbb8be3472271daacb0 | |
parent | ce910b9d066453ea491473c4ef9e73b163a58d6d (diff) | |
download | chromium_src-6db3cd4ebccf65c9bbe1bb6d6239ec103caaf518.zip chromium_src-6db3cd4ebccf65c9bbe1bb6d6239ec103caaf518.tar.gz chromium_src-6db3cd4ebccf65c9bbe1bb6d6239ec103caaf518.tar.bz2 |
Improved memory usage while applying a patch.
Reduced total size of allocations from 520MB to 318MB.
The general technique is to allocate buffers at the correct size up front,
rather than growing them toward the correct size and overshooting it.
1. Find file sizes and allocate buffers of that size for the input files.
2. Pre-allocate a buffer for the collected inputs for the final diff.
3. Calculate the size for (2) during compression and include it in the patch
header.
The courgette.exe command line tool now calls the same ApplyEnsemblePatch
entry point that is called by the installer. This ensures that measurements
of courgette.exe better reflect the behavior of the installer.
BUG=72459
Review URL: http://codereview.chromium.org/6546008
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@75787 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | courgette/courgette_tool.cc | 52 | ||||
-rw-r--r-- | courgette/ensemble.h | 3 | ||||
-rw-r--r-- | courgette/ensemble_apply.cc | 21 | ||||
-rw-r--r-- | courgette/ensemble_create.cc | 8 | ||||
-rw-r--r-- | courgette/streams.cc | 16 | ||||
-rw-r--r-- | courgette/streams.h | 12 |
6 files changed, 96 insertions, 16 deletions
diff --git a/courgette/courgette_tool.cc b/courgette/courgette_tool.cc index 7176442..523a438 100644 --- a/courgette/courgette_tool.cc +++ b/courgette/courgette_tool.cc @@ -52,7 +52,11 @@ std::string ReadOrFail(const std::wstring& file_name, const char* kind) { #else FilePath file_path(WideToASCII(file_name)); #endif + int64 file_size = 0; + if (!file_util::GetFileSize(file_path, &file_size)) + Problem("Can't read %s file.", kind); std::string buffer; + buffer.reserve(static_cast<size_t>(file_size)); if (!file_util::ReadFileToString(file_path, &buffer)) Problem("Can't read %s file.", kind); return buffer; @@ -285,20 +289,48 @@ void GenerateEnsemblePatch(const std::wstring& old_file, void ApplyEnsemblePatch(const std::wstring& old_file, const std::wstring& patch_file, const std::wstring& new_file) { - std::string old_buffer = ReadOrFail(old_file, "'old' input"); - std::string patch_buffer = ReadOrFail(patch_file, "'patch' input"); + // We do things a little differently here in order to call the same Courgette + // entry point as the installer. That entry point point takes file names and + // returns an status code but does not output any diagnostics. +#if defined(OS_WIN) + FilePath old_path(old_file); + FilePath patch_path(patch_file); + FilePath new_path(new_file); +#else + FilePath old_path(WideToASCII(old_file)); + FilePath patch_path(WideToASCII(patch_file)); + FilePath new_path(WideToASCII(new_file)); +#endif - courgette::SourceStream old_stream; - courgette::SourceStream patch_stream; - old_stream.Init(old_buffer); - patch_stream.Init(patch_buffer); - courgette::SinkStream new_stream; courgette::Status status = - courgette::ApplyEnsemblePatch(&old_stream, &patch_stream, &new_stream); + courgette::ApplyEnsemblePatch(old_path.value().c_str(), + patch_path.value().c_str(), + new_path.value().c_str()); + + if (status == courgette::C_OK) + return; + + // Diagnose the error. 
+ if (status == courgette::C_BAD_ENSEMBLE_MAGIC) + Problem("Not a courgette patch"); + if (status == courgette::C_BAD_ENSEMBLE_VERSION) + Problem("Wrong version patch"); + if (status == courgette::C_BAD_ENSEMBLE_HEADER) + Problem("Corrupt patch"); + // If we failed due to a missing input file, this will + // print the message. + std::string old_buffer = ReadOrFail(old_file, "'old' input"); + old_buffer.clear(); + std::string patch_buffer = ReadOrFail(patch_file, "'patch' input"); + patch_buffer.clear(); - if (status != courgette::C_OK) Problem("-apply failed."); + // Non-input related errors: + if (status == courgette::C_WRITE_OPEN_ERROR) + Problem("Can't open output"); + if (status == courgette::C_WRITE_ERROR) + Problem("Can't write output"); - WriteSinkToFile(&new_stream, new_file); + Problem("-apply failed."); } void GenerateBSDiffPatch(const std::wstring& old_file, diff --git a/courgette/ensemble.h b/courgette/ensemble.h index 6a58cb0..f907f9d 100644 --- a/courgette/ensemble.h +++ b/courgette/ensemble.h @@ -108,6 +108,7 @@ struct CourgettePatchFile { // version // source-checksum // target-checksum + // final-patch-input-size (an allocation hint) // multiple-streams: // stream 0: // number-of-transformed-elements (N) - varint32 @@ -136,7 +137,7 @@ struct CourgettePatchFile { static const uint32 kMagic = 'C' | ('o' << 8) | ('u' << 16); - static const uint32 kVersion = 20090320; + static const uint32 kVersion = 20110216; // Transformation method IDs. 
enum TransformationMethodId { diff --git a/courgette/ensemble_apply.cc b/courgette/ensemble_apply.cc index d1310b4..0785598 100644 --- a/courgette/ensemble_apply.cc +++ b/courgette/ensemble_apply.cc @@ -8,6 +8,7 @@ #include "base/basictypes.h" #include "base/file_util.h" +#include "base/logging.h" #include "courgette/crc.h" #include "courgette/image_info.h" @@ -63,6 +64,7 @@ class EnsemblePatchApplication { uint32 source_checksum_; uint32 target_checksum_; + uint32 final_patch_input_size_prediction_; std::vector<TransformationPatcher*> patchers_; @@ -73,7 +75,8 @@ class EnsemblePatchApplication { }; EnsemblePatchApplication::EnsemblePatchApplication() - : source_checksum_(0), target_checksum_(0) { + : source_checksum_(0), target_checksum_(0), + final_patch_input_size_prediction_(0) { } EnsemblePatchApplication::~EnsemblePatchApplication() { @@ -103,6 +106,9 @@ Status EnsemblePatchApplication::ReadHeader(SourceStream* header_stream) { if (!header_stream->ReadVarint32(&target_checksum_)) return C_BAD_ENSEMBLE_HEADER; + if (!header_stream->ReadVarint32(&final_patch_input_size_prediction_)) + return C_BAD_ENSEMBLE_HEADER; + return C_OK; } @@ -214,6 +220,8 @@ Status EnsemblePatchApplication::TransformDown( SinkStream* basic_elements) { // Construct blob of original input followed by reformed elements. + basic_elements->Reserve(final_patch_input_size_prediction_); + // The original input: basic_elements->Write(base_region_.start(), base_region_.length()); @@ -231,6 +239,9 @@ Status EnsemblePatchApplication::TransformDown( if (!transformed_elements->Empty()) return C_STREAM_NOT_CONSUMED; + // We have totally consumed transformed_elements, so can free the + // storage to which it referred. + corrected_elements_storage_.Retire(); return C_OK; } @@ -374,13 +385,21 @@ Status ApplyEnsemblePatch(const FilePath::CharType* old_file_name, return status; // Header smells good so read the whole patch file for real. 
+ int64 patch_file_size = 0; + if (!file_util::GetFileSize(patch_file_path, &patch_file_size)) + return C_READ_ERROR; std::string patch_file_buffer; + patch_file_buffer.reserve(static_cast<size_t>(patch_file_size)); if (!file_util::ReadFileToString(patch_file_path, &patch_file_buffer)) return C_READ_ERROR; // Read the old_file. FilePath old_file_path(old_file_name); + int64 old_file_size = 0; + if (!file_util::GetFileSize(old_file_path, &old_file_size)) + return C_READ_ERROR; std::string old_file_buffer; + old_file_buffer.reserve(static_cast<size_t>(old_file_size)); if (!file_util::ReadFileToString(old_file_path, &old_file_buffer)) return C_READ_ERROR; diff --git a/courgette/ensemble_create.cc b/courgette/ensemble_create.cc index 98b7ba0..b70621a 100644 --- a/courgette/ensemble_create.cc +++ b/courgette/ensemble_create.cc @@ -351,6 +351,9 @@ Status GenerateEnsemblePatch(SourceStream* base, if (delta2_status != C_OK) return delta2_status; + // Last use, free storage. + linearized_predicted_transformed_elements.Retire(); + // // Generate sub-patch for whole enchilada. // @@ -381,8 +384,12 @@ Status GenerateEnsemblePatch(SourceStream* base, if (!corrected_transformed_elements_source_set.Empty()) return C_STREAM_NOT_CONSUMED; + // No more references to this stream's buffer. 
+ linearized_corrected_transformed_elements.Retire(); + FreeGenerators(&generators); + size_t final_patch_input_size = predicted_ensemble.Length(); SourceStream predicted_ensemble_source; predicted_ensemble_source.Init(predicted_ensemble); Status delta3_status = GenerateSimpleDelta(&predicted_ensemble_source, @@ -401,6 +408,7 @@ Status GenerateEnsemblePatch(SourceStream* base, CalculateCrc(old_region.start(), old_region.length())); final_patch->WriteVarint32( CalculateCrc(new_region.start(), new_region.length())); + final_patch->WriteSizeVarint32(final_patch_input_size); if (!patch_streams.CopyTo(final_patch)) return C_STREAM_ERROR; diff --git a/courgette/streams.cc b/courgette/streams.cc index 26c7f2e..32dbf6b 100644 --- a/courgette/streams.cc +++ b/courgette/streams.cc @@ -211,8 +211,12 @@ void SinkStream::WriteSizeVarint32(size_t value) { void SinkStream::Append(SinkStream* other) { Write(other->buffer_.c_str(), other->buffer_.size()); - other->buffer_.clear(); - other->buffer_.reserve(0); // Non-binding request to reduce storage. + other->Retire(); +} + +void SinkStream::Retire() { + buffer_.clear(); + buffer_.reserve(0); // Non-binding request to reduce storage. } //////////////////////////////////////////////////////////////////////////////// @@ -335,6 +339,14 @@ void SinkStreamSet::CopyHeaderTo(SinkStream* header) { bool SinkStreamSet::CopyTo(SinkStream *combined_stream) { SinkStream header; CopyHeaderTo(&header); + + // Reserve the correct amount of storage. + size_t length = header.Length(); + for (size_t i = 0; i < count_; ++i) { + length += stream(i)->Length(); + } + combined_stream->Reserve(length); + combined_stream->Append(&header); for (size_t i = 0; i < count_; ++i) { combined_stream->Append(stream(i)); diff --git a/courgette/streams.h b/courgette/streams.h index 144dca7..3dbc2b7 100644 --- a/courgette/streams.h +++ b/courgette/streams.h @@ -147,6 +147,9 @@ class SinkStream { // Hints that the stream will grow by an additional |length| bytes. 
void Reserve(size_t length) { buffer_.reserve(length + buffer_.length()); } + // Finished with this stream and any storage it has. + void Retire(); + private: std::string buffer_; // Use a string to manage the stream's memory. @@ -187,6 +190,11 @@ class SourceStreamSet { DISALLOW_COPY_AND_ASSIGN(SourceStreamSet); }; +// A SinkStreamSet is a set of SinkStreams. Data is collected by writing to the +// component streams. When data collection is complete, it is destructively +// transferred, either by flattening into one stream (CopyTo), or transfering +// data pairwise into another SinkStreamSet by calling that SinkStreamSet's +// WriteSet method. class SinkStreamSet { public: SinkStreamSet(); @@ -199,8 +207,8 @@ class SinkStreamSet { // Returns a pointer to a substream. SinkStream* stream(size_t id) { return id < count_ ? &streams_[id] : NULL; } - // CopyTo serializes the streams in the SinkStreamSet into a single target - // stream or file. The serialized format may be re-read by initializing a + // CopyTo serializes the streams in this SinkStreamSet into a single target + // stream. The serialized format may be re-read by initializing a // SourceStreamSet with a buffer containing the data. bool CopyTo(SinkStream* combined_stream); |