diff options
author | sra@chromium.org <sra@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-05-18 23:47:01 +0000 |
---|---|---|
committer | sra@chromium.org <sra@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-05-18 23:47:01 +0000 |
commit | d576712f077da660d19a323c9c1efaad6caa26d0 (patch) | |
tree | dfa055757b91fd6c1dc1a81361afbbc92adb38aa | |
parent | d9b46d768e64c24beb0497902e47b2a9d4a54af7 (diff) | |
download | chromium_src-d576712f077da660d19a323c9c1efaad6caa26d0.zip chromium_src-d576712f077da660d19a323c9c1efaad6caa26d0.tar.gz chromium_src-d576712f077da660d19a323c9c1efaad6caa26d0.tar.bz2 |
Improvements to Courgette's version of bsdiff
* Store 'diff' bytes by run-length encoding zeros. This reduces the memory
needed to store the zeros by ~30MB for chrome.7z.
* Store the control tuple elements in separate streams. The 'extra_bytes'
counts are often zero so this brings all the zeros together.
The uncompressed patch file is much smaller due to the run-length encoded
zeros. It is slightly smaller (3-8%) after compression with lzma.
Review URL: http://codereview.chromium.org/115435
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@16343 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | courgette/courgette.h | 2 | ||||
-rw-r--r-- | courgette/third_party/bsdiff.h | 3 | ||||
-rw-r--r-- | courgette/third_party/bsdiff_apply.cc | 70 | ||||
-rw-r--r-- | courgette/third_party/bsdiff_create.cc | 55 |
4 files changed, 64 insertions, 66 deletions
diff --git a/courgette/courgette.h b/courgette/courgette.h index 0ddded5..7375c10 100644 --- a/courgette/courgette.h +++ b/courgette/courgette.h @@ -26,7 +26,7 @@ enum Status { C_BAD_ENSEMBLE_MAGIC = 5, // Ensemble patch has bad magic. C_BAD_ENSEMBLE_VERSION = 6, // Ensemble patch has wrong version. C_BAD_ENSEMBLE_HEADER = 7, // Ensemble patch has corrupt header. - C_BAD_ENSEMBLE_CRC = 8, // Ensemble patch has corrupt header. + C_BAD_ENSEMBLE_CRC = 8, // Ensemble patch has corrupt data. C_BAD_TRANSFORM = 12, // Transform mis-specified. C_BAD_BASE = 13, // Base for transform malformed. diff --git a/courgette/third_party/bsdiff.h b/courgette/third_party/bsdiff.h index bf7fdec..5e5683a 100644 --- a/courgette/third_party/bsdiff.h +++ b/courgette/third_party/bsdiff.h @@ -74,9 +74,6 @@ typedef struct MBSPatchHeader_ { uint32 slen; // Length of the file to be patched. uint32 scrc32; // CRC32 of the file to be patched. uint32 dlen; // Length of the result file. - uint32 cblen; // Length of the control block in bytes. - uint32 difflen; // Length of the diff block in bytes. - uint32 extralen; // Length of the extra block in bytes. } MBSPatchHeader; // This is the value for the tag field. Must match length exactly, not counting diff --git a/courgette/third_party/bsdiff_apply.cc b/courgette/third_party/bsdiff_apply.cc index 4b6a011..cc9bb50 100644 --- a/courgette/third_party/bsdiff_apply.cc +++ b/courgette/third_party/bsdiff_apply.cc @@ -44,9 +44,6 @@ BSDiffStatus MBS_ReadHeader(SourceStream* stream, MBSPatchHeader* header) { if (!stream->ReadVarint32(&header->slen)) return READ_ERROR; if (!stream->ReadVarint32(&header->scrc32)) return READ_ERROR; if (!stream->ReadVarint32(&header->dlen)) return READ_ERROR; - if (!stream->ReadVarint32(&header->cblen)) return READ_ERROR; - if (!stream->ReadVarint32(&header->difflen)) return READ_ERROR; - if (!stream->ReadVarint32(&header->extralen)) return READ_ERROR; // The string will have a NUL terminator that we don't use, hence '-1'. COMPILE_ASSERT(sizeof(MBS_PATCH_HEADER_TAG) - 1 == sizeof(header->tag), @@ -54,12 +51,6 @@ BSDiffStatus MBS_ReadHeader(SourceStream* stream, MBSPatchHeader* header) { if (memcmp(header->tag, MBS_PATCH_HEADER_TAG, 8) != 0) return UNEXPECTED_ERROR; - size_t bytes_remaining = stream->Remaining(); - if (header->cblen + - header->difflen + - header->extralen != bytes_remaining) - return UNEXPECTED_ERROR; - return OK; } @@ -69,35 +60,37 @@ BSDiffStatus MBS_ApplyPatch(const MBSPatchHeader *header, SinkStream* new_stream) { const uint8* old_end = old_start + old_size; - SourceStream control_stream; - - const uint8* control_start = patch_stream->Buffer(); - if (!patch_stream->ReadSubstream(header->cblen, &control_stream)) - return READ_ERROR; - if (!patch_stream->Skip(header->difflen + header->extralen)) - return READ_ERROR; - if (!patch_stream->Empty()) + SourceStreamSet patch_streams; + if (!patch_streams.Init(patch_stream)) return READ_ERROR; - const uint8* diff_start = control_start + header->cblen; - const uint8* diff_end = diff_start + header->difflen; - const uint8* extra_start = diff_end; - const uint8* extra_end = extra_start + header->extralen; + SourceStream* control_stream_copy_counts = patch_streams.stream(0); + SourceStream* control_stream_extra_counts = patch_streams.stream(1); + SourceStream* control_stream_seeks = patch_streams.stream(2); + SourceStream* diff_skips = patch_streams.stream(3); + SourceStream* diff_bytes = patch_streams.stream(4); + SourceStream* extra_bytes = patch_streams.stream(5); - const uint8* old_position = old_start; - const uint8* diff_position = diff_start; + const uint8* extra_start = extra_bytes->Buffer(); + const uint8* extra_end = extra_start + extra_bytes->Remaining(); const uint8* extra_position = extra_start; + const uint8* old_position = old_start; + new_stream->Reserve(header->dlen); - while (!control_stream.Empty()) { + uint32 pending_diff_zeros = 0; + if (!diff_skips->ReadVarint32(&pending_diff_zeros)) + return UNEXPECTED_ERROR; + + while (!control_stream_copy_counts->Empty()) { uint32 copy_count, extra_count; int32 seek_adjustment; - if (!control_stream.ReadVarint32(©_count)) + if (!control_stream_copy_counts->ReadVarint32(©_count)) return UNEXPECTED_ERROR; - if (!control_stream.ReadVarint32(&extra_count)) + if (!control_stream_extra_counts->ReadVarint32(&extra_count)) return UNEXPECTED_ERROR; - if (!control_stream.ReadVarint32Signed(&seek_adjustment)) + if (!control_stream_seeks->ReadVarint32Signed(&seek_adjustment)) return UNEXPECTED_ERROR; #ifdef DEBUG_bsmedberg @@ -108,16 +101,22 @@ BSDiffStatus MBS_ApplyPatch(const MBSPatchHeader *header, // block. if (copy_count > static_cast<size_t>(old_end - old_position)) return UNEXPECTED_ERROR; - if (copy_count > static_cast<size_t>(diff_end - diff_position)) - return UNEXPECTED_ERROR; // Add together bytes from the 'old' file and the 'diff' stream. for (size_t i = 0; i < copy_count; ++i) { - uint8 byte = old_position[i] + diff_position[i]; + uint8 diff_byte = 0; + if (pending_diff_zeros) { + --pending_diff_zeros; + } else { + if (!diff_skips->ReadVarint32(&pending_diff_zeros)) + return UNEXPECTED_ERROR; + if (!diff_bytes->Read(&diff_byte, 1)) + return UNEXPECTED_ERROR; + } + uint8 byte = old_position[i] + diff_byte; new_stream->Write(&byte, 1); } old_position += copy_count; - diff_position += copy_count; // Copy bytes from the extra block. if (extra_count > static_cast<size_t>(extra_end - extra_position)) @@ -134,9 +133,12 @@ BSDiffStatus MBS_ApplyPatch(const MBSPatchHeader *header, old_position += seek_adjustment; } - if (diff_position != diff_end) - return UNEXPECTED_ERROR; - if (extra_position != extra_end) + if (!control_stream_copy_counts->Empty() || + !control_stream_extra_counts->Empty() || + !control_stream_seeks->Empty() || + !diff_skips->Empty() || + !diff_bytes->Empty() || + !extra_bytes->Empty()) return UNEXPECTED_ERROR; return OK; diff --git a/courgette/third_party/bsdiff_create.cc b/courgette/third_party/bsdiff_create.cc index d59dc21..0212093 100644 --- a/courgette/third_party/bsdiff_create.cc +++ b/courgette/third_party/bsdiff_create.cc @@ -42,7 +42,7 @@ namespace courgette { // // The code appears to be a rewritten version of the suffix array algorithm // presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko -// Sadakane, special-cased for bytes. +// Sadakane, special cased for bytes. static void split(int *I,int *V,int start,int len,int h) @@ -191,9 +191,6 @@ static void WriteHeader(SinkStream* stream, MBSPatchHeader* header) { stream->WriteVarint32(header->slen); stream->WriteVarint32(header->scrc32); stream->WriteVarint32(header->dlen); - stream->WriteVarint32(header->cblen); - stream->WriteVarint32(header->difflen); - stream->WriteVarint32(header->extralen); } BSDiffStatus CreateBinaryPatch(SourceStream* old_stream, @@ -204,9 +201,19 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream, LOG(INFO) << "Start bsdiff"; size_t initial_patch_stream_length = patch_stream->Length(); + SinkStreamSet patch_streams; + SinkStream* control_stream_copy_counts = patch_streams.stream(0); + SinkStream* control_stream_extra_counts = patch_streams.stream(1); + SinkStream* control_stream_seeks = patch_streams.stream(2); + SinkStream* diff_skips = patch_streams.stream(3); + SinkStream* diff_bytes = patch_streams.stream(4); + SinkStream* extra_bytes = patch_streams.stream(5); + const uint8* old = old_stream->Buffer(); const int oldsize = old_stream->Remaining(); + uint32 pending_diff_zeros = 0; + scoped_array<int> I(new(std::nothrow) int[oldsize + 1]); scoped_array<int> V(new(std::nothrow) int[oldsize + 1]); if (I == NULL || V == NULL) @@ -221,24 +228,12 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream, const uint8* newbuf = new_stream->Buffer(); const int newsize = new_stream->Remaining(); - // Allocate newsize+1 bytes instead of newsize bytes to ensure that we never - // try to malloc(0) and get a NULL pointer. - - scoped_array<uint8> diff_bytes_array(new(std::nothrow) uint8[newsize + 1]); - scoped_array<uint8> extra_bytes_array(new(std::nothrow) uint8[newsize + 1]); - if (diff_bytes_array == NULL || extra_bytes_array == NULL) - return MEM_ERROR; - - uint8* diff_bytes = diff_bytes_array.get(); - uint8* extra_bytes = extra_bytes_array.get(); int control_length = 0; int diff_bytes_length = 0; int diff_bytes_nonzero = 0; int extra_bytes_length = 0; int eblen = 0; - SinkStream control_stream; - // The patch format is a sequence of triples <copy,extra,seek> where 'copy' is // the number of bytes to copy from the old file (possibly with mistakes), // 'extra' is the number of bytes to copy from a stream of fresh bytes, and @@ -364,13 +359,18 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream, for (int i = 0; i < lenf; i++) { uint8 diff_byte = newbuf[lastscan + i] - old[lastpos + i]; - diff_bytes[diff_bytes_length + i] = diff_byte; - if (diff_byte) + if (diff_byte) { ++diff_bytes_nonzero; + diff_skips->WriteVarint32(pending_diff_zeros); + pending_diff_zeros = 0; + diff_bytes->Write(&diff_byte, 1); + } else { + ++pending_diff_zeros; + } } int gap = (scan - lenb) - (lastscan + lenf); for (int i = 0; i < gap; i++) - extra_bytes[extra_bytes_length + i] = newbuf[lastscan + lenf + i]; + extra_bytes->Write(&newbuf[lastscan + lenf + i], 1); diff_bytes_length += lenf; extra_bytes_length += gap; @@ -379,9 +379,9 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream, uint32 extra_count = gap; int32 seek_adjustment = ((pos - lenb) - (lastpos + lenf)); - control_stream.WriteVarint32(copy_count); - control_stream.WriteVarint32(extra_count); - control_stream.WriteVarint32Signed(seek_adjustment); + control_stream_copy_counts->WriteVarint32(copy_count); + control_stream_extra_counts->WriteVarint32(extra_count); + control_stream_seeks->WriteVarint32Signed(seek_adjustment); ++control_length; #ifdef DEBUG_bsmedberg LOG(INFO) << StringPrintf( @@ -395,6 +395,8 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream, } } + diff_skips->WriteVarint32(pending_diff_zeros); + I.reset(); MBSPatchHeader header; @@ -405,19 +407,16 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream, header.slen = oldsize; header.scrc32 = CalculateCrc(old, oldsize); header.dlen = newsize; - header.cblen = control_stream.Length(); - header.difflen = diff_bytes_length; - header.extralen = extra_bytes_length; WriteHeader(patch_stream, &header); - patch_stream->Append(&control_stream); - patch_stream->Write(diff_bytes, diff_bytes_length); - patch_stream->Write(extra_bytes, extra_bytes_length); + size_t diff_skips_length = diff_skips->Length(); + patch_streams.CopyTo(patch_stream); LOG(INFO) << "Control tuples: " << control_length << " copy bytes: " << diff_bytes_length << " mistakes: " << diff_bytes_nonzero + << " (skips: " << diff_skips_length << ")" << " extra bytes: " << extra_bytes_length; LOG(INFO) << "Uncompressed bsdiff patch size " |