summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorsra@chromium.org <sra@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2009-05-18 23:47:01 +0000
committersra@chromium.org <sra@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2009-05-18 23:47:01 +0000
commitd576712f077da660d19a323c9c1efaad6caa26d0 (patch)
treedfa055757b91fd6c1dc1a81361afbbc92adb38aa
parentd9b46d768e64c24beb0497902e47b2a9d4a54af7 (diff)
downloadchromium_src-d576712f077da660d19a323c9c1efaad6caa26d0.zip
chromium_src-d576712f077da660d19a323c9c1efaad6caa26d0.tar.gz
chromium_src-d576712f077da660d19a323c9c1efaad6caa26d0.tar.bz2
Improvements to Courgette's version of bsdiff
* Store 'diff' bytes by run-length encoding zeros. This reduces the memory needed to store the zeros by ~30MB for chrome.7z. * Store the control tuple elements in separate streams. The 'extra_bytes' counts are often zero so this brings all the zeros together. The uncompressed patch file is much smaller due to the run-length encoded zeros. It is slightly smaller (3-8%) after compression with lzma. Review URL: http://codereview.chromium.org/115435 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@16343 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r--courgette/courgette.h2
-rw-r--r--courgette/third_party/bsdiff.h3
-rw-r--r--courgette/third_party/bsdiff_apply.cc70
-rw-r--r--courgette/third_party/bsdiff_create.cc55
4 files changed, 64 insertions, 66 deletions
diff --git a/courgette/courgette.h b/courgette/courgette.h
index 0ddded5..7375c10 100644
--- a/courgette/courgette.h
+++ b/courgette/courgette.h
@@ -26,7 +26,7 @@ enum Status {
C_BAD_ENSEMBLE_MAGIC = 5, // Ensemble patch has bad magic.
C_BAD_ENSEMBLE_VERSION = 6, // Ensemble patch has wrong version.
C_BAD_ENSEMBLE_HEADER = 7, // Ensemble patch has corrupt header.
- C_BAD_ENSEMBLE_CRC = 8, // Ensemble patch has corrupt header.
+ C_BAD_ENSEMBLE_CRC = 8, // Ensemble patch has corrupt data.
C_BAD_TRANSFORM = 12, // Transform mis-specified.
C_BAD_BASE = 13, // Base for transform malformed.
diff --git a/courgette/third_party/bsdiff.h b/courgette/third_party/bsdiff.h
index bf7fdec..5e5683a 100644
--- a/courgette/third_party/bsdiff.h
+++ b/courgette/third_party/bsdiff.h
@@ -74,9 +74,6 @@ typedef struct MBSPatchHeader_ {
uint32 slen; // Length of the file to be patched.
uint32 scrc32; // CRC32 of the file to be patched.
uint32 dlen; // Length of the result file.
- uint32 cblen; // Length of the control block in bytes.
- uint32 difflen; // Length of the diff block in bytes.
- uint32 extralen; // Length of the extra block in bytes.
} MBSPatchHeader;
// This is the value for the tag field. Must match length exactly, not counting
diff --git a/courgette/third_party/bsdiff_apply.cc b/courgette/third_party/bsdiff_apply.cc
index 4b6a011..cc9bb50 100644
--- a/courgette/third_party/bsdiff_apply.cc
+++ b/courgette/third_party/bsdiff_apply.cc
@@ -44,9 +44,6 @@ BSDiffStatus MBS_ReadHeader(SourceStream* stream, MBSPatchHeader* header) {
if (!stream->ReadVarint32(&header->slen)) return READ_ERROR;
if (!stream->ReadVarint32(&header->scrc32)) return READ_ERROR;
if (!stream->ReadVarint32(&header->dlen)) return READ_ERROR;
- if (!stream->ReadVarint32(&header->cblen)) return READ_ERROR;
- if (!stream->ReadVarint32(&header->difflen)) return READ_ERROR;
- if (!stream->ReadVarint32(&header->extralen)) return READ_ERROR;
// The string will have a NUL terminator that we don't use, hence '-1'.
COMPILE_ASSERT(sizeof(MBS_PATCH_HEADER_TAG) - 1 == sizeof(header->tag),
@@ -54,12 +51,6 @@ BSDiffStatus MBS_ReadHeader(SourceStream* stream, MBSPatchHeader* header) {
if (memcmp(header->tag, MBS_PATCH_HEADER_TAG, 8) != 0)
return UNEXPECTED_ERROR;
- size_t bytes_remaining = stream->Remaining();
- if (header->cblen +
- header->difflen +
- header->extralen != bytes_remaining)
- return UNEXPECTED_ERROR;
-
return OK;
}
@@ -69,35 +60,37 @@ BSDiffStatus MBS_ApplyPatch(const MBSPatchHeader *header,
SinkStream* new_stream) {
const uint8* old_end = old_start + old_size;
- SourceStream control_stream;
-
- const uint8* control_start = patch_stream->Buffer();
- if (!patch_stream->ReadSubstream(header->cblen, &control_stream))
- return READ_ERROR;
- if (!patch_stream->Skip(header->difflen + header->extralen))
- return READ_ERROR;
- if (!patch_stream->Empty())
+ SourceStreamSet patch_streams;
+ if (!patch_streams.Init(patch_stream))
return READ_ERROR;
- const uint8* diff_start = control_start + header->cblen;
- const uint8* diff_end = diff_start + header->difflen;
- const uint8* extra_start = diff_end;
- const uint8* extra_end = extra_start + header->extralen;
+ SourceStream* control_stream_copy_counts = patch_streams.stream(0);
+ SourceStream* control_stream_extra_counts = patch_streams.stream(1);
+ SourceStream* control_stream_seeks = patch_streams.stream(2);
+ SourceStream* diff_skips = patch_streams.stream(3);
+ SourceStream* diff_bytes = patch_streams.stream(4);
+ SourceStream* extra_bytes = patch_streams.stream(5);
- const uint8* old_position = old_start;
- const uint8* diff_position = diff_start;
+ const uint8* extra_start = extra_bytes->Buffer();
+ const uint8* extra_end = extra_start + extra_bytes->Remaining();
const uint8* extra_position = extra_start;
+ const uint8* old_position = old_start;
+
new_stream->Reserve(header->dlen);
- while (!control_stream.Empty()) {
+ uint32 pending_diff_zeros = 0;
+ if (!diff_skips->ReadVarint32(&pending_diff_zeros))
+ return UNEXPECTED_ERROR;
+
+ while (!control_stream_copy_counts->Empty()) {
uint32 copy_count, extra_count;
int32 seek_adjustment;
- if (!control_stream.ReadVarint32(&copy_count))
+ if (!control_stream_copy_counts->ReadVarint32(&copy_count))
return UNEXPECTED_ERROR;
- if (!control_stream.ReadVarint32(&extra_count))
+ if (!control_stream_extra_counts->ReadVarint32(&extra_count))
return UNEXPECTED_ERROR;
- if (!control_stream.ReadVarint32Signed(&seek_adjustment))
+ if (!control_stream_seeks->ReadVarint32Signed(&seek_adjustment))
return UNEXPECTED_ERROR;
#ifdef DEBUG_bsmedberg
@@ -108,16 +101,22 @@ BSDiffStatus MBS_ApplyPatch(const MBSPatchHeader *header,
// block.
if (copy_count > static_cast<size_t>(old_end - old_position))
return UNEXPECTED_ERROR;
- if (copy_count > static_cast<size_t>(diff_end - diff_position))
- return UNEXPECTED_ERROR;
// Add together bytes from the 'old' file and the 'diff' stream.
for (size_t i = 0; i < copy_count; ++i) {
- uint8 byte = old_position[i] + diff_position[i];
+ uint8 diff_byte = 0;
+ if (pending_diff_zeros) {
+ --pending_diff_zeros;
+ } else {
+ if (!diff_skips->ReadVarint32(&pending_diff_zeros))
+ return UNEXPECTED_ERROR;
+ if (!diff_bytes->Read(&diff_byte, 1))
+ return UNEXPECTED_ERROR;
+ }
+ uint8 byte = old_position[i] + diff_byte;
new_stream->Write(&byte, 1);
}
old_position += copy_count;
- diff_position += copy_count;
// Copy bytes from the extra block.
if (extra_count > static_cast<size_t>(extra_end - extra_position))
@@ -134,9 +133,12 @@ BSDiffStatus MBS_ApplyPatch(const MBSPatchHeader *header,
old_position += seek_adjustment;
}
- if (diff_position != diff_end)
- return UNEXPECTED_ERROR;
- if (extra_position != extra_end)
+ if (!control_stream_copy_counts->Empty() ||
+ !control_stream_extra_counts->Empty() ||
+ !control_stream_seeks->Empty() ||
+ !diff_skips->Empty() ||
+ !diff_bytes->Empty() ||
+ !extra_bytes->Empty())
return UNEXPECTED_ERROR;
return OK;
diff --git a/courgette/third_party/bsdiff_create.cc b/courgette/third_party/bsdiff_create.cc
index d59dc21..0212093 100644
--- a/courgette/third_party/bsdiff_create.cc
+++ b/courgette/third_party/bsdiff_create.cc
@@ -42,7 +42,7 @@ namespace courgette {
//
// The code appears to be a rewritten version of the suffix array algorithm
// presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko
-// Sadakane, special-cased for bytes.
+// Sadakane, special cased for bytes.
static void
split(int *I,int *V,int start,int len,int h)
@@ -191,9 +191,6 @@ static void WriteHeader(SinkStream* stream, MBSPatchHeader* header) {
stream->WriteVarint32(header->slen);
stream->WriteVarint32(header->scrc32);
stream->WriteVarint32(header->dlen);
- stream->WriteVarint32(header->cblen);
- stream->WriteVarint32(header->difflen);
- stream->WriteVarint32(header->extralen);
}
BSDiffStatus CreateBinaryPatch(SourceStream* old_stream,
@@ -204,9 +201,19 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream,
LOG(INFO) << "Start bsdiff";
size_t initial_patch_stream_length = patch_stream->Length();
+ SinkStreamSet patch_streams;
+ SinkStream* control_stream_copy_counts = patch_streams.stream(0);
+ SinkStream* control_stream_extra_counts = patch_streams.stream(1);
+ SinkStream* control_stream_seeks = patch_streams.stream(2);
+ SinkStream* diff_skips = patch_streams.stream(3);
+ SinkStream* diff_bytes = patch_streams.stream(4);
+ SinkStream* extra_bytes = patch_streams.stream(5);
+
const uint8* old = old_stream->Buffer();
const int oldsize = old_stream->Remaining();
+ uint32 pending_diff_zeros = 0;
+
scoped_array<int> I(new(std::nothrow) int[oldsize + 1]);
scoped_array<int> V(new(std::nothrow) int[oldsize + 1]);
if (I == NULL || V == NULL)
@@ -221,24 +228,12 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream,
const uint8* newbuf = new_stream->Buffer();
const int newsize = new_stream->Remaining();
- // Allocate newsize+1 bytes instead of newsize bytes to ensure that we never
- // try to malloc(0) and get a NULL pointer.
-
- scoped_array<uint8> diff_bytes_array(new(std::nothrow) uint8[newsize + 1]);
- scoped_array<uint8> extra_bytes_array(new(std::nothrow) uint8[newsize + 1]);
- if (diff_bytes_array == NULL || extra_bytes_array == NULL)
- return MEM_ERROR;
-
- uint8* diff_bytes = diff_bytes_array.get();
- uint8* extra_bytes = extra_bytes_array.get();
int control_length = 0;
int diff_bytes_length = 0;
int diff_bytes_nonzero = 0;
int extra_bytes_length = 0;
int eblen = 0;
- SinkStream control_stream;
-
// The patch format is a sequence of triples <copy,extra,seek> where 'copy' is
// the number of bytes to copy from the old file (possibly with mistakes),
// 'extra' is the number of bytes to copy from a stream of fresh bytes, and
@@ -364,13 +359,18 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream,
for (int i = 0; i < lenf; i++) {
uint8 diff_byte = newbuf[lastscan + i] - old[lastpos + i];
- diff_bytes[diff_bytes_length + i] = diff_byte;
- if (diff_byte)
+ if (diff_byte) {
++diff_bytes_nonzero;
+ diff_skips->WriteVarint32(pending_diff_zeros);
+ pending_diff_zeros = 0;
+ diff_bytes->Write(&diff_byte, 1);
+ } else {
+ ++pending_diff_zeros;
+ }
}
int gap = (scan - lenb) - (lastscan + lenf);
for (int i = 0; i < gap; i++)
- extra_bytes[extra_bytes_length + i] = newbuf[lastscan + lenf + i];
+ extra_bytes->Write(&newbuf[lastscan + lenf + i], 1);
diff_bytes_length += lenf;
extra_bytes_length += gap;
@@ -379,9 +379,9 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream,
uint32 extra_count = gap;
int32 seek_adjustment = ((pos - lenb) - (lastpos + lenf));
- control_stream.WriteVarint32(copy_count);
- control_stream.WriteVarint32(extra_count);
- control_stream.WriteVarint32Signed(seek_adjustment);
+ control_stream_copy_counts->WriteVarint32(copy_count);
+ control_stream_extra_counts->WriteVarint32(extra_count);
+ control_stream_seeks->WriteVarint32Signed(seek_adjustment);
++control_length;
#ifdef DEBUG_bsmedberg
LOG(INFO) << StringPrintf(
@@ -395,6 +395,8 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream,
}
}
+ diff_skips->WriteVarint32(pending_diff_zeros);
+
I.reset();
MBSPatchHeader header;
@@ -405,19 +407,16 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream,
header.slen = oldsize;
header.scrc32 = CalculateCrc(old, oldsize);
header.dlen = newsize;
- header.cblen = control_stream.Length();
- header.difflen = diff_bytes_length;
- header.extralen = extra_bytes_length;
WriteHeader(patch_stream, &header);
- patch_stream->Append(&control_stream);
- patch_stream->Write(diff_bytes, diff_bytes_length);
- patch_stream->Write(extra_bytes, extra_bytes_length);
+ size_t diff_skips_length = diff_skips->Length();
+ patch_streams.CopyTo(patch_stream);
LOG(INFO) << "Control tuples: " << control_length
<< " copy bytes: " << diff_bytes_length
<< " mistakes: " << diff_bytes_nonzero
+ << " (skips: " << diff_skips_length << ")"
<< " extra bytes: " << extra_bytes_length;
LOG(INFO) << "Uncompressed bsdiff patch size "