From c1de821fbf6dbefdc8eaf8db9e47e1b9ad20d0a9 Mon Sep 17 00:00:00 2001 From: huangs Date: Tue, 11 Aug 2015 13:58:52 -0700 Subject: [Courgette] Extract qsufsort module and add tests. This is to prepare for a follow-up CL to optimize qsufsort(), which reduces run time by ~40%, and "courgette -gen" run time by ~10%. Review URL: https://codereview.chromium.org/1272453003 Cr-Commit-Position: refs/heads/master@{#342883} --- courgette/third_party/bsdiff_create.cc | 165 ++------------------------------- 1 file changed, 7 insertions(+), 158 deletions(-) (limited to 'courgette/third_party/bsdiff_create.cc') diff --git a/courgette/third_party/bsdiff_create.cc b/courgette/third_party/bsdiff_create.cc index b43b53a..d6ea695 100644 --- a/courgette/third_party/bsdiff_create.cc +++ b/courgette/third_party/bsdiff_create.cc @@ -20,6 +20,8 @@ 2010-05-26 - Use a paged array for V and I. The address space may be too fragmented for these big arrays to be contiguous. --Stephen Adams + 2015-08-03 - Extract qsufsort portion to a separate file. + --Samuel Huang */ #include "courgette/third_party/bsdiff.h" @@ -35,162 +37,10 @@ #include "courgette/crc.h" #include "courgette/streams.h" #include "courgette/third_party/paged_array.h" +#include "courgette/third_party/qsufsort.h" namespace courgette { -// ------------------------------------------------------------------------ -// -// The following code is taken verbatim from 'bsdiff.c'. Please keep all the -// code formatting and variable names. The changes from the original are (1) -// replacing tabs with spaces, (2) indentation, (3) using 'const', and (4) -// changing the V and I parameters from int* to PagedArray&. -// -// The code appears to be a rewritten version of the suffix array algorithm -// presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko -// Sadakane, special cased for bytes. - -static void -split(PagedArray& I,PagedArray& V,int start,int len,int h) -{ - int i,j,k,x,tmp,jj,kk; - - if(len<16) { - for(k=start;kstart) split(I,V,start,jj-start,h); - - for(i=0;ikk) split(I,V,kk,start+len-kk,h); -} - -static void -qsufsort(PagedArray& I, PagedArray& V,const unsigned char *old,int oldsize) -{ - int buckets[256]; - int i,h,len; - - for(i=0;i<256;i++) buckets[i]=0; - for(i=0;i0;i--) buckets[i]=buckets[i-1]; - buckets[0]=0; - - for(i=0;i& I,const unsigned char *old,int oldsize, - const unsigned char *newbuf,int newsize,int st,int en,int *pos) -{ - int x,y; - - if(en-st<2) { - x=matchlen(old+I[st],oldsize-I[st],newbuf,newsize); - y=matchlen(old+I[en],oldsize-I[en],newbuf,newsize); - - if(x>y) { - *pos=I[st]; - return x; - } else { - *pos=I[en]; - return y; - } - } - - x=st+(en-st)/2; - if(memcmp(old+I[x],newbuf,std::min(oldsize-I[x],newsize))<0) { - return search(I,old,oldsize,newbuf,newsize,x,en,pos); - } else { - return search(I,old,oldsize,newbuf,newsize,st,x,pos); - } -} - -// End of 'verbatim' code. -// ------------------------------------------------------------------------ - static CheckBool WriteHeader(SinkStream* stream, MBSPatchHeader* header) { bool ok = stream->Write(header->tag, sizeof(header->tag)); ok &= stream->WriteVarint32(header->slen); @@ -236,7 +86,7 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream, } base::Time q_start_time = base::Time::Now(); - qsufsort(I, V, old, oldsize); + qsuf::qsufsort&>(I, V, old, oldsize); VLOG(1) << " done qsufsort " << (base::Time::Now() - q_start_time).InSecondsF(); V.clear(); @@ -300,9 +150,8 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream, scan += match_length; for (int scsc = scan; scan < newsize; ++scan) { - match_length = search(I, old, oldsize, - newbuf + scan, newsize - scan, - 0, oldsize, &pos); + match_length = qsuf::search&>( + I, old, oldsize, newbuf + scan, newsize - scan, 0, oldsize, &pos); for ( ; scsc < scan + match_length ; scsc++) if ((scsc + lastoffset < oldsize) && @@ -451,4 +300,4 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream, return OK; } -} // namespace +} // namespace courgette -- cgit v1.1