diff options
Diffstat (limited to 'courgette/third_party/bsdiff_create.cc')
-rw-r--r-- | courgette/third_party/bsdiff_create.cc | 165 |
1 files changed, 7 insertions, 158 deletions
diff --git a/courgette/third_party/bsdiff_create.cc b/courgette/third_party/bsdiff_create.cc index b43b53a..d6ea695 100644 --- a/courgette/third_party/bsdiff_create.cc +++ b/courgette/third_party/bsdiff_create.cc @@ -20,6 +20,8 @@ 2010-05-26 - Use a paged array for V and I. The address space may be too fragmented for these big arrays to be contiguous. --Stephen Adams <sra@chromium.org> + 2015-08-03 - Extract qsufsort portion to a separate file. + --Samuel Huang <huangs@chromium.org> */ #include "courgette/third_party/bsdiff.h" @@ -35,162 +37,10 @@ #include "courgette/crc.h" #include "courgette/streams.h" #include "courgette/third_party/paged_array.h" +#include "courgette/third_party/qsufsort.h" namespace courgette { -// ------------------------------------------------------------------------ -// -// The following code is taken verbatim from 'bsdiff.c'. Please keep all the -// code formatting and variable names. The changes from the original are (1) -// replacing tabs with spaces, (2) indentation, (3) using 'const', and (4) -// changing the V and I parameters from int* to PagedArray<int>&. -// -// The code appears to be a rewritten version of the suffix array algorithm -// presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko -// Sadakane, special cased for bytes. - -static void -split(PagedArray<int>& I,PagedArray<int>& V,int start,int len,int h) -{ - int i,j,k,x,tmp,jj,kk; - - if(len<16) { - for(k=start;k<start+len;k+=j) { - j=1;x=V[I[k]+h]; - for(i=1;k+i<start+len;i++) { - if(V[I[k+i]+h]<x) { - x=V[I[k+i]+h]; - j=0; - }; - if(V[I[k+i]+h]==x) { - tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp; - j++; - }; - }; - for(i=0;i<j;i++) V[I[k+i]]=k+j-1; - if(j==1) I[k]=-1; - }; - return; - }; - - x=V[I[start+len/2]+h]; - jj=0;kk=0; - for(i=start;i<start+len;i++) { - if(V[I[i]+h]<x) jj++; - if(V[I[i]+h]==x) kk++; - }; - jj+=start;kk+=jj; - - i=start;j=0;k=0; - while(i<jj) { - if(V[I[i]+h]<x) { - i++; - } else if(V[I[i]+h]==x) { - tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp; - j++; - } else { - tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp; - k++; - }; - }; - - while(jj+j<kk) { - if(V[I[jj+j]+h]==x) { - j++; - } else { - tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp; - k++; - }; - }; - - if(jj>start) split(I,V,start,jj-start,h); - - for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1; - if(jj==kk-1) I[jj]=-1; - - if(start+len>kk) split(I,V,kk,start+len-kk,h); -} - -static void -qsufsort(PagedArray<int>& I, PagedArray<int>& V,const unsigned char *old,int oldsize) -{ - int buckets[256]; - int i,h,len; - - for(i=0;i<256;i++) buckets[i]=0; - for(i=0;i<oldsize;i++) buckets[old[i]]++; - for(i=1;i<256;i++) buckets[i]+=buckets[i-1]; - for(i=255;i>0;i--) buckets[i]=buckets[i-1]; - buckets[0]=0; - - for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i; - I[0]=oldsize; - for(i=0;i<oldsize;i++) V[i]=buckets[old[i]]; - V[oldsize]=0; - for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1; - I[0]=-1; - - for(h=1;I[0]!=-(oldsize+1);h+=h) { - len=0; - for(i=0;i<oldsize+1;) { - if(I[i]<0) { - len-=I[i]; - i-=I[i]; - } else { - if(len) I[i-len]=-len; - len=V[I[i]]+1-i; - split(I,V,i,len,h); - i+=len; - len=0; - }; - }; - if(len) I[i-len]=-len; - }; - - for(i=0;i<oldsize+1;i++) I[V[i]]=i; -} - -static int -matchlen(const unsigned char *old,int oldsize,const unsigned char *newbuf,int newsize) -{ - int i; - - for(i=0;(i<oldsize)&&(i<newsize);i++) - if(old[i]!=newbuf[i]) break; - - return i; -} - -static int -search(PagedArray<int>& I,const unsigned char *old,int oldsize, - const unsigned char *newbuf,int newsize,int st,int en,int *pos) -{ - int x,y; - - if(en-st<2) { - x=matchlen(old+I[st],oldsize-I[st],newbuf,newsize); - y=matchlen(old+I[en],oldsize-I[en],newbuf,newsize); - - if(x>y) { - *pos=I[st]; - return x; - } else { - *pos=I[en]; - return y; - } - } - - x=st+(en-st)/2; - if(memcmp(old+I[x],newbuf,std::min(oldsize-I[x],newsize))<0) { - return search(I,old,oldsize,newbuf,newsize,x,en,pos); - } else { - return search(I,old,oldsize,newbuf,newsize,st,x,pos); - } -} - -// End of 'verbatim' code. -// ------------------------------------------------------------------------ - static CheckBool WriteHeader(SinkStream* stream, MBSPatchHeader* header) { bool ok = stream->Write(header->tag, sizeof(header->tag)); ok &= stream->WriteVarint32(header->slen); @@ -236,7 +86,7 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream, } base::Time q_start_time = base::Time::Now(); - qsufsort(I, V, old, oldsize); + qsuf::qsufsort<PagedArray<int>&>(I, V, old, oldsize); VLOG(1) << " done qsufsort " << (base::Time::Now() - q_start_time).InSecondsF(); V.clear(); @@ -300,9 +150,8 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream, scan += match_length; for (int scsc = scan; scan < newsize; ++scan) { - match_length = search(I, old, oldsize, - newbuf + scan, newsize - scan, - 0, oldsize, &pos); + match_length = qsuf::search<PagedArray<int>&>( + I, old, oldsize, newbuf + scan, newsize - scan, 0, oldsize, &pos); for ( ; scsc < scan + match_length ; scsc++) if ((scsc + lastoffset < oldsize) && @@ -451,4 +300,4 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream, return OK; } -} // namespace +} // namespace courgette |