summaryrefslogtreecommitdiffstats
path: root/courgette/third_party/bsdiff_create.cc
diff options
context:
space:
mode:
Diffstat (limited to 'courgette/third_party/bsdiff_create.cc')
-rw-r--r--courgette/third_party/bsdiff_create.cc165
1 files changed, 7 insertions, 158 deletions
diff --git a/courgette/third_party/bsdiff_create.cc b/courgette/third_party/bsdiff_create.cc
index b43b53a..d6ea695 100644
--- a/courgette/third_party/bsdiff_create.cc
+++ b/courgette/third_party/bsdiff_create.cc
@@ -20,6 +20,8 @@
2010-05-26 - Use a paged array for V and I. The address space may be too
fragmented for these big arrays to be contiguous.
--Stephen Adams <sra@chromium.org>
+ 2015-08-03 - Extract qsufsort portion to a separate file.
+ --Samuel Huang <huangs@chromium.org>
*/
#include "courgette/third_party/bsdiff.h"
@@ -35,162 +37,10 @@
#include "courgette/crc.h"
#include "courgette/streams.h"
#include "courgette/third_party/paged_array.h"
+#include "courgette/third_party/qsufsort.h"
namespace courgette {
-// ------------------------------------------------------------------------
-//
-// The following code is taken verbatim from 'bsdiff.c'. Please keep all the
-// code formatting and variable names. The changes from the original are (1)
-// replacing tabs with spaces, (2) indentation, (3) using 'const', and (4)
-// changing the V and I parameters from int* to PagedArray<int>&.
-//
-// The code appears to be a rewritten version of the suffix array algorithm
-// presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko
-// Sadakane, special cased for bytes.
-
-static void
-split(PagedArray<int>& I,PagedArray<int>& V,int start,int len,int h)
-{
- int i,j,k,x,tmp,jj,kk;
-
- if(len<16) {
- for(k=start;k<start+len;k+=j) {
- j=1;x=V[I[k]+h];
- for(i=1;k+i<start+len;i++) {
- if(V[I[k+i]+h]<x) {
- x=V[I[k+i]+h];
- j=0;
- };
- if(V[I[k+i]+h]==x) {
- tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp;
- j++;
- };
- };
- for(i=0;i<j;i++) V[I[k+i]]=k+j-1;
- if(j==1) I[k]=-1;
- };
- return;
- };
-
- x=V[I[start+len/2]+h];
- jj=0;kk=0;
- for(i=start;i<start+len;i++) {
- if(V[I[i]+h]<x) jj++;
- if(V[I[i]+h]==x) kk++;
- };
- jj+=start;kk+=jj;
-
- i=start;j=0;k=0;
- while(i<jj) {
- if(V[I[i]+h]<x) {
- i++;
- } else if(V[I[i]+h]==x) {
- tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp;
- j++;
- } else {
- tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp;
- k++;
- };
- };
-
- while(jj+j<kk) {
- if(V[I[jj+j]+h]==x) {
- j++;
- } else {
- tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp;
- k++;
- };
- };
-
- if(jj>start) split(I,V,start,jj-start,h);
-
- for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1;
- if(jj==kk-1) I[jj]=-1;
-
- if(start+len>kk) split(I,V,kk,start+len-kk,h);
-}
-
-static void
-qsufsort(PagedArray<int>& I, PagedArray<int>& V,const unsigned char *old,int oldsize)
-{
- int buckets[256];
- int i,h,len;
-
- for(i=0;i<256;i++) buckets[i]=0;
- for(i=0;i<oldsize;i++) buckets[old[i]]++;
- for(i=1;i<256;i++) buckets[i]+=buckets[i-1];
- for(i=255;i>0;i--) buckets[i]=buckets[i-1];
- buckets[0]=0;
-
- for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i;
- I[0]=oldsize;
- for(i=0;i<oldsize;i++) V[i]=buckets[old[i]];
- V[oldsize]=0;
- for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1;
- I[0]=-1;
-
- for(h=1;I[0]!=-(oldsize+1);h+=h) {
- len=0;
- for(i=0;i<oldsize+1;) {
- if(I[i]<0) {
- len-=I[i];
- i-=I[i];
- } else {
- if(len) I[i-len]=-len;
- len=V[I[i]]+1-i;
- split(I,V,i,len,h);
- i+=len;
- len=0;
- };
- };
- if(len) I[i-len]=-len;
- };
-
- for(i=0;i<oldsize+1;i++) I[V[i]]=i;
-}
-
-static int
-matchlen(const unsigned char *old,int oldsize,const unsigned char *newbuf,int newsize)
-{
- int i;
-
- for(i=0;(i<oldsize)&&(i<newsize);i++)
- if(old[i]!=newbuf[i]) break;
-
- return i;
-}
-
-static int
-search(PagedArray<int>& I,const unsigned char *old,int oldsize,
- const unsigned char *newbuf,int newsize,int st,int en,int *pos)
-{
- int x,y;
-
- if(en-st<2) {
- x=matchlen(old+I[st],oldsize-I[st],newbuf,newsize);
- y=matchlen(old+I[en],oldsize-I[en],newbuf,newsize);
-
- if(x>y) {
- *pos=I[st];
- return x;
- } else {
- *pos=I[en];
- return y;
- }
- }
-
- x=st+(en-st)/2;
- if(memcmp(old+I[x],newbuf,std::min(oldsize-I[x],newsize))<0) {
- return search(I,old,oldsize,newbuf,newsize,x,en,pos);
- } else {
- return search(I,old,oldsize,newbuf,newsize,st,x,pos);
- }
-}
-
-// End of 'verbatim' code.
-// ------------------------------------------------------------------------
-
static CheckBool WriteHeader(SinkStream* stream, MBSPatchHeader* header) {
bool ok = stream->Write(header->tag, sizeof(header->tag));
ok &= stream->WriteVarint32(header->slen);
@@ -236,7 +86,7 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream,
}
base::Time q_start_time = base::Time::Now();
- qsufsort(I, V, old, oldsize);
+ qsuf::qsufsort<PagedArray<int>&>(I, V, old, oldsize);
VLOG(1) << " done qsufsort "
<< (base::Time::Now() - q_start_time).InSecondsF();
V.clear();
@@ -300,9 +150,8 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream,
scan += match_length;
for (int scsc = scan; scan < newsize; ++scan) {
- match_length = search(I, old, oldsize,
- newbuf + scan, newsize - scan,
- 0, oldsize, &pos);
+ match_length = qsuf::search<PagedArray<int>&>(
+ I, old, oldsize, newbuf + scan, newsize - scan, 0, oldsize, &pos);
for ( ; scsc < scan + match_length ; scsc++)
if ((scsc + lastoffset < oldsize) &&
@@ -451,4 +300,4 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream,
return OK;
}
-} // namespace
+} // namespace courgette