/* qsufsort.h -- Suffix array generation. Copyright 2003 Colin Percival For the terms under which this work may be distributed, please see the adjoining file "LICENSE". ChangeLog: 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit values throughout. --Benjamin Smedberg 2010-05-26 - Use a paged array for V and I. The address space may be too fragmented for these big arrays to be contiguous. --Stephen Adams 2015-08-03 - Extrat qsufsort to a separate file as template. --Samuel Huang 2015-08-19 - Optimize split() and search(), add comments. --Samuel Huang */ #include #include namespace courgette { namespace qsuf { // ------------------------------------------------------------------------ // // The following code is taken verbatim from 'bsdiff.c'. Please keep all the // code formatting and variable names. The changes from the original are: // (1) replacing tabs with spaces, // (2) indentation, // (3) using 'const', // (4) changing the V and I parameters from int* to template . // (5) optimizing split() and search(); fix styles. // // The code appears to be a rewritten version of the suffix array algorithm // presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko // Sadakane, special cased for bytes. template static void split(T I, T V, int start, int end, int h) { // For small interval, apply selection sort. if (end - start < 6) { for (int i = start; i < end; ) { int skip = 1; int best = V[I[i] + h]; for (int j = i + 1; j < end; j++) { int cur = V[I[j] + h]; if (best > cur) { best = cur; int tmp = I[i]; I[i] = I[j]; I[j] = tmp; skip = 1; } else if (best == cur) { int tmp = I[i + skip]; I[i + skip] = I[j]; I[j] = tmp; ++skip; } } if (skip == 1) { V[I[i]] = i; I[i] = -1; } else { for (int j = i, jend = i + skip; j < jend; j++) V[I[j]] = jend - 1; } i += skip; } return; } // Split [start, end) into 3 intervals: // [start, j) with secondary keys < pivot, // [j, k) with secondary keys == pivot, // [k, end) with secondary keys > pivot. int pivot = V[I[(start + end) / 2] + h]; int j = start; int k = end; for (int i = start; i < k; ) { int cur = V[I[i] + h]; if (cur < pivot) { if (i != j) { int tmp = I[i]; I[i] = I[j]; I[j] = tmp; } ++i; ++j; } else if (cur > pivot) { --k; int tmp = I[i]; I[i] = I[k]; I[k] = tmp; } else { ++i; } } // Recurse on the "< pivot" piece. if (start < j) split(I, V, start, j, h); // Update the "== pivot" piece. if (j == k - 1) { V[I[j]] = j; I[j] = -1; } else { for (int i = j; i < k; ++i) V[I[i]] = k - 1; } // Recurse on the "> pivot" piece. if (k < end) split(I, V, k, end, h); } template static void qsufsort(T I, T V,const unsigned char *old,int oldsize) { int buckets[256]; int i,h,len; for(i=0;i<256;i++) buckets[i]=0; for(i=0;i0;i--) buckets[i]=buckets[i-1]; buckets[0]=0; for(i=0;i(I,V,i,i+len,h); i+=len; len=0; }; }; if(len) I[i-len]=-len; }; for(i=0;i static int search(T I, const unsigned char *old, int oldsize, const unsigned char *newbuf, int newsize, int *pos) { int lo = 0; int hi = oldsize; while (hi - lo >= 2) { int mid = (lo + hi) / 2; if(memcmp(old+I[mid],newbuf,std::min(oldsize-I[mid],newsize))<0) { lo = mid; } else { hi = mid; } } int x = matchlen(old + I[lo], oldsize - I[lo], newbuf, newsize); int y = matchlen(old + I[hi], oldsize - I[hi], newbuf, newsize); if(x > y) { *pos = I[lo]; return x; } *pos = I[hi]; return y; } // End of 'verbatim' code. // ------------------------------------------------------------------------ } // namespace qsuf } // namespace courgette