summaryrefslogtreecommitdiffstats
path: root/courgette
diff options
context:
space:
mode:
authorsra@chromium.org <sra@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-05-29 00:47:48 +0000
committersra@chromium.org <sra@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-05-29 00:47:48 +0000
commitb6732ba797c8b6a8b2a4748fe61d86b6578e9468 (patch)
treea61713e539847443e779814b4d3eeaac74ace06e /courgette
parentfd75696fceeaca48f47ad77ff40010b220f31c3c (diff)
downloadchromium_src-b6732ba797c8b6a8b2a4748fe61d86b6578e9468.zip
chromium_src-b6732ba797c8b6a8b2a4748fe61d86b6578e9468.tar.gz
chromium_src-b6732ba797c8b6a8b2a4748fe61d86b6578e9468.tar.bz2
Use an array of pages for the large arrays.
The large arrays of ints used by the suffix array code sometimes can't be allocated due to fragmented address space. Using an array of 'pages' lets the allocation be satisfied by many smaller allocations. BUG=none TEST=none Review URL: http://codereview.chromium.org/2228003 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@48547 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'courgette')
-rw-r--r--courgette/bsdiff_memory_unittest.cc36
-rw-r--r--courgette/courgette.gyp2
-rw-r--r--courgette/third_party/bsdiff_create.cc39
-rw-r--r--courgette/third_party/paged_array.h81
-rw-r--r--courgette/third_party/paged_array_unittest.cc42
5 files changed, 188 insertions, 12 deletions
diff --git a/courgette/bsdiff_memory_unittest.cc b/courgette/bsdiff_memory_unittest.cc
index a384dee..75de082 100644
--- a/courgette/bsdiff_memory_unittest.cc
+++ b/courgette/bsdiff_memory_unittest.cc
@@ -20,6 +20,8 @@ class BSDiffMemoryTest : public testing::Test {
std::string FileContents(const char* file_name) const;
void GenerateAndTestPatch(const std::string& a, const std::string& b) const;
+ std::string GenerateSyntheticInput(size_t length, int seed) const;
+
private:
void SetUp() {
PathService::Get(base::DIR_SOURCE_ROOT, &test_dir_);
@@ -64,6 +66,18 @@ void BSDiffMemoryTest::GenerateAndTestPatch(const std::string& old_text,
EXPECT_EQ(0, memcmp(new_text.c_str(), new2.Buffer(), new_text.length()));
}
+std::string BSDiffMemoryTest::GenerateSyntheticInput(size_t length, int seed)
+ const {
+ static const char* a[8] = {"O", "A", "x", "-", "y", ".", "|", ":"};
+ std::string result;
+ while (result.length() < length) {
+ seed = (seed + 17) * 1049 + (seed >> 27);
+ result.append(a[seed & 7]);
+ }
+ result.resize(length);
+ return result;
+}
+
TEST_F(BSDiffMemoryTest, TestEmpty) {
GenerateAndTestPatch("", "");
}
@@ -99,6 +113,28 @@ TEST_F(BSDiffMemoryTest, TestSmallInputsWithSmallChanges) {
GenerateAndTestPatch(file1, file2);
}
+TEST_F(BSDiffMemoryTest, TestNearPageArrayPageSize) {
+ // This magic number is the size of one block of the PageArray in
+ // third_party/bsdiff_create.cc.
+ size_t critical_size = 1 << 18;
+
+ // Test first-inputs with sizes that straddle the magic size to test this
+ // PageArray's internal boundary condition.
+
+ std::string file1 = GenerateSyntheticInput(critical_size, 0);
+ std::string file2 = GenerateSyntheticInput(critical_size, 1);
+ GenerateAndTestPatch(file1, file2);
+
+ std::string file1a = file1.substr(0, critical_size - 1);
+ GenerateAndTestPatch(file1a, file2);
+
+ std::string file1b = file1.substr(0, critical_size - 2);
+ GenerateAndTestPatch(file1b, file2);
+
+ std::string file1c = file1 + file1.substr(0, 1);
+ GenerateAndTestPatch(file1c, file2);
+}
+
TEST_F(BSDiffMemoryTest, TestIndenticalDlls) {
std::string file1 = FileContents("en-US.dll");
GenerateAndTestPatch(file1, file1);
diff --git a/courgette/courgette.gyp b/courgette/courgette.gyp
index 21eedfa..7ca3dbe6 100644
--- a/courgette/courgette.gyp
+++ b/courgette/courgette.gyp
@@ -24,6 +24,7 @@
'third_party/bsdiff.h',
'third_party/bsdiff_apply.cc',
'third_party/bsdiff_create.cc',
+ 'third_party/paged_array.h',
'courgette.h',
'crc.cc',
'crc.h',
@@ -85,6 +86,7 @@
'image_info_unittest.cc',
'run_all_unittests.cc',
'streams_unittest.cc',
+ 'third_party/paged_array_unittest.cc'
],
'dependencies': [
'courgette_lib',
diff --git a/courgette/third_party/bsdiff_create.cc b/courgette/third_party/bsdiff_create.cc
index fc0ce25..6f93720 100644
--- a/courgette/third_party/bsdiff_create.cc
+++ b/courgette/third_party/bsdiff_create.cc
@@ -17,6 +17,9 @@
--Rahul Kuchhal
2009-03-31 - Change to use Streams. Added lots of comments.
--Stephen Adams <sra@chromium.org>
+ 2010-05-26 - Use a paged array for V and I. The address space may be too
+ fragmented for these big arrays to be contiguous.
+ --Stephen Adams <sra@chromium.org>
*/
#include "courgette/third_party/bsdiff.h"
@@ -31,21 +34,23 @@
#include "courgette/crc.h"
#include "courgette/streams.h"
+#include "courgette/third_party/paged_array.h"
namespace courgette {
// ------------------------------------------------------------------------
//
// The following code is taken verbatim from 'bsdiff.c'. Please keep all the
-// code formatting and variable names. The only changes from the original are
-// replacing tabs with spaces, indentation, and using 'const'.
+// code formatting and variable names. The changes from the original are (1)
+// replacing tabs with spaces, (2) indentation, (3) using 'const', and (4)
+// changing the V and I parameters from int* to PagedArray<int>&.
//
// The code appears to be a rewritten version of the suffix array algorithm
// presented in "Faster Suffix Sorting" by N. Jesper Larsson and Kunihiko
// Sadakane, special cased for bytes.
static void
-split(int *I,int *V,int start,int len,int h)
+split(PagedArray<int>& I,PagedArray<int>& V,int start,int len,int h)
{
int i,j,k,x,tmp,jj,kk;
@@ -107,7 +112,7 @@ split(int *I,int *V,int start,int len,int h)
}
static void
-qsufsort(int *I,int *V,const unsigned char *old,int oldsize)
+qsufsort(PagedArray<int>& I, PagedArray<int>& V,const unsigned char *old,int oldsize)
{
int buckets[256];
int i,h,len;
@@ -157,7 +162,7 @@ matchlen(const unsigned char *old,int oldsize,const unsigned char *newbuf,int ne
}
static int
-search(int *I,const unsigned char *old,int oldsize,
+search(PagedArray<int>& I,const unsigned char *old,int oldsize,
const unsigned char *newbuf,int newsize,int st,int en,int *pos)
{
int x,y;
@@ -214,16 +219,26 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream,
uint32 pending_diff_zeros = 0;
- scoped_array<int> I(new(std::nothrow) int[oldsize + 1]);
- scoped_array<int> V(new(std::nothrow) int[oldsize + 1]);
- if (I == NULL || V == NULL)
+ PagedArray<int> I;
+ PagedArray<int> V;
+
+ if (!I.Allocate(oldsize + 1)) {
+ LOG(ERROR) << "Could not allocate I[], " << ((oldsize + 1) * sizeof(int))
+ << " bytes";
+ return MEM_ERROR;
+ }
+
+ if (!V.Allocate(oldsize + 1)) {
+ LOG(ERROR) << "Could not allocate V[], " << ((oldsize + 1) * sizeof(int))
+ << " bytes";
return MEM_ERROR;
+ }
base::Time q_start_time = base::Time::Now();
- qsufsort(I.get(), V.get(), old, oldsize);
+ qsufsort(I, V, old, oldsize);
LOG(INFO) << " done qsufsort "
<< (base::Time::Now() - q_start_time).InSecondsF();
- V.reset();
+ V.clear();
const uint8* newbuf = new_stream->Buffer();
const int newsize = new_stream->Remaining();
@@ -284,7 +299,7 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream,
scan += match_length;
for (int scsc = scan; scan < newsize; ++scan) {
- match_length = search(I.get(), old, oldsize,
+ match_length = search(I, old, oldsize,
newbuf + scan, newsize - scan,
0, oldsize, &pos);
@@ -396,7 +411,7 @@ BSDiffStatus CreateBinaryPatch(SourceStream* old_stream,
diff_skips->WriteVarint32(pending_diff_zeros);
- I.reset();
+ I.clear();
MBSPatchHeader header;
// The string will have a null terminator that we don't use, hence '-1'.
diff --git a/courgette/third_party/paged_array.h b/courgette/third_party/paged_array.h
new file mode 100644
index 0000000..e9d0299
--- /dev/null
+++ b/courgette/third_party/paged_array.h
@@ -0,0 +1,81 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// PagedArray implements an array stored using many fixed-size pages.
+//
+// PagedArray is a work-around to allow large arrays to be allocated when there
+// is too much address space fragmentation for allocating the large arrays as
+// contigous arrays.
+#ifndef COURGETTE_BSDIFF_PAGED_ARRAY_H_
+#define COURGETTE_BSDIFF_PAGED_ARRAY_H_
+
+// For std::nothrow:
+#include <new>
+
+#include "base/basictypes.h"
+
+namespace courgette {
+
+// PagedArray implements an array stored using many fixed-size pages.
+template<typename T>
+class PagedArray {
+ enum {
+ // Page size in elements. Page size of 2^18 * sizeof(T) is 1MB for T = int.
+ kLogPageSize = 18,
+ kPageSize = 1 << kLogPageSize
+ };
+
+ public:
+ PagedArray() : pages_(NULL), page_count_(0) {}
+
+ ~PagedArray() { clear(); }
+
+ T& operator[](size_t i) {
+ size_t page = i >> kLogPageSize;
+ size_t offset = i & (kPageSize - 1);
+ // It is tempting to add a DCHECK(page < page_count_), but that makes
+ // bsdiff_create run 2x slower (even when compiled optimized.)
+ return pages_[page][offset];
+ }
+
+ // Allocates storage for |size| elements. Returns true on success and false if
+ // allocation fails.
+ bool Allocate(size_t size) {
+ clear();
+ size_t pages_needed = (size + kPageSize - 1) >> kLogPageSize;
+ pages_ = new(std::nothrow) T*[pages_needed];
+ if (pages_ == NULL)
+ return false;
+
+ for (page_count_ = 0; page_count_ < pages_needed; ++page_count_) {
+ T* block = new(std::nothrow) T[kPageSize];
+ if (block == NULL) {
+ clear();
+ return false;
+ }
+ pages_[page_count_] = block;
+ }
+ return true;
+ }
+
+ // Releases all storage. May be called more than once.
+ void clear() {
+ if (pages_ != NULL) {
+ while (page_count_ != 0) {
+ --page_count_;
+ delete[] pages_[page_count_];
+ }
+ delete[] pages_;
+ pages_ = NULL;
+ }
+ }
+
+ private:
+ T** pages_;
+ size_t page_count_;
+
+ DISALLOW_COPY_AND_ASSIGN(PagedArray);
+};
+} // namespace
+#endif // COURGETTE_BSDIFF_PAGED_ARRAY_H_
diff --git a/courgette/third_party/paged_array_unittest.cc b/courgette/third_party/paged_array_unittest.cc
new file mode 100644
index 0000000..9736ca0
--- /dev/null
+++ b/courgette/third_party/paged_array_unittest.cc
@@ -0,0 +1,42 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "courgette/third_party/paged_array.h"
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+class PagedArrayTest : public testing::Test {
+ public:
+ // Total allocation of 4GB will fail in 32 bit programs if allocations are
+ // leaked.
+ static const int kIterations = 20;
+ static const int kSize = 200 * 1024 * 1024 / sizeof(int); // 200MB
+};
+
+TEST_F(PagedArrayTest, TestManyAllocationsDestructorFree) {
+ for (int i = 0; i < kIterations; ++i) {
+ courgette::PagedArray<int> a;
+ EXPECT_EQ(true, a.Allocate(kSize));
+ }
+}
+
+TEST_F(PagedArrayTest, TestManyAllocationsManualFree) {
+ courgette::PagedArray<int> a;
+ for (int i = 0; i < kIterations; ++i) {
+ EXPECT_EQ(true, a.Allocate(kSize));
+ a.clear();
+ }
+}
+
+TEST_F(PagedArrayTest, TestAccess) {
+ const int kAccessSize = 3 * 1024 * 1024;
+ courgette::PagedArray<int> a;
+ a.Allocate(kAccessSize);
+ for (int i = 0; i < kAccessSize; ++i) {
+ a[i] = i;
+ }
+ for (int i = 0; i < kAccessSize; ++i) {
+ EXPECT_EQ(i, a[i]);
+ }
+}