diff options
author | sra@chromium.org <sra@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-05-08 23:00:29 +0000 |
---|---|---|
committer | sra@chromium.org <sra@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-05-08 23:00:29 +0000 |
commit | 04ca1bc65afb76ea30698c25f24599d20119e3d2 (patch) | |
tree | 2bb65e974d478f4d607b83f6a84a56c01f501ac0 /courgette/difference_estimator.cc | |
parent | 9adf1dcf3281408560267787eddcc767566b425f (diff) | |
download | chromium_src-04ca1bc65afb76ea30698c25f24599d20119e3d2.zip chromium_src-04ca1bc65afb76ea30698c25f24599d20119e3d2.tar.gz chromium_src-04ca1bc65afb76ea30698c25f24599d20119e3d2.tar.bz2 |
Move Courgette
from src\third_party\courgette
to src\courgette and src\courgette\third_party
Fixed #includes
Added properties to ignore generated files:
C:\c5\src>svn pg svn:ignore courgette
courgette.xcodeproj
courgette.sln
courgette_fuzz.vcproj
courgette_lib.vcproj
courgette_minimal_tool.vcproj
courgette_tool.vcproj
courgette.vcproj
courgette_unittests.vcproj
SConstruct
courgette_fuzz.scons
courgette_lib.scons
courgette_main.scons
courgette_minimal_tool.scons
courgette.scons
courgette_tool.scons
courgette_unittests.scons
Review URL: http://codereview.chromium.org/115062
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@15692 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'courgette/difference_estimator.cc')
-rw-r--r-- | courgette/difference_estimator.cc | 118 |
1 files changed, 118 insertions, 0 deletions
diff --git a/courgette/difference_estimator.cc b/courgette/difference_estimator.cc new file mode 100644 index 0000000..e591f9b --- /dev/null +++ b/courgette/difference_estimator.cc @@ -0,0 +1,118 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// We want to measure the similarity of two sequences of bytes as a surrogate +// for measuring how well a second sequence will compress differentially to the +// first sequence. + +#include "courgette/difference_estimator.h" + +#include "base/hash_tables.h" + +namespace courgette { + +// Our difference measure is the number of k-tuples that occur in Subject that +// don't occur in Base. +const int kTupleSize = 4; + +namespace { + +COMPILE_ASSERT(kTupleSize >= 4 && kTupleSize <= 8, kTupleSize_between_4_and_8); + +size_t HashTuple(const uint8* source) { + size_t hash1 = *reinterpret_cast<const uint32*>(source); + size_t hash2 = *reinterpret_cast<const uint32*>(source + kTupleSize - 4); + size_t hash = ((hash1 * 17 + hash2 * 37) + (hash1 >> 17)) ^ (hash2 >> 23); + return hash; +} + +bool RegionsEqual(const Region& a, const Region& b) { + if (a.length() != b.length()) + return false; + return memcmp(a.start(), b.start(), a.length()) == 0; +} + +} // anonymous namepace + +class DifferenceEstimator::Base { + public: + explicit Base(const Region& region) : region_(region) { } + + void Init() { + const uint8* start = region_.start(); + const uint8* end = region_.end() - (kTupleSize - 1); + for (const uint8* p = start; p < end; ++p) { + size_t hash = HashTuple(p); + hashes_.insert(hash); + } + } + + const Region& region() const { return region_; } + + private: + Region region_; + base::hash_set<size_t> hashes_; + + friend class DifferenceEstimator; + DISALLOW_COPY_AND_ASSIGN(Base); +}; + +class DifferenceEstimator::Subject { + public: + explicit Subject(const Region& region) : region_(region) {} + + const Region& region() const { return region_; } + + private: + Region region_; + + DISALLOW_COPY_AND_ASSIGN(Subject); +}; + +DifferenceEstimator::DifferenceEstimator() {} + +DifferenceEstimator::~DifferenceEstimator() { + for (size_t i = 0; i < owned_bases_.size(); ++i) + delete owned_bases_[i]; + for (size_t i = 0; i < owned_subjects_.size(); ++i) + delete owned_subjects_[i]; +} + +DifferenceEstimator::Base* DifferenceEstimator::MakeBase(const Region& region) { + Base* base = new Base(region); + base->Init(); + owned_bases_.push_back(base); + return base; +} + +DifferenceEstimator::Subject* DifferenceEstimator::MakeSubject( + const Region& region) { + Subject* subject = new Subject(region); + owned_subjects_.push_back(subject); + return subject; +} + +size_t DifferenceEstimator::Measure(Base* base, Subject* subject) { + size_t mismatches = 0; + const uint8* start = subject->region().start(); + const uint8* end = subject->region().end() - (kTupleSize - 1); + + const uint8* p = start; + while (p < end) { + size_t hash = HashTuple(p); + if (base->hashes_.find(hash) == base->hashes_.end()) { + ++mismatches; + } + p += 1; + } + + if (mismatches == 0) { + if (RegionsEqual(base->region(), subject->region())) + return 0; + } + ++mismatches; // Guarantee not zero. + return mismatches; +} + +} // namespace |