From 9c47919b367121d098dcbf70cae5687d884d05dd Mon Sep 17 00:00:00 2001 From: "dgrogan@chromium.org" Date: Tue, 21 May 2013 08:46:56 +0000 Subject: When Retrier succeeds, record errors it encountered. Right now we just know that retrying a file operation eventually succeeds and the approximate number of times it was retried. With this patch we'll know which kinds of filesystem errors can be overcome by retrying. It will give us a better idea of what's causing the errors in the first place. BUG=225051 Review URL: https://chromiumcodereview.appspot.com/15304008 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@201267 0039d316-1c4b-4281-b951-d872f2087c98 --- third_party/leveldatabase/env_chromium.cc | 94 +++++++++++++++++++++---------- tools/metrics/histograms/histograms.xml | 33 ++++++++--- 2 files changed, 90 insertions(+), 37 deletions(-) diff --git a/third_party/leveldatabase/env_chromium.cc b/third_party/leveldatabase/env_chromium.cc index 55e7cb6..1052933 100644 --- a/third_party/leveldatabase/env_chromium.cc +++ b/third_party/leveldatabase/env_chromium.cc @@ -206,6 +206,14 @@ class UMALogger { const = 0; }; +class RetrierProvider { + public: + virtual int MaxRetryTimeMillis() const = 0; + virtual base::HistogramBase* GetRetryTimeHistogram(MethodID method) const = 0; + virtual base::HistogramBase* GetRecoveredFromErrorHistogram( + MethodID method) const = 0; +}; + static const base::FilePath::CharType kLevelDBTestDirectoryPrefix[] = FILE_PATH_LITERAL("leveldb-test-"); @@ -394,7 +402,7 @@ class ChromiumFileLock : public FileLock { ::base::PlatformFile file_; }; -class ChromiumEnv : public Env, public UMALogger { +class ChromiumEnv : public Env, public UMALogger, public RetrierProvider { public: ChromiumEnv(); virtual ~ChromiumEnv() { @@ -531,19 +539,28 @@ class ChromiumEnv : public Env, public UMALogger { class Retrier { public: - Retrier(base::HistogramBase* histogram, const int kMaxRetryMillis) + Retrier(MethodID method, RetrierProvider* provider) : start_(base::TimeTicks::Now()), - limit_(start_ + base::TimeDelta::FromMilliseconds(kMaxRetryMillis)), + limit_(start_ + base::TimeDelta::FromMilliseconds( + provider->MaxRetryTimeMillis())), last_(start_), time_to_sleep_(base::TimeDelta::FromMilliseconds(10)), success_(true), - histogram_(histogram) {} - + method_(method), + last_error_(base::PLATFORM_FILE_OK), + provider_(provider) {} ~Retrier() { - if (success_) - histogram_->AddTime(last_ - start_); + if (success_) { + provider_->GetRetryTimeHistogram(method_)->AddTime(last_ - start_); + if (last_error_ != base::PLATFORM_FILE_OK) { + DCHECK(last_error_ < 0); + provider_->GetRecoveredFromErrorHistogram(method_)->Add(-last_error_); + } + } } - bool ShouldKeepTrying() { + bool ShouldKeepTrying(base::PlatformFileError last_error) { + DCHECK_NE(last_error, base::PLATFORM_FILE_OK); + last_error_ = last_error; if (last_ < limit_) { base::PlatformThread::Sleep(time_to_sleep_); last_ = base::TimeTicks::Now(); @@ -558,7 +575,9 @@ class ChromiumEnv : public Env, public UMALogger { base::TimeTicks last_; base::TimeDelta time_to_sleep_; bool success_; - base::HistogramBase* histogram_; + MethodID method_; + base::PlatformFileError last_error_; + RetrierProvider* provider_; }; virtual Status RenameFile(const std::string& src, const std::string& dst) { @@ -568,7 +587,7 @@ class ChromiumEnv : public Env, public UMALogger { return result; base::FilePath destination = CreateFilePath(dst); - Retrier retrier(GetRetryTimeHistogram(kRenameFile), kMaxRetryTimeMillis); + Retrier retrier(kRenameFile, this); base::PlatformFileError error = base::PLATFORM_FILE_OK; do { if (::file_util::ReplaceFileAndGetError( @@ -578,7 +597,7 @@ class ChromiumEnv : public Env, public UMALogger { sync_parent(src); return result; } - } while (retrier.ShouldKeepTrying()); + } while (retrier.ShouldKeepTrying(error)); DCHECK(error != base::PLATFORM_FILE_OK); RecordOSError(kRenameFile, error); @@ -599,12 +618,12 @@ class ChromiumEnv : public Env, public UMALogger { bool created; ::base::PlatformFileError error_code; ::base::PlatformFile file; - Retrier retrier(GetRetryTimeHistogram(kLockFile), kMaxRetryTimeMillis); + Retrier retrier(kLockFile, this); do { file = ::base::CreatePlatformFile( CreateFilePath(fname), flags, &created, &error_code); } while (error_code != ::base::PLATFORM_FILE_OK && - retrier.ShouldKeepTrying()); + retrier.ShouldKeepTrying(error_code)); if (error_code == ::base::PLATFORM_FILE_ERROR_NOT_FOUND) { ::base::FilePath parent = CreateFilePath(fname).DirName(); @@ -723,10 +742,18 @@ class ChromiumEnv : public Env, public UMALogger { } base::HistogramBase* GetOSErrorHistogram(MethodID method, int limit) const; - base::HistogramBase* GetRetryTimeHistogram(MethodID method) const; base::HistogramBase* GetMethodIOErrorHistogram() const; base::HistogramBase* GetMaxFDHistogram(const std::string& type) const; base::HistogramBase* GetLockFileAncestorHistogram() const; + + // RetrierProvider implementation. + virtual int MaxRetryTimeMillis() const { + return kMaxRetryTimeMillis; + } + virtual base::HistogramBase* GetRetryTimeHistogram(MethodID method) const; + virtual base::HistogramBase* GetRecoveredFromErrorHistogram( + MethodID method) const; + base::FilePath test_directory_; ::base::Lock mu_; @@ -755,21 +782,6 @@ base::HistogramBase* ChromiumEnv::GetOSErrorHistogram(MethodID method, base::Histogram::kUmaTargetedHistogramFlag); } -base::HistogramBase* ChromiumEnv::GetRetryTimeHistogram(MethodID method) const { - std::string uma_name(name_); - // TODO(dgrogan): This is probably not the best way to concatenate strings. - uma_name.append(".TimeUntilSuccessFor").append(MethodIDToString(method)); - - const int kBucketSizeMillis = 25; - // Add 2, 1 for each of the buckets <1 and >max. - const int kNumBuckets = kMaxRetryTimeMillis / kBucketSizeMillis + 2; - return base::Histogram::FactoryTimeGet( - uma_name, base::TimeDelta::FromMilliseconds(1), - base::TimeDelta::FromMilliseconds(kMaxRetryTimeMillis + 1), - kNumBuckets, - base::Histogram::kUmaTargetedHistogramFlag); -} - base::HistogramBase* ChromiumEnv::GetMethodIOErrorHistogram() const { std::string uma_name(name_); uma_name.append(".IOError"); @@ -801,6 +813,30 @@ base::HistogramBase* ChromiumEnv::GetLockFileAncestorHistogram() const { base::Histogram::kUmaTargetedHistogramFlag); } +base::HistogramBase* ChromiumEnv::GetRetryTimeHistogram(MethodID method) const { + std::string uma_name(name_); + // TODO(dgrogan): This is probably not the best way to concatenate strings. + uma_name.append(".TimeUntilSuccessFor").append(MethodIDToString(method)); + + const int kBucketSizeMillis = 25; + // Add 2, 1 for each of the buckets <1 and >max. + const int kNumBuckets = kMaxRetryTimeMillis / kBucketSizeMillis + 2; + return base::Histogram::FactoryTimeGet( + uma_name, base::TimeDelta::FromMilliseconds(1), + base::TimeDelta::FromMilliseconds(kMaxRetryTimeMillis + 1), + kNumBuckets, + base::Histogram::kUmaTargetedHistogramFlag); +} + +base::HistogramBase* ChromiumEnv::GetRecoveredFromErrorHistogram( + MethodID method) const { + std::string uma_name(name_); + uma_name.append(".RetryRecoveredFromErrorIn") + .append(MethodIDToString(method)); + return base::LinearHistogram::FactoryGet(uma_name, 1, kNumEntries, + kNumEntries + 1, base::Histogram::kUmaTargetedHistogramFlag); +} + class Thread : public ::base::PlatformThread::Delegate { public: Thread(void (*function)(void* arg), void* arg) diff --git a/tools/metrics/histograms/histograms.xml b/tools/metrics/histograms/histograms.xml index f1b655b..7961a31 100644 --- a/tools/metrics/histograms/histograms.xml +++ b/tools/metrics/histograms/histograms.xml @@ -2060,6 +2060,14 @@ other types of suffix sets. + + + When IDB LevelDBEnv successfully retries an operation that had failed, + record the error from the most recent failed attempt. + + + Deprecated 2013-04. As of m28 use LevelDBEnv.IDB.TimeUntilSuccessFor. @@ -2124,6 +2132,13 @@ other types of suffix sets. + + + When Non-IDB LevelDBEnv successfully retries an operation that had failed, + record the error from the most recent failed attempt. + + + Deprecated 2013-04. As of m28 use LevelDBEnv.TimeUntilSuccessFor. @@ -14132,13 +14147,6 @@ other types of suffix sets. - - - - - - - @@ -14160,9 +14168,18 @@ other types of suffix sets. + + + + + + + + + - Deprecated 2013-04 in favor of LevelDBEnvExponentialRetryTimes. + Deprecated 2013-04 in favor of LevelDBEnvRetry. -- cgit v1.1