summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorerg@chromium.org <erg@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-05-12 16:49:35 +0000
committererg@chromium.org <erg@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-05-12 16:49:35 +0000
commit3b6711ea37b8f4b1b4d5c0ceca20f8bc90228d92 (patch)
tree2e5d58d6483eff4a87cd2aedd5528a120ef0e553
parentf691f0e726229cf1150d2f04310d0d41be36f98c (diff)
downloadchromium_src-3b6711ea37b8f4b1b4d5c0ceca20f8bc90228d92.zip
chromium_src-3b6711ea37b8f4b1b4d5c0ceca20f8bc90228d92.tar.gz
chromium_src-3b6711ea37b8f4b1b4d5c0ceca20f8bc90228d92.tar.bz2
40% speed up in parsing the /proc/<pid>/smaps file by changing StringTokenizer. What used to take 10ms to parse now takes 6ms.
- Profiling showed that doing the additional work to work with quotes added a bit of runtime, but most users don't use the optional quotes functionality. This speeds parsing up by 20% by switching to a fast-path implementation, reverting to the slower path when necessary. - Eliminate temporary copies of tokens. This speeds up GetWorkingSetKBytes by another 20%. BUG=40033 TEST=Existing StringTokenizerTests. Review URL: http://codereview.chromium.org/1997017 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@47038 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r--base/process_util_linux.cc10
-rw-r--r--base/string_tokenizer.h71
2 files changed, 59 insertions, 22 deletions
diff --git a/base/process_util_linux.cc b/base/process_util_linux.cc
index ee44638..d3d029e 100644
--- a/base/process_util_linux.cc
+++ b/base/process_util_linux.cc
@@ -271,13 +271,15 @@ bool ProcessMetrics::GetWorkingSetKBytes(WorkingSetKBytes* ws_usage) const {
int pss_kb = 0;
bool have_pss = false;
if (file_util::ReadFileToString(stat_file, &smaps) && smaps.length() > 0) {
+ const std::string private_prefix = "Private_";
+ const std::string pss_prefix = "Pss";
StringTokenizer tokenizer(smaps, ":\n");
+ StringPiece last_key_name;
ParsingState state = KEY_NAME;
- std::string last_key_name;
while (tokenizer.GetNext()) {
switch (state) {
case KEY_NAME:
- last_key_name = tokenizer.token();
+ last_key_name = tokenizer.token_piece();
state = KEY_VALUE;
break;
case KEY_VALUE:
@@ -285,9 +287,9 @@ bool ProcessMetrics::GetWorkingSetKBytes(WorkingSetKBytes* ws_usage) const {
NOTREACHED();
return false;
}
- if (StartsWithASCII(last_key_name, "Private_", 1)) {
+ if (last_key_name.starts_with(private_prefix)) {
private_kb += StringToInt(tokenizer.token());
- } else if (StartsWithASCII(last_key_name, "Pss", 1)) {
+ } else if (last_key_name.starts_with(pss_prefix)) {
have_pss = true;
pss_kb += StringToInt(tokenizer.token());
}
diff --git a/base/string_tokenizer.h b/base/string_tokenizer.h
index a6b5870..3b8f8c3 100644
--- a/base/string_tokenizer.h
+++ b/base/string_tokenizer.h
@@ -5,8 +5,11 @@
#ifndef BASE_STRING_TOKENIZER_H_
#define BASE_STRING_TOKENIZER_H_
+#include <algorithm>
#include <string>
+#include "base/string_piece.h"
+
// StringTokenizerT is a simple string tokenizer class. It works like an
// iterator that with each step (see the Advance method) updates members that
// refer to the next token in the input string. The user may optionally
@@ -121,24 +124,10 @@ class StringTokenizerT {
// returns false if the tokenizer is complete. This method must be called
// before calling any of the token* methods.
bool GetNext() {
- AdvanceState state;
- token_is_delim_ = false;
- for (;;) {
- token_begin_ = token_end_;
- if (token_end_ == end_)
- return false;
- ++token_end_;
- if (AdvanceOne(&state, *token_begin_))
- break;
- if (options_ & RETURN_DELIMS) {
- token_is_delim_ = true;
- return true;
- }
- // else skip over delim
- }
- while (token_end_ != end_ && AdvanceOne(&state, *token_end_))
- ++token_end_;
- return true;
+ if (quotes_.empty() && options_ == 0)
+ return QuickGetNext();
+ else
+ return FullGetNext();
}
// Start iterating through tokens from the beginning of the string.
@@ -156,18 +145,64 @@ class StringTokenizerT {
const_iterator token_begin() const { return token_begin_; }
const_iterator token_end() const { return token_end_; }
str token() const { return str(token_begin_, token_end_); }
+ base::StringPiece token_piece() const {
+ return base::StringPiece(&*token_begin_,
+ std::distance(token_begin_, token_end_));
+ }
private:
void Init(const_iterator string_begin,
const_iterator string_end,
const str& delims) {
start_pos_ = string_begin;
+ token_begin_ = string_begin;
token_end_ = string_begin;
end_ = string_end;
delims_ = delims;
options_ = 0;
}
+ // Implementation of GetNext() for when we have no quote characters. We have
+ // two separate implementations because AdvanceOne() is a hot spot in large
+ // text files with large tokens.
+ bool QuickGetNext() {
+ token_is_delim_ = false;
+ for (;;) {
+ token_begin_ = token_end_;
+ if (token_end_ == end_)
+ return false;
+ ++token_end_;
+ if (delims_.find(*token_begin_) == str::npos)
+ break;
+ // else skip over delim
+ }
+ while (token_end_ != end_ && delims_.find(*token_end_) == str::npos)
+ ++token_end_;
+ return true;
+ }
+
+ // Implementation of GetNext() for when we have to take quotes into account.
+ bool FullGetNext() {
+ AdvanceState state;
+ token_is_delim_ = false;
+ for (;;) {
+ token_begin_ = token_end_;
+ if (token_end_ == end_)
+ return false;
+ ++token_end_;
+ if (AdvanceOne(&state, *token_begin_))
+ break;
+ if (options_ & RETURN_DELIMS) {
+ token_is_delim_ = true;
+ return true;
+ }
+ // else skip over delim
+ }
+ while (token_end_ != end_ && AdvanceOne(&state, *token_end_))
+ ++token_end_;
+ return true;
+ }
+
bool IsDelim(char_type c) const {
return delims_.find(c) != str::npos;
}