summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorerg@chromium.org <erg@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-05-12 16:49:35 +0000
committererg@chromium.org <erg@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-05-12 16:49:35 +0000
commit3b6711ea37b8f4b1b4d5c0ceca20f8bc90228d92 (patch)
tree2e5d58d6483eff4a87cd2aedd5528a120ef0e553
parentf691f0e726229cf1150d2f04310d0d41be36f98c (diff)
downloadchromium_src-3b6711ea37b8f4b1b4d5c0ceca20f8bc90228d92.zip
chromium_src-3b6711ea37b8f4b1b4d5c0ceca20f8bc90228d92.tar.gz
chromium_src-3b6711ea37b8f4b1b4d5c0ceca20f8bc90228d92.tar.bz2
40% speed up in parsing the /proc/<pid>/smaps file by changing StringTokenizer. What used to take 10ms to parse now takes 6ms.
- Profiling showed that doing the additional work to work with quotes added a bit of runtime, but most users don't use the optional quotes functionality. This speeds parsing up by 20% by switching to a fast-path implementation, reverting to the slower path when necessary. - Eliminate temporary copies of tokens. This speeds up GetWorkingSetKBytes by another 20%. BUG=40033 TEST=Existing StringTokenizerTests. Review URL: http://codereview.chromium.org/1997017 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@47038 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r--base/process_util_linux.cc10
-rw-r--r--base/string_tokenizer.h71
2 files changed, 59 insertions, 22 deletions
diff --git a/base/process_util_linux.cc b/base/process_util_linux.cc
index ee44638..d3d029e 100644
--- a/base/process_util_linux.cc
+++ b/base/process_util_linux.cc
@@ -271,13 +271,15 @@ bool ProcessMetrics::GetWorkingSetKBytes(WorkingSetKBytes* ws_usage) const {
int pss_kb = 0;
bool have_pss = false;
if (file_util::ReadFileToString(stat_file, &smaps) && smaps.length() > 0) {
+ const std::string private_prefix = "Private_";
+ const std::string pss_prefix = "Pss";
StringTokenizer tokenizer(smaps, ":\n");
+ StringPiece last_key_name;
ParsingState state = KEY_NAME;
- std::string last_key_name;
while (tokenizer.GetNext()) {
switch (state) {
case KEY_NAME:
- last_key_name = tokenizer.token();
+ last_key_name = tokenizer.token_piece();
state = KEY_VALUE;
break;
case KEY_VALUE:
@@ -285,9 +287,9 @@ bool ProcessMetrics::GetWorkingSetKBytes(WorkingSetKBytes* ws_usage) const {
NOTREACHED();
return false;
}
- if (StartsWithASCII(last_key_name, "Private_", 1)) {
+ if (last_key_name.starts_with(private_prefix)) {
private_kb += StringToInt(tokenizer.token());
- } else if (StartsWithASCII(last_key_name, "Pss", 1)) {
+ } else if (last_key_name.starts_with(pss_prefix)) {
have_pss = true;
pss_kb += StringToInt(tokenizer.token());
}
diff --git a/base/string_tokenizer.h b/base/string_tokenizer.h
index a6b5870..3b8f8c3 100644
--- a/base/string_tokenizer.h
+++ b/base/string_tokenizer.h
@@ -5,8 +5,11 @@
#ifndef BASE_STRING_TOKENIZER_H_
#define BASE_STRING_TOKENIZER_H_
+#include <algorithm>
#include <string>
+#include "base/string_piece.h"
+
// StringTokenizerT is a simple string tokenizer class. It works like an
// iterator that with each step (see the Advance method) updates members that
// refer to the next token in the input string. The user may optionally
@@ -121,24 +124,10 @@ class StringTokenizerT {
// returns false if the tokenizer is complete. This method must be called
// before calling any of the token* methods.
bool GetNext() {
- AdvanceState state;
- token_is_delim_ = false;
- for (;;) {
- token_begin_ = token_end_;
- if (token_end_ == end_)
- return false;
- ++token_end_;
- if (AdvanceOne(&state, *token_begin_))
- break;
- if (options_ & RETURN_DELIMS) {
- token_is_delim_ = true;
- return true;
- }
- // else skip over delim
- }
- while (token_end_ != end_ && AdvanceOne(&state, *token_end_))
- ++token_end_;
- return true;
+ if (quotes_.empty() && options_ == 0)
+ return QuickGetNext();
+ else
+ return FullGetNext();
}
// Start iterating through tokens from the beginning of the string.
@@ -156,18 +145,64 @@ class StringTokenizerT {
const_iterator token_begin() const { return token_begin_; }
const_iterator token_end() const { return token_end_; }
str token() const { return str(token_begin_, token_end_); }
+ base::StringPiece token_piece() const {
+ return base::StringPiece(&*token_begin_,
+ std::distance(token_begin_, token_end_));
+ }
private:
void Init(const_iterator string_begin,
const_iterator string_end,
const str& delims) {
start_pos_ = string_begin;
+ token_begin_ = string_begin;
token_end_ = string_begin;
end_ = string_end;
delims_ = delims;
options_ = 0;
}
+ // Implementation of GetNext() for when we have no quote characters. We have
+ // two separate implementations because AdvanceOne() is a hot spot in large
+ // text files with large tokens.
+ bool QuickGetNext() {
+ token_is_delim_ = false;
+ for (;;) {
+ token_begin_ = token_end_;
+ if (token_end_ == end_)
+ return false;
+ ++token_end_;
+ if (delims_.find(*token_begin_) == str::npos)
+ break;
+ // else skip over delim
+ }
+ while (token_end_ != end_ && delims_.find(*token_end_) == str::npos)
+ ++token_end_;
+ return true;
+ }
+
+ // Implementation of GetNext() for when we have to take quotes into account.
+ bool FullGetNext() {
+ AdvanceState state;
+ token_is_delim_ = false;
+ for (;;) {
+ token_begin_ = token_end_;
+ if (token_end_ == end_)
+ return false;
+ ++token_end_;
+ if (AdvanceOne(&state, *token_begin_))
+ break;
+ if (options_ & RETURN_DELIMS) {
+ token_is_delim_ = true;
+ return true;
+ }
+ // else skip over delim
+ }
+ while (token_end_ != end_ && AdvanceOne(&state, *token_end_))
+ ++token_end_;
+ return true;
+ }
+
bool IsDelim(char_type c) const {
return delims_.find(c) != str::npos;
}