summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-03-15 22:08:18 +0000
committer mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-03-15 22:08:18 +0000
commit a8f102a29e65551638d4513a306e384d6ce829b6 (patch)
tree adccc9ae938ca76c947ef4aaa704cf018632d799
parent a7d7b63fcffdc10198b4134b6272f589dfacae17 (diff)
download chromium_src-a8f102a29e65551638d4513a306e384d6ce829b6.zip
chromium_src-a8f102a29e65551638d4513a306e384d6ce829b6.tar.gz
chromium_src-a8f102a29e65551638d4513a306e384d6ce829b6.tar.bz2
Simplified the InMemoryURLIndex::WordVectorFromString16 static function. Updated header comment with example. Changed EXPECT_EQs in unit test to ASSERT_EQs for safety.
BUG=None
TEST=Ran InMemoryURLIndexTest.StaticFunctions unit test.
Review URL: http://codereview.chromium.org/6670017
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@78299 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- chrome/browser/history/in_memory_url_index.cc 29
-rw-r--r-- chrome/browser/history/in_memory_url_index.h 13
-rw-r--r-- chrome/browser/history/in_memory_url_index_unittest.cc 4
3 files changed, 21 insertions, 25 deletions
diff --git a/chrome/browser/history/in_memory_url_index.cc b/chrome/browser/history/in_memory_url_index.cc
index 54de552..b093188 100644
--- a/chrome/browser/history/in_memory_url_index.cc
+++ b/chrome/browser/history/in_memory_url_index.cc
@@ -436,31 +436,18 @@ InMemoryURLIndex::String16Set InMemoryURLIndex::WordSetFromString16(
InMemoryURLIndex::String16Vector InMemoryURLIndex::WordVectorFromString16(
const string16& uni_string,
bool break_on_space) {
- // TODO(mrossetti): Come back and update the following code if the
- // BreakIterator is changed. Here are some comments:
- // The iterator behaves differently depending on the breaking strategy. Its
- // unit tests do not properly test this case as its basic word and space tests
- // always use a test string starting with a space.
base::BreakIterator iter(&uni_string, break_on_space ?
base::BreakIterator::BREAK_SPACE : base::BreakIterator::BREAK_WORD);
String16Vector words;
- if (break_on_space) {
- if (iter.Init()) {
- while (iter.Advance()) {
- string16 word = iter.GetString();
+ if (!iter.Init())
+ return words;
+ while (iter.Advance()) {
+ if (break_on_space || iter.IsWord()) {
+ string16 word = iter.GetString();
+ if (break_on_space)
TrimWhitespace(word, TRIM_ALL, &word);
- if (!word.empty())
- words.push_back(word);
- }
- }
- } else {
- if (iter.Init()) {
- if (iter.IsWord())
- words.push_back(iter.GetString());
- while (iter.Advance()) {
- if (iter.IsWord())
- words.push_back(iter.GetString());
- }
+ if (!word.empty())
+ words.push_back(word);
}
}
return words;
diff --git a/chrome/browser/history/in_memory_url_index.h b/chrome/browser/history/in_memory_url_index.h
index b4bd266..b7f832f 100644
--- a/chrome/browser/history/in_memory_url_index.h
+++ b/chrome/browser/history/in_memory_url_index.h
@@ -142,8 +142,17 @@ class InMemoryURLIndex {
void DeleteURL(URLID row_id);
// Breaks the |uni_string| string down into individual words and return
- // a vector with the individual words in their original order. Break on
- // whitespace if |break_on_space| also on special characters.
+ // a vector with the individual words in their original order. If
+ // |break_on_space| is false then the resulting list will contain only words
+ // containing alpha-numeric characters. If |break_on_space| is true then the
+ // resulting list will contain strings broken at whitespace.
+ //
+ // Example:
+ // Given: |uni_string|: "http://www.google.com/ harry the rabbit."
+ // With |break_on_space| false the returned list will contain:
+ // "http", "www", "google", "com", "harry", "the", "rabbit"
+ // With |break_on_space| true the returned list will contain:
+ // "http://", "www.google.com/", "harry", "the", "rabbit."
static String16Vector WordVectorFromString16(const string16& uni_string,
bool break_on_space);
diff --git a/chrome/browser/history/in_memory_url_index_unittest.cc b/chrome/browser/history/in_memory_url_index_unittest.cc
index c0422fe..c29abf7 100644
--- a/chrome/browser/history/in_memory_url_index_unittest.cc
+++ b/chrome/browser/history/in_memory_url_index_unittest.cc
@@ -260,7 +260,7 @@ TEST_F(InMemoryURLIndexTest, StaticFunctions) {
string16 string_a(ASCIIToUTF16("http://www.google.com/ frammy the brammy"));
InMemoryURLIndex::String16Vector string_vec =
InMemoryURLIndex::WordVectorFromString16(string_a, false);
- EXPECT_EQ(7U, string_vec.size());
+ ASSERT_EQ(7U, string_vec.size());
// See if we got the words we expected.
EXPECT_EQ(UTF8ToUTF16("http"), string_vec[0]);
EXPECT_EQ(UTF8ToUTF16("www"), string_vec[1]);
@@ -271,7 +271,7 @@ TEST_F(InMemoryURLIndexTest, StaticFunctions) {
EXPECT_EQ(UTF8ToUTF16("brammy"), string_vec[6]);
string_vec = InMemoryURLIndex::WordVectorFromString16(string_a, true);
- EXPECT_EQ(5U, string_vec.size());
+ ASSERT_EQ(5U, string_vec.size());
EXPECT_EQ(UTF8ToUTF16("http://"), string_vec[0]);
EXPECT_EQ(UTF8ToUTF16("www.google.com/"), string_vec[1]);
EXPECT_EQ(UTF8ToUTF16("frammy"), string_vec[2]);