diff options
author | mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-03-15 22:08:18 +0000 |
---|---|---|
committer | mrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-03-15 22:08:18 +0000 |
commit | a8f102a29e65551638d4513a306e384d6ce829b6 (patch) | |
tree | adccc9ae938ca76c947ef4aaa704cf018632d799 | |
parent | a7d7b63fcffdc10198b4134b6272f589dfacae17 (diff) | |
download | chromium_src-a8f102a29e65551638d4513a306e384d6ce829b6.zip chromium_src-a8f102a29e65551638d4513a306e384d6ce829b6.tar.gz chromium_src-a8f102a29e65551638d4513a306e384d6ce829b6.tar.bz2 |
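Simplified the InMemoryURLIndex::WordVectorFromString16 static function. Updated header comment with example. Changed EXPECT_EQs in unit test to ASSERT_EQs for safety (a failed size check now aborts the test before the result vector is indexed, instead of continuing and reading out of bounds).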
BUG=None
TEST=Ran InMemoryURLIndexTest.StaticFunctions unit test.
Review URL: http://codereview.chromium.org/6670017
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@78299 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | chrome/browser/history/in_memory_url_index.cc | 29 | ||||
-rw-r--r-- | chrome/browser/history/in_memory_url_index.h | 13 | ||||
-rw-r--r-- | chrome/browser/history/in_memory_url_index_unittest.cc | 4 |
3 files changed, 21 insertions, 25 deletions
diff --git a/chrome/browser/history/in_memory_url_index.cc b/chrome/browser/history/in_memory_url_index.cc index 54de552..b093188 100644 --- a/chrome/browser/history/in_memory_url_index.cc +++ b/chrome/browser/history/in_memory_url_index.cc @@ -436,31 +436,18 @@ InMemoryURLIndex::String16Set InMemoryURLIndex::WordSetFromString16( InMemoryURLIndex::String16Vector InMemoryURLIndex::WordVectorFromString16( const string16& uni_string, bool break_on_space) { - // TODO(mrossetti): Come back and update the following code if the - // BreakIterator is changed. Here are some comments: - // The iterator behaves differently depending on the breaking strategy. Its - // unit tests do not properly test this case as its basic word and space tests - // always use a test string starting with a space. base::BreakIterator iter(&uni_string, break_on_space ? base::BreakIterator::BREAK_SPACE : base::BreakIterator::BREAK_WORD); String16Vector words; - if (break_on_space) { - if (iter.Init()) { - while (iter.Advance()) { - string16 word = iter.GetString(); + if (!iter.Init()) + return words; + while (iter.Advance()) { + if (break_on_space || iter.IsWord()) { + string16 word = iter.GetString(); + if (break_on_space) TrimWhitespace(word, TRIM_ALL, &word); - if (!word.empty()) - words.push_back(word); - } - } - } else { - if (iter.Init()) { - if (iter.IsWord()) - words.push_back(iter.GetString()); - while (iter.Advance()) { - if (iter.IsWord()) - words.push_back(iter.GetString()); - } + if (!word.empty()) + words.push_back(word); } } return words; diff --git a/chrome/browser/history/in_memory_url_index.h b/chrome/browser/history/in_memory_url_index.h index b4bd266..b7f832f 100644 --- a/chrome/browser/history/in_memory_url_index.h +++ b/chrome/browser/history/in_memory_url_index.h @@ -142,8 +142,17 @@ class InMemoryURLIndex { void DeleteURL(URLID row_id); // Breaks the |uni_string| string down into individual words and return - // a vector with the individual words in their original order. 
Break on - // whitespace if |break_on_space| also on special characters. + // a vector with the individual words in their original order. If + // |break_on_space| is false then the resulting list will contain only words + // containing alpha-numeric characters. If |break_on_space| is true then the + // resulting list will contain strings broken at whitespace. + // + // Example: + // Given: |uni_string|: "http://www.google.com/ harry the rabbit." + // With |break_on_space| false the returned list will contain: + // "http", "www", "google", "com", "harry", "the", "rabbit" + // With |break_on_space| true the returned list will contain: + // "http://", "www.google.com/", "harry", "the", "rabbit." static String16Vector WordVectorFromString16(const string16& uni_string, bool break_on_space); diff --git a/chrome/browser/history/in_memory_url_index_unittest.cc b/chrome/browser/history/in_memory_url_index_unittest.cc index c0422fe..c29abf7 100644 --- a/chrome/browser/history/in_memory_url_index_unittest.cc +++ b/chrome/browser/history/in_memory_url_index_unittest.cc @@ -260,7 +260,7 @@ TEST_F(InMemoryURLIndexTest, StaticFunctions) { string16 string_a(ASCIIToUTF16("http://www.google.com/ frammy the brammy")); InMemoryURLIndex::String16Vector string_vec = InMemoryURLIndex::WordVectorFromString16(string_a, false); - EXPECT_EQ(7U, string_vec.size()); + ASSERT_EQ(7U, string_vec.size()); // See if we got the words we expected. EXPECT_EQ(UTF8ToUTF16("http"), string_vec[0]); EXPECT_EQ(UTF8ToUTF16("www"), string_vec[1]); @@ -271,7 +271,7 @@ TEST_F(InMemoryURLIndexTest, StaticFunctions) { EXPECT_EQ(UTF8ToUTF16("brammy"), string_vec[6]); string_vec = InMemoryURLIndex::WordVectorFromString16(string_a, true); - EXPECT_EQ(5U, string_vec.size()); + ASSERT_EQ(5U, string_vec.size()); EXPECT_EQ(UTF8ToUTF16("http://"), string_vec[0]); EXPECT_EQ(UTF8ToUTF16("www.google.com/"), string_vec[1]); EXPECT_EQ(UTF8ToUTF16("frammy"), string_vec[2]); |