diff options
Diffstat (limited to 'chrome/browser/history/query_parser_unittest.cc')
-rw-r--r-- | chrome/browser/history/query_parser_unittest.cc | 163 |
1 files changed, 163 insertions, 0 deletions
diff --git a/chrome/browser/history/query_parser_unittest.cc b/chrome/browser/history/query_parser_unittest.cc new file mode 100644 index 0000000..f8b41d9 --- /dev/null +++ b/chrome/browser/history/query_parser_unittest.cc @@ -0,0 +1,163 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/basictypes.h" +#include "base/scoped_vector.h" +#include "base/utf_string_conversions.h" +#include "chrome/browser/history/query_parser.h" +#include "testing/gtest/include/gtest/gtest.h" + +class QueryParserTest : public testing::Test { + public: + struct TestData { + const char* input; + const int expected_word_count; + }; + + std::string QueryToString(const std::string& query); + + protected: + QueryParser query_parser_; +}; + +// Test helper: Convert a user query string in 8-bit (for hardcoding +// convenience) to a SQLite query string. +std::string QueryParserTest::QueryToString(const std::string& query) { + string16 sqlite_query; + query_parser_.ParseQuery(UTF8ToUTF16(query), &sqlite_query); + return UTF16ToUTF8(sqlite_query); +} + +// Basic multi-word queries, including prefix matching. +TEST_F(QueryParserTest, SimpleQueries) { + EXPECT_EQ("", QueryToString(" ")); + EXPECT_EQ("singleword*", QueryToString("singleword")); + EXPECT_EQ("spacedout*", QueryToString(" spacedout ")); + EXPECT_EQ("foo* bar*", QueryToString("foo bar")); + // Short words aren't prefix matches. For Korean Hangul + // the minimum is 2 while for other scripts, it's 3. + EXPECT_EQ("f b", QueryToString(" f b")); + // KA JANG + EXPECT_EQ(WideToUTF8(L"\xAC00 \xC7A5"), + QueryToString(WideToUTF8(L" \xAC00 \xC7A5"))); + EXPECT_EQ("foo* bar*", QueryToString(" foo bar ")); + // KA-JANG BICH-GO + EXPECT_EQ(WideToUTF8(L"\xAC00\xC7A5* \xBE5B\xACE0*"), + QueryToString(WideToUTF8(L"\xAC00\xC7A5 \xBE5B\xACE0"))); +} + +// Quoted substring parsing. +TEST_F(QueryParserTest, Quoted) { + // ASCII quotes + EXPECT_EQ("\"Quoted\"", QueryToString("\"Quoted\"")); + // Missing end quotes + EXPECT_EQ("\"miss end\"", QueryToString("\"miss end")); + // Missing begin quotes + EXPECT_EQ("miss* beg*", QueryToString("miss beg\"")); + // Weird formatting + EXPECT_EQ("\"Many\" \"quotes\"", QueryToString("\"Many \"\"quotes")); +} + +// Apostrophes within words should be preserved, but otherwise stripped. +TEST_F(QueryParserTest, Apostrophes) { + EXPECT_EQ("foo* bar's*", QueryToString("foo bar's")); + EXPECT_EQ("l'foo*", QueryToString("l'foo")); + EXPECT_EQ("foo*", QueryToString("'foo")); +} + +// Special characters. +TEST_F(QueryParserTest, SpecialChars) { + EXPECT_EQ("foo* the* bar*", QueryToString("!#:/*foo#$*;'* the!#:/*bar")); +} + +TEST_F(QueryParserTest, NumWords) { + TestData data[] = { + { "blah", 1 }, + { "foo \"bar baz\"", 3 }, + { "foo \"baz\"", 2 }, + { "foo \"bar baz\" blah", 4 }, + }; + + for (size_t i = 0; i < arraysize(data); ++i) { + string16 query_string; + EXPECT_EQ(data[i].expected_word_count, + query_parser_.ParseQuery(UTF8ToUTF16(data[i].input), + &query_string)); + } +} + +TEST_F(QueryParserTest, ParseQueryNodesAndMatch) { + struct TestData2 { + const std::string query; + const std::string text; + const bool matches; + const size_t m1_start; + const size_t m1_end; + const size_t m2_start; + const size_t m2_end; + } data[] = { + { "foo foo", "foo", true, 0, 3, 0, 0 }, + { "foo fooey", "fooey", true, 0, 5, 0, 0 }, + { "foo fooey bar", "bar fooey", true, 0, 3, 4, 9 }, + { "blah", "blah", true, 0, 4, 0, 0 }, + { "blah", "foo", false, 0, 0, 0, 0 }, + { "blah", "blahblah", true, 0, 4, 0, 0 }, + { "blah", "foo blah", true, 4, 8, 0, 0 }, + { "foo blah", "blah", false, 0, 0, 0, 0 }, + { "foo blah", "blahx foobar", true, 0, 4, 6, 9 }, + { "\"foo blah\"", "foo blah", true, 0, 8, 0, 0 }, + { "\"foo blah\"", "foox blahx", false, 0, 0, 0, 0 }, + { "\"foo blah\"", "foo blah", true, 0, 8, 0, 0 }, + { "\"foo blah\"", "\"foo blah\"", true, 1, 9, 0, 0 }, + { "foo blah", "\"foo bar blah\"", true, 1, 4, 9, 13 }, + }; + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) { + QueryParser parser; + ScopedVector<QueryNode> query_nodes; + parser.ParseQuery(UTF8ToUTF16(data[i].query), &query_nodes.get()); + Snippet::MatchPositions match_positions; + ASSERT_EQ(data[i].matches, + parser.DoesQueryMatch(UTF8ToUTF16(data[i].text), + query_nodes.get(), + &match_positions)); + size_t offset = 0; + if (data[i].m1_start != 0 || data[i].m1_end != 0) { + ASSERT_TRUE(match_positions.size() >= 1); + EXPECT_EQ(data[i].m1_start, match_positions[0].first); + EXPECT_EQ(data[i].m1_end, match_positions[0].second); + offset++; + } + if (data[i].m2_start != 0 || data[i].m2_end != 0) { + ASSERT_TRUE(match_positions.size() == 1 + offset); + EXPECT_EQ(data[i].m2_start, match_positions[offset].first); + EXPECT_EQ(data[i].m2_end, match_positions[offset].second); + } + } +} + +TEST_F(QueryParserTest, ExtractQueryWords) { + struct TestData2 { + const std::string text; + const std::string w1; + const std::string w2; + const std::string w3; + const size_t word_count; + } data[] = { + { "foo", "foo", "", "", 1 }, + { "foo bar", "foo", "bar", "", 2 }, + { "\"foo bar\"", "foo", "bar", "", 2 }, + { "\"foo bar\" a", "foo", "bar", "a", 3 }, + }; + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) { + std::vector<string16> results; + QueryParser parser; + parser.ExtractQueryWords(UTF8ToUTF16(data[i].text), &results); + ASSERT_EQ(data[i].word_count, results.size()); + EXPECT_EQ(data[i].w1, UTF16ToUTF8(results[0])); + if (results.size() == 2) + EXPECT_EQ(data[i].w2, UTF16ToUTF8(results[1])); + if (results.size() == 3) + EXPECT_EQ(data[i].w3, UTF16ToUTF8(results[2])); + } +} |