summary refs log tree commit diff stats
path: root/chrome/browser/history/query_parser_unittest.cc
diff options
context:
space:
mode:
Diffstat (limited to 'chrome/browser/history/query_parser_unittest.cc')
-rw-r--r-- chrome/browser/history/query_parser_unittest.cc 163
1 files changed, 163 insertions, 0 deletions
diff --git a/chrome/browser/history/query_parser_unittest.cc b/chrome/browser/history/query_parser_unittest.cc
new file mode 100644
index 0000000..f8b41d9
--- /dev/null
+++ b/chrome/browser/history/query_parser_unittest.cc
@@ -0,0 +1,163 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/basictypes.h"
+#include "base/scoped_vector.h"
+#include "base/utf_string_conversions.h"
+#include "chrome/browser/history/query_parser.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+// Fixture shared by the QueryParser tests. It owns one parser instance and
+// offers a helper for checking the query text that the parser generates.
+class QueryParserTest : public testing::Test {
+ public:
+  // One row of table-driven input for tests that check word counts.
+  struct TestData {
+    const char* input;              // Query text handed to the parser.
+    const int expected_word_count;  // Value ParseQuery() should return.
+  };
+
+  // Runs |query| through |query_parser_| and returns the resulting query
+  // string converted back to UTF-8.
+  std::string QueryToString(const std::string& query);
+
+ protected:
+  // Parser instance used by QueryToString() and the NumWords test.
+  QueryParser query_parser_;
+};
+
+// Test helper: takes a user query as an 8-bit string (so tests can hardcode
+// narrow literals), parses it, and returns the SQLite query string the
+// parser produced, converted back to UTF-8.
+std::string QueryParserTest::QueryToString(const std::string& query) {
+  const string16 wide_query = UTF8ToUTF16(query);
+  string16 parsed;
+  query_parser_.ParseQuery(wide_query, &parsed);
+  return UTF16ToUTF8(parsed);
+}
+
+// Plain multi-word queries. Words that are long enough come back with a
+// trailing '*' (prefix match); shorter words come back unchanged.
+TEST_F(QueryParserTest, SimpleQueries) {
+  EXPECT_EQ("", QueryToString(" "));
+  EXPECT_EQ("singleword*", QueryToString("singleword"));
+  EXPECT_EQ("spacedout*", QueryToString(" spacedout "));
+  EXPECT_EQ("foo* bar*", QueryToString("foo bar"));
+
+  // Words below the minimum length are not turned into prefix matches. The
+  // minimum is 2 for Korean Hangul and 3 for other scripts.
+  EXPECT_EQ("f b", QueryToString(" f b"));
+
+  // KA JANG: two single-syllable Hangul words, each below the minimum.
+  const std::string short_hangul_expected = WideToUTF8(L"\xAC00 \xC7A5");
+  const std::string short_hangul_query = WideToUTF8(L" \xAC00 \xC7A5");
+  EXPECT_EQ(short_hangul_expected, QueryToString(short_hangul_query));
+
+  EXPECT_EQ("foo* bar*", QueryToString(" foo bar "));
+
+  // KA-JANG BICH-GO: two-syllable Hangul words reach the minimum and are
+  // prefix matched.
+  const std::string long_hangul_expected =
+      WideToUTF8(L"\xAC00\xC7A5* \xBE5B\xACE0*");
+  const std::string long_hangul_query =
+      WideToUTF8(L"\xAC00\xC7A5 \xBE5B\xACE0");
+  EXPECT_EQ(long_hangul_expected, QueryToString(long_hangul_query));
+}
+
+// Double-quoted phrases in the query, including unbalanced quotes.
+TEST_F(QueryParserTest, Quoted) {
+  static const struct {
+    const char* query;     // Text passed to QueryToString().
+    const char* expected;  // Query string expected back.
+  } kCases[] = {
+    // ASCII quotes.
+    { "\"Quoted\"", "\"Quoted\"" },
+    // Closing quote missing.
+    { "\"miss end", "\"miss end\"" },
+    // Opening quote missing.
+    { "miss beg\"", "miss* beg*" },
+    // Oddly placed quotes.
+    { "\"Many \"\"quotes", "\"Many\" \"quotes\"" },
+  };
+
+  // ARRAYSIZE_UNSAFE because the element type is local to this function.
+  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kCases); ++i)
+    EXPECT_EQ(kCases[i].expected, QueryToString(kCases[i].query));
+}
+
+// An apostrophe inside a word is kept; an apostrophe anywhere else is
+// stripped.
+TEST_F(QueryParserTest, Apostrophes) {
+  static const struct {
+    const char* query;     // Text passed to QueryToString().
+    const char* expected;  // Query string expected back.
+  } kCases[] = {
+    { "foo bar's", "foo* bar's*" },
+    { "l'foo", "l'foo*" },
+    { "'foo", "foo*" },
+  };
+
+  // ARRAYSIZE_UNSAFE because the element type is local to this function.
+  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kCases); ++i)
+    EXPECT_EQ(kCases[i].expected, QueryToString(kCases[i].query));
+}
+
+// Punctuation and other special characters around the words are dropped,
+// leaving only the words themselves as prefix matches.
+TEST_F(QueryParserTest, SpecialChars) {
+  const std::string noisy_query = "!#:/*foo#$*;'* the!#:/*bar";
+  const std::string parsed = QueryToString(noisy_query);
+  EXPECT_EQ("foo* the* bar*", parsed);
+}
+
+// Checks the value returned by ParseQuery() against the expected word count
+// for each input; per the table, words inside quotes are counted one by one.
+TEST_F(QueryParserTest, NumWords) {
+  TestData cases[] = {
+    { "blah", 1 },
+    { "foo \"bar baz\"", 3 },
+    { "foo \"baz\"", 2 },
+    { "foo \"bar baz\" blah", 4 },
+  };
+
+  for (size_t index = 0; index < arraysize(cases); ++index) {
+    const TestData& current = cases[index];
+    // ParseQuery() requires an output string; its contents are not inspected
+    // by this test.
+    string16 unused_sqlite_query;
+    EXPECT_EQ(current.expected_word_count,
+              query_parser_.ParseQuery(UTF8ToUTF16(current.input),
+                                       &unused_sqlite_query));
+  }
+}
+
+// Parses each query into nodes, checks whether the nodes match the given
+// text, and verifies up to two reported match positions.
+TEST_F(QueryParserTest, ParseQueryNodesAndMatch) {
+  // A {start, end} pair of {0, 0} means that slot is not checked.
+  struct TestData2 {
+    const std::string query;  // Query to parse into nodes.
+    const std::string text;   // Text the nodes are matched against.
+    const bool matches;       // Expected DoesQueryMatch() result.
+    const size_t m1_start;    // First expected match position.
+    const size_t m1_end;
+    const size_t m2_start;    // Second expected match position.
+    const size_t m2_end;
+  } data[] = {
+    { "foo foo", "foo", true, 0, 3, 0, 0 },
+    { "foo fooey", "fooey", true, 0, 5, 0, 0 },
+    { "foo fooey bar", "bar fooey", true, 0, 3, 4, 9 },
+    { "blah", "blah", true, 0, 4, 0, 0 },
+    { "blah", "foo", false, 0, 0, 0, 0 },
+    { "blah", "blahblah", true, 0, 4, 0, 0 },
+    { "blah", "foo blah", true, 4, 8, 0, 0 },
+    { "foo blah", "blah", false, 0, 0, 0, 0 },
+    { "foo blah", "blahx foobar", true, 0, 4, 6, 9 },
+    { "\"foo blah\"", "foo blah", true, 0, 8, 0, 0 },
+    { "\"foo blah\"", "foox blahx", false, 0, 0, 0, 0 },
+    { "\"foo blah\"", "foo blah", true, 0, 8, 0, 0 },
+    { "\"foo blah\"", "\"foo blah\"", true, 1, 9, 0, 0 },
+    { "foo blah", "\"foo bar blah\"", true, 1, 4, 9, 13 },
+  };
+
+  // ARRAYSIZE_UNSAFE because TestData2 is local to this function.
+  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) {
+    const TestData2& row = data[i];
+
+    QueryParser parser;
+    ScopedVector<QueryNode> query_nodes;
+    parser.ParseQuery(UTF8ToUTF16(row.query), &query_nodes.get());
+
+    Snippet::MatchPositions match_positions;
+    ASSERT_EQ(row.matches,
+              parser.DoesQueryMatch(UTF8ToUTF16(row.text),
+                                    query_nodes.get(),
+                                    &match_positions));
+
+    const bool expects_first = !(row.m1_start == 0 && row.m1_end == 0);
+    const bool expects_second = !(row.m2_start == 0 && row.m2_end == 0);
+
+    // Index of the next entry in |match_positions| to verify.
+    size_t next_slot = 0;
+    if (expects_first) {
+      ASSERT_TRUE(match_positions.size() >= 1);
+      EXPECT_EQ(row.m1_start, match_positions[0].first);
+      EXPECT_EQ(row.m1_end, match_positions[0].second);
+      ++next_slot;
+    }
+    if (expects_second) {
+      // The second expected position must be the last one reported.
+      ASSERT_TRUE(match_positions.size() == 1 + next_slot);
+      EXPECT_EQ(row.m2_start, match_positions[next_slot].first);
+      EXPECT_EQ(row.m2_end, match_positions[next_slot].second);
+    }
+  }
+}
+
+// Checks that ExtractQueryWords() returns the individual words of a query.
+// Per the table below, the words of a quoted phrase are expected as separate
+// entries.
+TEST_F(QueryParserTest, ExtractQueryWords) {
+  struct TestData2 {
+    const std::string text;   // Query text to extract words from.
+    const std::string w1;     // Expected first word.
+    const std::string w2;     // Expected second word ("" if none).
+    const std::string w3;     // Expected third word ("" if none).
+    const size_t word_count;  // Expected number of extracted words.
+  } data[] = {
+    { "foo", "foo", "", "", 1 },
+    { "foo bar", "foo", "bar", "", 2 },
+    { "\"foo bar\"", "foo", "bar", "", 2 },
+    { "\"foo bar\" a", "foo", "bar", "a", 3 },
+  };
+  // ARRAYSIZE_UNSAFE because TestData2 is local to this function.
+  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) {
+    std::vector<string16> results;
+    QueryParser parser;
+    parser.ExtractQueryWords(UTF8ToUTF16(data[i].text), &results);
+    // Fatal on mismatch: every row expects at least one word, so passing
+    // this check keeps the indexing below in bounds.
+    ASSERT_EQ(data[i].word_count, results.size());
+    EXPECT_EQ(data[i].w1, UTF16ToUTF8(results[0]));
+    // Compare with >= rather than == so that a three-word result still has
+    // its second word verified.
+    if (results.size() >= 2)
+      EXPECT_EQ(data[i].w2, UTF16ToUTF8(results[1]));
+    if (results.size() >= 3)
+      EXPECT_EQ(data[i].w3, UTF16ToUTF8(results[2]));
+  }
+}