summaryrefslogtreecommitdiffstats
path: root/components/query_parser/query_parser_unittest.cc
blob: c99c639b96447493d73b915cd04240f6836f2af0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <stddef.h>

#include "base/macros.h"
#include "base/memory/scoped_vector.h"
#include "base/strings/utf_string_conversions.h"
#include "components/query_parser/query_parser.h"
#include "testing/gtest/include/gtest/gtest.h"

namespace query_parser {

// Test fixture shared by the QueryParser tests in this file. It owns a parser
// instance and declares a helper for driving ParseQuery() with 8-bit strings.
class QueryParserTest : public testing::Test {
 public:
  // One row of a table-driven test: a query string and the word count that
  // parsing it is expected to report.
  struct TestData {
    const char* input;
    const int expected_word_count;
  };

  // Runs |query| through |query_parser_| and returns the query string the
  // parser produced. Defined out of line.
  std::string QueryToString(const std::string& query);

 protected:
  // Parser instance used by QueryToString() and by tests that call the parser
  // directly.
  QueryParser query_parser_;
};

// Test helper: Convert a user query string in 8-bit (for hardcoding
// convenience) to a SQLite query string.
std::string QueryParserTest::QueryToString(const std::string& query) {
  base::string16 sqlite_query;
  query_parser_.ParseQuery(base::UTF8ToUTF16(query),
                           MatchingAlgorithm::DEFAULT,
                           &sqlite_query);
  return base::UTF16ToUTF8(sqlite_query);
}

// Plain multi-word queries: whitespace handling and when the prefix-match
// marker ('*') gets appended to a word.
TEST_F(QueryParserTest, SimpleQueries) {
  // KA JANG: two words of one Hangul character each.
  const std::string ka_jang_expected = base::WideToUTF8(L"\xAC00 \xC7A5");
  const std::string ka_jang_query = base::WideToUTF8(L" \xAC00 \xC7A5");
  // KA-JANG BICH-GO: two words of two Hangul characters each.
  const std::string kajang_bichgo_expected =
      base::WideToUTF8(L"\xAC00\xC7A5* \xBE5B\xACE0*");
  const std::string kajang_bichgo_query =
      base::WideToUTF8(L"\xAC00\xC7A5 \xBE5B\xACE0");

  EXPECT_EQ("", QueryToString(" "));
  EXPECT_EQ("singleword*", QueryToString("singleword"));
  EXPECT_EQ("spacedout*", QueryToString("  spacedout "));
  EXPECT_EQ("foo* bar*", QueryToString("foo bar"));

  // Short words aren't prefix matches. For Korean Hangul
  // the minimum is 2 while for other scripts, it's 3.
  EXPECT_EQ("f b", QueryToString(" f b"));
  EXPECT_EQ(ka_jang_expected, QueryToString(ka_jang_query));

  EXPECT_EQ("foo* bar*", QueryToString(" foo   bar "));
  EXPECT_EQ(kajang_bichgo_expected, QueryToString(kajang_bichgo_query));
}

// Quoted phrases, including unbalanced and oddly placed quote characters.
TEST_F(QueryParserTest, Quoted) {
  struct QuoteCase {
    const std::string input;
    const std::string expected;
  } cases[] = {
    // ASCII quotes
    { "\"Quoted\"",          "\"Quoted\"" },
    // Missing end quotes
    { "\"miss end",          "\"miss end\"" },
    // Missing begin quotes
    { "miss beg\"",          "miss* beg*" },
    // Weird formatting
    { "\"Many   \"\"quotes", "\"Many\" \"quotes\"" },
  };

  for (size_t i = 0; i < arraysize(cases); ++i) {
    EXPECT_EQ(cases[i].expected, QueryToString(cases[i].input));
  }
}

// An apostrophe that sits inside a word is kept; any other apostrophe is
// dropped from the parsed query.
TEST_F(QueryParserTest, Apostrophes) {
  struct ApostropheCase {
    const std::string input;
    const std::string expected;
  } cases[] = {
    { "foo bar's", "foo* bar's*" },
    { "l'foo",     "l'foo*" },
    { "'foo",      "foo*" },
  };

  for (size_t i = 0; i < arraysize(cases); ++i) {
    EXPECT_EQ(cases[i].expected, QueryToString(cases[i].input));
  }
}

// Punctuation and symbol characters surrounding the words of a query.
TEST_F(QueryParserTest, SpecialChars) {
  const std::string noisy_query = "!#:/*foo#$*;'* the!#:/*bar";
  const std::string parsed = QueryToString(noisy_query);
  EXPECT_EQ("foo* the* bar*", parsed);
}

// ParseQuery() reports how many words the query contains; each word inside a
// quoted phrase counts separately.
TEST_F(QueryParserTest, NumWords) {
  const TestData cases[] = {
    { "blah",                  1 },
    { "foo \"bar baz\"",       3 },
    { "foo \"baz\"",           2 },
    { "foo \"bar baz\"  blah", 4 },
  };

  for (const TestData& test_case : cases) {
    // Only the return value is checked; the generated query is discarded.
    base::string16 unused_sqlite_query;
    EXPECT_EQ(test_case.expected_word_count,
              query_parser_.ParseQuery(base::UTF8ToUTF16(test_case.input),
                                       MatchingAlgorithm::DEFAULT,
                                       &unused_sqlite_query));
  }
}

// Parses each query into nodes, matches those nodes against a piece of text
// and checks both the match verdict and the reported match positions.
TEST_F(QueryParserTest, ParseQueryNodesAndMatch) {
  // Each row pins up to two match positions as (start, end) pairs. A pair of
  // (0, 0) is a sentinel meaning "no expectation for this slot".
  struct TestData2 {
    const std::string query;
    const std::string text;
    const bool matches;
    const size_t m1_start;
    const size_t m1_end;
    const size_t m2_start;
    const size_t m2_end;
  } data[] = {
    { "foo",           "fooey foo",        true,  0, 3, 6, 9 },
    { "foo foo",       "foo",              true,  0, 3, 0, 0 },
    { "foo fooey",     "fooey",            true,  0, 5, 0, 0 },
    { "fooey foo",     "fooey",            true,  0, 5, 0, 0 },
    { "foo fooey bar", "bar fooey",        true,  0, 3, 4, 9 },
    { "blah",          "blah",             true,  0, 4, 0, 0 },
    { "blah",          "foo",              false, 0, 0, 0, 0 },
    { "blah",          "blahblah",         true,  0, 4, 0, 0 },
    { "blah",          "foo blah",         true,  4, 8, 0, 0 },
    { "foo blah",      "blah",             false, 0, 0, 0, 0 },
    { "foo blah",      "blahx foobar",     true,  0, 4, 6, 9 },
    { "\"foo blah\"",  "foo blah",         true,  0, 8, 0, 0 },
    { "\"foo blah\"",  "foox blahx",       false, 0, 0, 0, 0 },
    { "\"foo blah\"",  "foo blah",         true,  0, 8, 0, 0 },
    { "\"foo blah\"",  "\"foo blah\"",     true,  1, 9, 0, 0 },
    { "foo blah",      "\"foo bar blah\"", true,  1, 4, 9, 13 },
  };
  for (size_t i = 0; i < arraysize(data); ++i) {
    // Attach the row to every failure below so a red run names the offending
    // query/text pair instead of only the shared source line.
    SCOPED_TRACE(testing::Message() << "case " << i << ": query=["
                                    << data[i].query << "] text=["
                                    << data[i].text << "]");
    QueryParser parser;
    ScopedVector<QueryNode> query_nodes;
    parser.ParseQueryNodes(base::UTF8ToUTF16(data[i].query),
                           MatchingAlgorithm::DEFAULT,
                           &query_nodes.get());
    Snippet::MatchPositions match_positions;
    ASSERT_EQ(data[i].matches,
              parser.DoesQueryMatch(base::UTF8ToUTF16(data[i].text),
                                    query_nodes.get(),
                                    &match_positions));
    // Index of the slot the second expectation is compared against; it moves
    // to 1 only when the first expectation is present.
    size_t offset = 0;
    if (data[i].m1_start != 0 || data[i].m1_end != 0) {
      // Comparison assertions print both operands on failure, unlike
      // ASSERT_TRUE on a boolean expression.
      ASSERT_GE(match_positions.size(), 1u);
      EXPECT_EQ(data[i].m1_start, match_positions[0].first);
      EXPECT_EQ(data[i].m1_end, match_positions[0].second);
      offset++;
    }
    if (data[i].m2_start != 0 || data[i].m2_end != 0) {
      ASSERT_EQ(1 + offset, match_positions.size());
      EXPECT_EQ(data[i].m2_start, match_positions[offset].first);
      EXPECT_EQ(data[i].m2_end, match_positions[offset].second);
    }
  }
}

// Checks that ParseQueryWords() splits a query into its individual words,
// including the words that appear inside a quoted phrase.
TEST_F(QueryParserTest, ParseQueryWords) {
  // |w1|..|w3| are the expected words in order; unused slots are left empty
  // and |word_count| says how many of them are meaningful.
  struct TestData2 {
    const std::string text;
    const std::string w1;
    const std::string w2;
    const std::string w3;
    const size_t word_count;
  } data[] = {
    { "foo",           "foo", "",    "",  1 },
    { "foo bar",       "foo", "bar", "",  2 },
    { "\"foo bar\"",   "foo", "bar", "",  2 },
    { "\"foo bar\" a", "foo", "bar", "a", 3 },
  };
  for (size_t i = 0; i < arraysize(data); ++i) {
    // Name the input in any failure reported for this row.
    SCOPED_TRACE(data[i].text);
    std::vector<base::string16> results;
    QueryParser parser;
    parser.ParseQueryWords(base::UTF8ToUTF16(data[i].text),
                           MatchingAlgorithm::DEFAULT,
                           &results);
    ASSERT_EQ(data[i].word_count, results.size());
    // The checks are cumulative (>=, not ==): a three-word row must have its
    // first, second and third words verified, not just the first and third.
    if (results.size() >= 1)
      EXPECT_EQ(data[i].w1, base::UTF16ToUTF8(results[0]));
    if (results.size() >= 2)
      EXPECT_EQ(data[i].w2, base::UTF16ToUTF8(results[1]));
    if (results.size() >= 3)
      EXPECT_EQ(data[i].w3, base::UTF16ToUTF8(results[2]));
  }
}

}  // namespace query_parser