summaryrefslogtreecommitdiffstats
path: root/components/drive/search_metadata.cc
blob: fcc35313fa2781e211cf67bdebc125e0ff3ecf8d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
// Copyright (c) 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/drive/search_metadata.h"

#include <algorithm>
#include <queue>

#include "base/bind.h"
#include "base/i18n/string_search.h"
#include "base/metrics/histogram.h"
#include "base/strings/string_piece.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/time/time.h"
#include "components/drive/drive_api_util.h"
#include "components/drive/file_system_core_util.h"
#include "net/base/escape.h"

namespace drive {
namespace internal {

namespace {

struct ResultCandidate {
  ResultCandidate(const std::string& local_id,
                  const ResourceEntry& entry,
                  const std::string& highlighted_base_name)
      : local_id(local_id),
        entry(entry),
        highlighted_base_name(highlighted_base_name) {
  }

  std::string local_id;
  ResourceEntry entry;
  std::string highlighted_base_name;
};

// Used to sort the result candidates per the last accessed/modified time. The
// recently accessed/modified files come first.
bool CompareByTimestamp(const ResourceEntry& a, const ResourceEntry& b) {
  const PlatformFileInfoProto& a_file_info = a.file_info();
  const PlatformFileInfoProto& b_file_info = b.file_info();

  if (a_file_info.last_accessed() != b_file_info.last_accessed())
    return a_file_info.last_accessed() > b_file_info.last_accessed();

  // When the entries have the same last access time (which happens quite often
  // because Drive server doesn't set the field until an entry is viewed via
  // drive.google.com), we use last modified time as the tie breaker.
  return a_file_info.last_modified() > b_file_info.last_modified();
}

struct ResultCandidateComparator {
  bool operator()(const ResultCandidate* a, const ResultCandidate* b) const {
    return CompareByTimestamp(a->entry, b->entry);
  }
};

// A wrapper of std::priority_queue which deals with pointers of values.
template<typename T, typename Compare>
class ScopedPriorityQueue {
 public:
  ScopedPriorityQueue() {}

  ~ScopedPriorityQueue() {
    while (!empty())
      pop();
  }

  bool empty() const { return queue_.empty(); }

  size_t size() const { return queue_.size(); }

  const T* top() const { return queue_.top(); }

  void push(T* x) { queue_.push(x); }

  void pop() {
    // Keep top alive for the pop() call so that debug checks can access
    // underlying data (e.g. validating heap property of the priority queue
    // will call the comparator).
    T* saved_top = queue_.top();
    queue_.pop();
    delete saved_top;
  }

 private:
  std::priority_queue<T*, std::vector<T*>, Compare> queue_;

  DISALLOW_COPY_AND_ASSIGN(ScopedPriorityQueue);
};

// Classifies the given entry as hidden if it's not under specific directories.
class HiddenEntryClassifier {
 public:
  HiddenEntryClassifier(ResourceMetadata* metadata,
                        const std::string& mydrive_local_id)
      : metadata_(metadata) {
    // Only things under My Drive and drive/other are not hidden.
    is_hiding_child_[mydrive_local_id] = false;
    is_hiding_child_[util::kDriveOtherDirLocalId] = false;

    // Everything else is hidden, including the directories mentioned above
    // themselves.
    is_hiding_child_[""] = true;
  }

  // |result| is set to true if |entry| is hidden.
  FileError IsHidden(const ResourceEntry& entry, bool* result) {
    // Look up for parents recursively.
    std::vector<std::string> undetermined_ids;
    undetermined_ids.push_back(entry.parent_local_id());

    std::map<std::string, bool>::iterator it =
        is_hiding_child_.find(undetermined_ids.back());
    for (; it == is_hiding_child_.end();
         it = is_hiding_child_.find(undetermined_ids.back())) {
      ResourceEntry parent;
      FileError error =
          metadata_->GetResourceEntryById(undetermined_ids.back(), &parent);
      if (error != FILE_ERROR_OK)
        return error;
      undetermined_ids.push_back(parent.parent_local_id());
    }

    // Cache the result.
    undetermined_ids.pop_back();  // The last one is already in the map.
    for (size_t i = 0; i < undetermined_ids.size(); ++i)
      is_hiding_child_[undetermined_ids[i]] = it->second;

    *result = it->second;
    return FILE_ERROR_OK;
  }

 private:
  ResourceMetadata* metadata_;

  // local ID to is_hidden map.
  std::map<std::string, bool> is_hiding_child_;
};

// Used to implement SearchMetadata.
// Adds entry to the result when appropriate.
// In particular, if size of |queries| is larger than 0, only adds files with
// the name matching the query.
FileError MaybeAddEntryToResult(
    ResourceMetadata* resource_metadata,
    ResourceMetadata::Iterator* it,
    const ScopedVector<
        base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents>& queries,
    const SearchMetadataPredicate& predicate,
    size_t at_most_num_matches,
    HiddenEntryClassifier* hidden_entry_classifier,
    ScopedPriorityQueue<ResultCandidate, ResultCandidateComparator>*
        result_candidates) {
  DCHECK_GE(at_most_num_matches, result_candidates->size());

  const ResourceEntry& entry = it->GetValue();

  // If the candidate set is already full, and this |entry| is old, do nothing.
  // We perform this check first in order to avoid the costly find-and-highlight
  // or FilePath lookup as much as possible.
  if (result_candidates->size() == at_most_num_matches &&
      !CompareByTimestamp(entry, result_candidates->top()->entry))
    return FILE_ERROR_OK;

  // Add |entry| to the result if the entry is eligible for the given
  // |options| and matches the query. The base name of the entry must
  // contain |query| to match the query.
  std::string highlighted;
  if (!predicate.Run(entry) ||
      !FindAndHighlight(entry.base_name(), queries, &highlighted))
    return FILE_ERROR_OK;

  // Hidden entry should not be returned.
  bool hidden = false;
  FileError error = hidden_entry_classifier->IsHidden(entry, &hidden);
  if (error != FILE_ERROR_OK || hidden)
    return error;

  // Make space for |entry| when appropriate.
  if (result_candidates->size() == at_most_num_matches)
    result_candidates->pop();
  result_candidates->push(new ResultCandidate(it->GetID(), entry, highlighted));
  return FILE_ERROR_OK;
}

// Implements SearchMetadata().
FileError SearchMetadataOnBlockingPool(ResourceMetadata* resource_metadata,
                                       const std::string& query_text,
                                       const SearchMetadataPredicate& predicate,
                                       int at_most_num_matches,
                                       MetadataSearchResultVector* results) {
  ScopedPriorityQueue<ResultCandidate,
                      ResultCandidateComparator> result_candidates;

  // Prepare data structure for searching.
  std::vector<base::string16> keywords =
      base::SplitString(base::UTF8ToUTF16(query_text),
                        base::StringPiece16(base::kWhitespaceUTF16),
                        base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);

  ScopedVector<base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents>
      queries;
  for (const auto& keyword : keywords) {
    queries.push_back(
        new base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents(
            keyword));
  }

  // Prepare an object to filter out hidden entries.
  ResourceEntry mydrive;
  FileError error = resource_metadata->GetResourceEntryByPath(
      util::GetDriveMyDriveRootPath(), &mydrive);
  if (error != FILE_ERROR_OK)
    return error;
  HiddenEntryClassifier hidden_entry_classifier(resource_metadata,
                                                mydrive.local_id());

  // Iterate over entries.
  scoped_ptr<ResourceMetadata::Iterator> it = resource_metadata->GetIterator();
  for (; !it->IsAtEnd(); it->Advance()) {
    FileError error = MaybeAddEntryToResult(
        resource_metadata, it.get(), queries, predicate, at_most_num_matches,
        &hidden_entry_classifier, &result_candidates);
    if (error != FILE_ERROR_OK)
      return error;
  }

  // Prepare the result.
  for (; !result_candidates.empty(); result_candidates.pop()) {
    const ResultCandidate& candidate = *result_candidates.top();
    // The path field of entries in result_candidates are empty at this point,
    // because we don't want to run the expensive metadata DB look up except for
    // the final results. Hence, here we fill the part.
    base::FilePath path;
    error = resource_metadata->GetFilePath(candidate.local_id, &path);
    if (error != FILE_ERROR_OK)
      return error;
    bool is_directory = candidate.entry.file_info().is_directory();
    results->push_back(MetadataSearchResult(
        path, is_directory, candidate.highlighted_base_name,
        candidate.entry.file_specific_info().md5()));
  }

  // Reverse the order here because |result_candidates| puts the most
  // uninteresting candidate at the top.
  std::reverse(results->begin(), results->end());

  return FILE_ERROR_OK;
}

// Runs the SearchMetadataCallback and updates the histogram.
void RunSearchMetadataCallback(const SearchMetadataCallback& callback,
                               const base::TimeTicks& start_time,
                               scoped_ptr<MetadataSearchResultVector> results,
                               FileError error) {
  if (error != FILE_ERROR_OK)
    results.reset();
  callback.Run(error, results.Pass());

  UMA_HISTOGRAM_TIMES("Drive.SearchMetadataTime",
                      base::TimeTicks::Now() - start_time);
}

// Appends substring of |original_text| to |highlighted_text| with highlight.
void AppendStringWithHighlight(const base::string16& original_text,
                               size_t start,
                               size_t length,
                               bool highlight,
                               std::string* highlighted_text) {
  if (highlight)
    highlighted_text->append("<b>");

  highlighted_text->append(net::EscapeForHTML(
      base::UTF16ToUTF8(original_text.substr(start, length))));

  if (highlight)
    highlighted_text->append("</b>");
}

}  // namespace

void SearchMetadata(
    scoped_refptr<base::SequencedTaskRunner> blocking_task_runner,
    ResourceMetadata* resource_metadata,
    const std::string& query,
    const SearchMetadataPredicate& predicate,
    size_t at_most_num_matches,
    const SearchMetadataCallback& callback) {
  DCHECK(!callback.is_null());

  const base::TimeTicks start_time = base::TimeTicks::Now();

  scoped_ptr<MetadataSearchResultVector> results(
      new MetadataSearchResultVector);
  MetadataSearchResultVector* results_ptr = results.get();
  base::PostTaskAndReplyWithResult(
      blocking_task_runner.get(), FROM_HERE,
      base::Bind(&SearchMetadataOnBlockingPool, resource_metadata, query,
                 predicate, at_most_num_matches, results_ptr),
      base::Bind(&RunSearchMetadataCallback, callback, start_time,
                 base::Passed(&results)));
}

bool MatchesType(int options, const ResourceEntry& entry) {
  if ((options & SEARCH_METADATA_EXCLUDE_HOSTED_DOCUMENTS) &&
      entry.file_specific_info().is_hosted_document())
    return false;

  if ((options & SEARCH_METADATA_EXCLUDE_DIRECTORIES) &&
      entry.file_info().is_directory())
    return false;

  if (options & SEARCH_METADATA_SHARED_WITH_ME)
    return entry.shared_with_me();

  if (options & SEARCH_METADATA_OFFLINE) {
    if (entry.file_specific_info().is_hosted_document()) {
      // Not all hosted documents are cached by Drive offline app.
      // https://support.google.com/drive/answer/2375012
      std::string mime_type = entry.file_specific_info().content_mime_type();
      return mime_type == drive::util::kGoogleDocumentMimeType ||
             mime_type == drive::util::kGoogleSpreadsheetMimeType ||
             mime_type == drive::util::kGooglePresentationMimeType ||
             mime_type == drive::util::kGoogleDrawingMimeType;
    } else {
      return entry.file_specific_info().cache_state().is_present();
    }
  }

  return true;
}

bool FindAndHighlight(
    const std::string& text,
    const ScopedVector<
        base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents>& queries,
    std::string* highlighted_text) {
  DCHECK(highlighted_text);
  highlighted_text->clear();

  // Check text matches with all queries.
  size_t match_start = 0;
  size_t match_length = 0;

  base::string16 text16 = base::UTF8ToUTF16(text);
  std::vector<bool> highlights(text16.size(), false);
  for (auto* query : queries) {
    if (!query->Search(text16, &match_start, &match_length))
      return false;

    std::fill(highlights.begin() + match_start,
              highlights.begin() + match_start + match_length, true);
  }

  // Generate highlighted text.
  size_t start_current_segment = 0;

  for (size_t i = 0; i < text16.size(); ++i) {
    if (highlights[start_current_segment] == highlights[i])
      continue;

    AppendStringWithHighlight(
        text16, start_current_segment, i - start_current_segment,
        highlights[start_current_segment], highlighted_text);

    start_current_segment = i;
  }

  DCHECK_GE(text16.size(), start_current_segment);
  AppendStringWithHighlight(
      text16, start_current_segment, text16.size() - start_current_segment,
      highlights[start_current_segment], highlighted_text);

  return true;
}

}  // namespace internal
}  // namespace drive