summaryrefslogtreecommitdiffstats
path: root/content/renderer/android/email_detector.cc
blob: 28b8af4d90103d59575f68e0b41a5be7a003980d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/renderer/android/email_detector.h"

#include "base/logging.h"
#include "base/memory/scoped_ptr.h"
#include "base/strings/utf_string_conversions.h"
#include "content/public/renderer/android_content_detection_prefixes.h"
#include "net/base/escape.h"
#include "third_party/icu/source/i18n/unicode/regex.h"

namespace {

// Maximum length of an email address. Returned unchanged by
// EmailDetector::GetMaximumContentLength().
const size_t kMaximumEmailLength = 254;

// Regex to match email addresses.
// This is more specific than RFC 2822 (uncommon special characters are
// disallowed) in order to avoid false positives.
// Delimiters are word boundaries to allow punctuation, quote marks etc. around
// the address.
// Shape: a local part of letters, digits and "._%+-", an '@', a domain of
// letters, digits, '.' and '-', then a final '.' followed by 2 to 6 letters.
// Only upper-case letters are spelled out because EmailDetector::FindContent()
// compiles the pattern with UREGEX_CASE_INSENSITIVE.
const char kEmailRegex[] = "\\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,6}\\b";

}  // anonymous namespace

namespace content {

// The constructor body is intentionally empty; no setup is performed here.
EmailDetector::EmailDetector() {}

// Reports the upper bound on the length of content this detector handles,
// which is the maximum email address length (kMaximumEmailLength).
size_t EmailDetector::GetMaximumContentLength() {
  const size_t max_length = kMaximumEmailLength;
  return max_length;
}

// Builds the intent URL for a detected email address: kEmailPrefix followed
// by |content_text| escaped as a query parameter value. Returns a
// default-constructed GURL when |content_text| is empty.
GURL EmailDetector::GetIntentURL(const std::string& content_text) {
  if (!content_text.empty()) {
    // NOTE(review): the |true| argument is presumably the "use plus for
    // spaces" flag of net::EscapeQueryParamValue -- confirm in
    // net/base/escape.h.
    const std::string escaped_text =
        net::EscapeQueryParamValue(content_text, true);
    return GURL(kEmailPrefix + escaped_text);
  }

  return GURL();
}

// Searches the UTF-16 range [begin, end) for the first substring matching
// kEmailRegex, compared case-insensitively.
//
// On a match, stores the start and end offsets reported by icu::RegexMatcher
// in |*start_pos| and |*end_pos|, stores the UTF-8 conversion of the matched
// text in |*content_text| and returns true.
//
// Returns false, leaving all three output parameters untouched, when nothing
// matches or when ICU reports an error.
bool EmailDetector::FindContent(const string16::const_iterator& begin,
                                const string16::const_iterator& end,
                                size_t* start_pos,
                                size_t* end_pos,
                                std::string* content_text) {
  // Copy the range into a string16 so that its buffer and length can be
  // handed to icu::UnicodeString.
  const string16 utf16_input(begin, end);
  const icu::UnicodeString pattern(kEmailRegex);
  const icu::UnicodeString input(utf16_input.data(), utf16_input.length());

  // The matcher is only needed for the duration of this call, so it lives on
  // the stack instead of the heap.
  UErrorCode status = U_ZERO_ERROR;
  icu::RegexMatcher matcher(pattern, input, UREGEX_CASE_INSENSITIVE, status);
  if (U_FAILURE(status))
    return false;  // Pattern compilation or matcher setup failed.

  if (!matcher.find())
    return false;

  // ICU calls taking a UErrorCode do nothing once it holds a failure, so a
  // single check after the three queries covers all of them.
  const int32_t match_start = matcher.start(status);
  const int32_t match_end = matcher.end(status);
  const icu::UnicodeString content_ustr(matcher.group(status));
  DCHECK(U_SUCCESS(status));
  if (U_FAILURE(status)) {
    // start()/end() report -1 on error; never store that in the size_t
    // outputs, where it would turn into a huge offset.
    return false;
  }

  *start_pos = match_start;
  *end_pos = match_end;
  UTF16ToUTF8(content_ustr.getBuffer(), content_ustr.length(), content_text);
  return true;
}

}  // namespace content