diff options
author | leandrogracia@chromium.org <leandrogracia@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-05-24 21:12:11 +0000 |
---|---|---|
committer | leandrogracia@chromium.org <leandrogracia@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-05-24 21:12:11 +0000 |
commit | 19da2adb70d4f7a869d66ba33fb944b6ebb851c0 (patch) | |
tree | 4efe2eb0804160f1de9df233252d61490700d23f /content | |
parent | 933fb2edc177d6f5736f66f96e3c5e2d5607e570 (diff) | |
download | chromium_src-19da2adb70d4f7a869d66ba33fb944b6ebb851c0.zip chromium_src-19da2adb70d4f7a869d66ba33fb944b6ebb851c0.tar.gz chromium_src-19da2adb70d4f7a869d66ba33fb944b6ebb851c0.tar.bz2 |
[Android] Introduce email address detection.
For more context see: https://chromiumcodereview.appspot.com/10187020/
BUG=125390
TEST=email_detector_unittest.cc
Review URL: https://chromiumcodereview.appspot.com/10443015
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@138887 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'content')
-rw-r--r-- | content/content_renderer.gypi | 2 | ||||
-rw-r--r-- | content/content_tests.gypi | 1 | ||||
-rw-r--r-- | content/renderer/android/email_detector.cc | 74 | ||||
-rw-r--r-- | content/renderer/android/email_detector.h | 38 | ||||
-rw-r--r-- | content/renderer/android/email_detector_unittest.cc | 51 |
5 files changed, 166 insertions, 0 deletions
diff --git a/content/content_renderer.gypi b/content/content_renderer.gypi index 0d67650..fd8eca1 100644 --- a/content/content_renderer.gypi +++ b/content/content_renderer.gypi @@ -44,6 +44,8 @@ 'renderer/android/address_detector.h', 'renderer/android/content_detector.cc', 'renderer/android/content_detector.h', + 'renderer/android/email_detector.cc', + 'renderer/android/email_detector.h', 'renderer/active_notification_tracker.cc', 'renderer/active_notification_tracker.h', 'renderer/device_orientation_dispatcher.cc', diff --git a/content/content_tests.gypi b/content/content_tests.gypi index 4758bc8..e449b26 100644 --- a/content/content_tests.gypi +++ b/content/content_tests.gypi @@ -285,6 +285,7 @@ 'gpu/gpu_info_collector_unittest_win.cc', 'renderer/active_notification_tracker_unittest.cc', 'renderer/android/address_detector_unittest.cc', + 'renderer/android/email_detector_unittest.cc', 'renderer/gpu/input_event_filter_unittest.cc', 'renderer/media/audio_message_filter_unittest.cc', 'renderer/media/capture_video_decoder_unittest.cc', diff --git a/content/renderer/android/email_detector.cc b/content/renderer/android/email_detector.cc new file mode 100644 index 0000000..1178492 --- /dev/null +++ b/content/renderer/android/email_detector.cc @@ -0,0 +1,74 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "content/renderer/android/email_detector.h" + +#include "base/memory/scoped_ptr.h" +#include "base/utf_string_conversions.h" +#include "net/base/escape.h" +#include "unicode/regex.h" + +namespace { + +// Maximum length of an email address. +const size_t kMaximumEmailLength = 254; + +// Prefix used for email intent URIs. +const char kEmailSchemaPrefix[] = "mailto:"; + +// Regex to match email addresses. +// This is more specific than RFC 2822 (uncommon special characters are +// disallowed) in order to avoid false positives. 
+// Delimiters are word boundaries to allow punctuation, quote marks etc. around
+// the address.
+const char kEmailRegex[] = "\\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,6}\\b";
+
+}  // anonymous namespace
+
+namespace content {
+
+EmailDetector::EmailDetector() {
+}
+
+// Returns kMaximumEmailLength, the maximum length of an email address.
+size_t EmailDetector::GetMaximumContentLength() {
+  return kMaximumEmailLength;
+}
+
+// Builds the intent URL for |content_text|: kEmailSchemaPrefix followed by
+// the text passed through net::EscapeQueryParamValue(). An empty
+// |content_text| yields a default-constructed GURL.
+// NOTE(review): the meaning of the |true| argument is defined in
+// net/base/escape.h, which is not visible here - confirm.
+GURL EmailDetector::GetIntentURL(const std::string& content_text) {
+  if (content_text.empty())
+    return GURL();
+
+  return GURL(kEmailSchemaPrefix +
+      net::EscapeQueryParamValue(content_text, true));
+}
+
+// Searches [begin, end) for the first match of kEmailRegex, ignoring case.
+// On a match, stores the match bounds in |*start_pos| and |*end_pos| (as
+// reported by the ICU matcher for the copied range, so they are offsets from
+// |begin|), writes the matched text as UTF-8 into |*content_text| and returns
+// true. Otherwise returns false without writing to the output parameters.
+bool EmailDetector::FindContent(const string16::const_iterator& begin,
+                                const string16::const_iterator& end,
+                                size_t* start_pos,
+                                size_t* end_pos,
+                                std::string* content_text) {
+  // Copy the range so the ICU input string can be built from a buffer and a
+  // length.
+  string16 utf16_input = string16(begin, end);
+  icu::UnicodeString pattern(kEmailRegex);
+  icu::UnicodeString input(utf16_input.data(), utf16_input.length());
+  UErrorCode status = U_ZERO_ERROR;
+  // NOTE(review): |status| is not inspected right after construction; only
+  // the start()/end()/group() calls below are DCHECKed.
+  scoped_ptr<icu::RegexMatcher> matcher(
+      new icu::RegexMatcher(pattern,
+                            input,
+                            UREGEX_CASE_INSENSITIVE,
+                            status));
+  if (matcher->find()) {
+    *start_pos = matcher->start(status);
+    DCHECK(U_SUCCESS(status));
+    *end_pos = matcher->end(status);
+    DCHECK(U_SUCCESS(status));
+    icu::UnicodeString content_ustr(matcher->group(status));
+    DCHECK(U_SUCCESS(status));
+    // Hand the matched UTF-16 text back to the caller as UTF-8.
+    UTF16ToUTF8(content_ustr.getBuffer(), content_ustr.length(), content_text);
+    return true;
+  }
+
+  return false;
+}
+
+}  // namespace content
diff --git a/content/renderer/android/email_detector.h b/content/renderer/android/email_detector.h
new file mode 100644
index 0000000..cda43c6
--- /dev/null
+++ b/content/renderer/android/email_detector.h
@@ -0,0 +1,38 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CONTENT_RENDERER_ANDROID_EMAIL_DETECTOR_H_
+#define CONTENT_RENDERER_ANDROID_EMAIL_DETECTOR_H_
+#pragma once
+
+#include "content/renderer/android/content_detector.h"
+
+// Declared at global scope so that it can be named as a friend below.
+class EmailDetectorTest;
+
+namespace content {
+
+// Finds email addresses (in most common formats, but not including special
+// characters) in the given text string.
+class EmailDetector : public ContentDetector {
+ public:
+  EmailDetector();
+
+ private:
+  // Gives the global-scope EmailDetectorTest class access to the private
+  // members declared below.
+  friend class ::EmailDetectorTest;
+
+  // Implementation of ContentDetector.
+  // FindContent() takes the range [begin, end) to search and three output
+  // pointers: the start and end positions of the content found and its text.
+  virtual bool FindContent(const string16::const_iterator& begin,
+                           const string16::const_iterator& end,
+                           size_t* start_pos,
+                           size_t* end_pos,
+                           std::string* content_text) OVERRIDE;
+  // Returns the intent URL to use for |content_text|.
+  virtual GURL GetIntentURL(const std::string& content_text) OVERRIDE;
+  // Returns the maximum length of the content this detector looks for.
+  virtual size_t GetMaximumContentLength() OVERRIDE;
+
+  DISALLOW_COPY_AND_ASSIGN(EmailDetector);
+};
+
+}  // namespace content
+
+#endif  // CONTENT_RENDERER_ANDROID_EMAIL_DETECTOR_H_
diff --git a/content/renderer/android/email_detector_unittest.cc b/content/renderer/android/email_detector_unittest.cc
new file mode 100644
index 0000000..a68184e
--- /dev/null
+++ b/content/renderer/android/email_detector_unittest.cc
@@ -0,0 +1,51 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "content/renderer/android/email_detector.h"
+
+#include "base/utf_string_conversions.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+using content::EmailDetector;
+
+// Test fixture for EmailDetector. It calls EmailDetector::FindContent()
+// directly on a detector instance.
+class EmailDetectorTest : public testing::Test {
+ public:
+  // Runs FindContent() over |content| and expects both the substring of
+  // |content| delimited by the reported start/end offsets and the returned
+  // text to equal |expected|. Pass "" as |expected| when no address should be
+  // found: in that case both compared strings are still empty.
+  static void FindAndCheckEmail(const std::string& content,
+                                const std::string& expected) {
+    string16 content_16 = UTF8ToUTF16(content);
+    string16 result_16;
+    // Left uninitialized; only read when FindContent() returns true.
+    size_t start, end;
+    EmailDetector detector;
+    std::string content_text;
+    if (detector.FindContent(content_16.begin(), content_16.end(),
+                             &start, &end, &content_text)) {
+      result_16 = content_16.substr(start, end - start);
+    }
+    EXPECT_EQ(expected, UTF16ToUTF8(result_16));
+    EXPECT_EQ(expected, content_text);
+  }
+};
+
+TEST_F(EmailDetectorTest, FindEmail) {
+  // Inputs that contain an address, including ones wrapped in punctuation,
+  // quotes or brackets, and upper-case ones.
+  FindAndCheckEmail("please email test@testing.com", "test@testing.com");
+  FindAndCheckEmail("please email test@123.456.co.uk.", "test@123.456.co.uk");
+  FindAndCheckEmail("My email is 'a@b.org'.", "a@b.org");
+  FindAndCheckEmail("123@bcd.org", "123@bcd.org");
+  FindAndCheckEmail("[quitelongwelllongemailaddress@somequitelongdomain.org]",
+                    "quitelongwelllongemailaddress@somequitelongdomain.org");
+  FindAndCheckEmail("Should find the first@email.org not the second@email.org",
+                    "first@email.org");
+  FindAndCheckEmail("Email:HELLO@SOMETHING.COM", "HELLO@SOMETHING.COM");
+  FindAndCheckEmail("Email SOMEONE@GOOGLE.COM for details.",
+                    "SOMEONE@GOOGLE.COM");
+  FindAndCheckEmail("It's \"testadd@company.fr\"", "testadd@company.fr");
+  // Inputs in which nothing should be detected (expected result is empty).
+  FindAndCheckEmail("This is not an @emailaddress.com", "");
+  FindAndCheckEmail("Apples @2.50 each", "");
+  FindAndCheckEmail("Log on to google.com", "");
+  FindAndCheckEmail("Try someone@, they might know.", "");
+  FindAndCheckEmail("No, bob@com is not an email address.", "");
+  FindAndCheckEmail("@", "");
+  FindAndCheckEmail("Just bob @google.com", "");
+  FindAndCheckEmail("Why not call larry@google and ask him.", "");
+} |