summaryrefslogtreecommitdiffstats
path: root/content
diff options
context:
space:
mode:
authorleandrogracia@chromium.org <leandrogracia@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2012-05-24 21:12:11 +0000
committerleandrogracia@chromium.org <leandrogracia@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2012-05-24 21:12:11 +0000
commit19da2adb70d4f7a869d66ba33fb944b6ebb851c0 (patch)
tree4efe2eb0804160f1de9df233252d61490700d23f /content
parent933fb2edc177d6f5736f66f96e3c5e2d5607e570 (diff)
downloadchromium_src-19da2adb70d4f7a869d66ba33fb944b6ebb851c0.zip
chromium_src-19da2adb70d4f7a869d66ba33fb944b6ebb851c0.tar.gz
chromium_src-19da2adb70d4f7a869d66ba33fb944b6ebb851c0.tar.bz2
[Android] Introduce email address detection.
For more context see: https://chromiumcodereview.appspot.com/10187020/

BUG=125390
TEST=email_detector_unittest.cc

Review URL: https://chromiumcodereview.appspot.com/10443015

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@138887 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'content')
-rw-r--r--content/content_renderer.gypi2
-rw-r--r--content/content_tests.gypi1
-rw-r--r--content/renderer/android/email_detector.cc74
-rw-r--r--content/renderer/android/email_detector.h38
-rw-r--r--content/renderer/android/email_detector_unittest.cc51
5 files changed, 166 insertions, 0 deletions
diff --git a/content/content_renderer.gypi b/content/content_renderer.gypi
index 0d67650..fd8eca1 100644
--- a/content/content_renderer.gypi
+++ b/content/content_renderer.gypi
@@ -44,6 +44,8 @@
'renderer/android/address_detector.h',
'renderer/android/content_detector.cc',
'renderer/android/content_detector.h',
+ 'renderer/android/email_detector.cc',
+ 'renderer/android/email_detector.h',
'renderer/active_notification_tracker.cc',
'renderer/active_notification_tracker.h',
'renderer/device_orientation_dispatcher.cc',
diff --git a/content/content_tests.gypi b/content/content_tests.gypi
index 4758bc8..e449b26 100644
--- a/content/content_tests.gypi
+++ b/content/content_tests.gypi
@@ -285,6 +285,7 @@
'gpu/gpu_info_collector_unittest_win.cc',
'renderer/active_notification_tracker_unittest.cc',
'renderer/android/address_detector_unittest.cc',
+ 'renderer/android/email_detector_unittest.cc',
'renderer/gpu/input_event_filter_unittest.cc',
'renderer/media/audio_message_filter_unittest.cc',
'renderer/media/capture_video_decoder_unittest.cc',
diff --git a/content/renderer/android/email_detector.cc b/content/renderer/android/email_detector.cc
new file mode 100644
index 0000000..1178492
--- /dev/null
+++ b/content/renderer/android/email_detector.cc
@@ -0,0 +1,74 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "content/renderer/android/email_detector.h"
+
+#include "base/memory/scoped_ptr.h"
+#include "base/utf_string_conversions.h"
+#include "net/base/escape.h"
+#include "unicode/regex.h"
+
+namespace {
+
+// Longest email address considered; returned by GetMaximumContentLength().
+const size_t kMaximumEmailLength = 254;
+
+// URI scheme that GetIntentURL() prepends to the escaped address.
+const char kEmailSchemaPrefix[] = "mailto:";
+
+// Regex to match email addresses: local part, '@', dotted domain, then a TLD
+// of 2 to 6 letters. This is more specific than RFC 2822 (uncommon special
+// characters are disallowed) in order to avoid false positives. Only A-Z is
+// listed because FindContent() applies UREGEX_CASE_INSENSITIVE. The \b word
+// boundaries allow punctuation, quote marks etc. around the address.
+const char kEmailRegex[] = "\\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,6}\\b";
+
+} // anonymous namespace
+
+namespace content {
+
+// Nothing to set up: the class declares no data members of its own.
+EmailDetector::EmailDetector() {}
+
+// Reports kMaximumEmailLength as the longest content this detector handles.
+size_t EmailDetector::GetMaximumContentLength() { return kMaximumEmailLength; }
+
+// Builds the intent URL for |content_text|: kEmailSchemaPrefix followed by the
+// text escaped with net::EscapeQueryParamValue(). Empty input yields a
+// default-constructed GURL.
+GURL EmailDetector::GetIntentURL(const std::string& content_text) {
+  GURL intent_url;
+  if (!content_text.empty()) {
+    const std::string escaped_address =
+        net::EscapeQueryParamValue(content_text, true);
+    intent_url = GURL(kEmailSchemaPrefix + escaped_address);
+  }
+  return intent_url;
+}
+
+// Searches [begin, end) for the first substring matching kEmailRegex, ignoring
+// case. On success returns true, stores the match's offsets relative to
+// |begin| in |start_pos| (first matched unit) and |end_pos| (one past the last
+// matched unit), and writes the matched text, converted to UTF-8, to
+// |content_text|. Returns false and leaves the out-parameters untouched when
+// nothing matches or when ICU reports an error.
+bool EmailDetector::FindContent(const string16::const_iterator& begin,
+                                const string16::const_iterator& end,
+                                size_t* start_pos,
+                                size_t* end_pos,
+                                std::string* content_text) {
+  // Copy the range into a string so ICU can be handed a buffer plus a length.
+  const string16 utf16_input(begin, end);
+  const icu::UnicodeString pattern(kEmailRegex);
+  const icu::UnicodeString input(utf16_input.data(), utf16_input.length());
+
+  // The matcher lives on the stack (no heap allocation needed). |input| is
+  // declared before it and therefore outlives it.
+  UErrorCode status = U_ZERO_ERROR;
+  icu::RegexMatcher matcher(pattern, input, UREGEX_CASE_INSENSITIVE, status);
+
+  // A matcher whose construction failed must not be queried; treat that the
+  // same as "no email found".
+  if (U_FAILURE(status) || !matcher.find())
+    return false;
+
+  // ICU functions taking a UErrorCode return immediately once it holds a
+  // failure, so a single check after the three calls covers all of them.
+  const int32_t match_start = matcher.start(status);
+  const int32_t match_end = matcher.end(status);
+  const icu::UnicodeString content_ustr(matcher.group(status));
+  if (U_FAILURE(status))
+    return false;
+
+  // Only publish results once every step succeeded, so callers never see a
+  // negative ICU error offset converted to a huge size_t.
+  *start_pos = static_cast<size_t>(match_start);
+  *end_pos = static_cast<size_t>(match_end);
+  UTF16ToUTF8(content_ustr.getBuffer(), content_ustr.length(), content_text);
+  return true;
+}
+
+} // namespace content
diff --git a/content/renderer/android/email_detector.h b/content/renderer/android/email_detector.h
new file mode 100644
index 0000000..cda43c6
--- /dev/null
+++ b/content/renderer/android/email_detector.h
@@ -0,0 +1,38 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CONTENT_RENDERER_ANDROID_EMAIL_DETECTOR_H_
+#define CONTENT_RENDERER_ANDROID_EMAIL_DETECTOR_H_
+#pragma once
+
+#include "content/renderer/android/content_detector.h"
+
+class EmailDetectorTest;
+
+namespace content {
+
+// ContentDetector that finds an email address (most common formats, without
+// uncommon special characters) in the given text and builds its intent URL.
+class EmailDetector : public ContentDetector {
+ public:
+ EmailDetector();
+
+ private:
+ friend class ::EmailDetectorTest;  // The unit test calls FindContent().
+
+ // ContentDetector overrides. Private: only the friend test calls them here.
+ virtual bool FindContent(const string16::const_iterator& begin,
+ const string16::const_iterator& end,
+ size_t* start_pos,
+ size_t* end_pos,
+ std::string* content_text) OVERRIDE;
+ virtual GURL GetIntentURL(const std::string& content_text) OVERRIDE;
+ virtual size_t GetMaximumContentLength() OVERRIDE;
+
+ DISALLOW_COPY_AND_ASSIGN(EmailDetector);
+};
+
+} // namespace content
+
+#endif // CONTENT_RENDERER_ANDROID_EMAIL_DETECTOR_H_
diff --git a/content/renderer/android/email_detector_unittest.cc b/content/renderer/android/email_detector_unittest.cc
new file mode 100644
index 0000000..a68184e
--- /dev/null
+++ b/content/renderer/android/email_detector_unittest.cc
@@ -0,0 +1,51 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "content/renderer/android/email_detector.h"
+
+#include "base/utf_string_conversions.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+using content::EmailDetector;
+
+// Fixture for EmailDetector tests. It is a friend of EmailDetector, which lets
+// the helper below call the detector's private FindContent() override.
+class EmailDetectorTest : public testing::Test {
+ public:
+  // Runs EmailDetector::FindContent() over |content| and expects both the
+  // [start, end) slice of the input it reports and the content text it returns
+  // to equal |expected|. An empty |expected| means no address may be found.
+  static void FindAndCheckEmail(const std::string& content,
+                                const std::string& expected) {
+    const string16 content_16 = UTF8ToUTF16(content);
+    EmailDetector detector;
+
+    // Zero-initialized so that a detector which reports success without
+    // writing the offsets fails the checks below deterministically instead of
+    // slicing with indeterminate values.
+    size_t start = 0;
+    size_t end = 0;
+    std::string content_text;
+    string16 result_16;
+    if (detector.FindContent(content_16.begin(), content_16.end(),
+                             &start, &end, &content_text)) {
+      result_16 = content_16.substr(start, end - start);
+    }
+
+    EXPECT_EQ(expected, UTF16ToUTF8(result_16));
+    EXPECT_EQ(expected, content_text);
+  }
+};
+
+// Feeds FindAndCheckEmail() a table of inputs. Rows with a non-empty |email|
+// must yield exactly that address (the first one, without surrounding
+// punctuation or quotes); rows with an empty |email| must yield no match.
+TEST_F(EmailDetectorTest, FindEmail) {
+  static const struct {
+    const char* text;
+    const char* email;
+  } kCases[] = {
+    // Inputs containing an address.
+    { "please email test@testing.com", "test@testing.com" },
+    { "please email test@123.456.co.uk.", "test@123.456.co.uk" },
+    { "My email is 'a@b.org'.", "a@b.org" },
+    { "123@bcd.org", "123@bcd.org" },
+    { "[quitelongwelllongemailaddress@somequitelongdomain.org]",
+      "quitelongwelllongemailaddress@somequitelongdomain.org" },
+    { "Should find the first@email.org not the second@email.org",
+      "first@email.org" },
+    { "Email:HELLO@SOMETHING.COM", "HELLO@SOMETHING.COM" },
+    { "Email SOMEONE@GOOGLE.COM for details.", "SOMEONE@GOOGLE.COM" },
+    { "It's \"testadd@company.fr\"", "testadd@company.fr" },
+    // Inputs that must not be detected as addresses.
+    { "This is not an @emailaddress.com", "" },
+    { "Apples @2.50 each", "" },
+    { "Log on to google.com", "" },
+    { "Try someone@, they might know.", "" },
+    { "No, bob@com is not an email address.", "" },
+    { "@", "" },
+    { "Just bob @google.com", "" },
+    { "Why not call larry@google and ask him.", "" },
+  };
+
+  for (size_t i = 0; i < sizeof(kCases) / sizeof(kCases[0]); ++i)
+    FindAndCheckEmail(kCases[i].text, kCases[i].email);
+}