summary refs log tree commit diff stats
path: root/common/src
diff options
context:
space:
mode:
Diffstat (limited to 'common/src')
-rw-r--r-- common/src/com/android/common/Patterns.java 209
1 files changed, 209 insertions, 0 deletions
diff --git a/common/src/com/android/common/Patterns.java b/common/src/com/android/common/Patterns.java
new file mode 100644
index 0000000..2eab3e1
--- /dev/null
+++ b/common/src/com/android/common/Patterns.java
@@ -0,0 +1,209 @@
+/*
+ * Copyright (C) 2007 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.common;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Commonly used regular expression patterns.
+ */
+public class Patterns {
+ /**
+ * Regular expression pattern to match all IANA top-level domains.
+ * List accurate as of 2007/06/15, so it is a snapshot: top-level domains
+ * added after that date are not included. List taken from:
+ * http://data.iana.org/TLD/tlds-alpha-by-domain.txt
+ * This pattern is auto-generated by //device/tools/make-iana-tld-pattern.py
+ *
+ * <p>The pattern is compiled without flags and lists the domains in lower
+ * case only, so matching is case-sensitive. It is not anchored: use
+ * {@link Matcher#matches()} to test a whole string. Within each letter
+ * group the full names (for example "com" and "coop") are listed before
+ * the two-letter character class, so they are tried first.
+ */
+ public static final Pattern TOP_LEVEL_DOMAIN
+ = Pattern.compile(
+ "((aero|arpa|asia|a[cdefgilmnoqrstuwxz])"
+ + "|(biz|b[abdefghijmnorstvwyz])"
+ + "|(cat|com|coop|c[acdfghiklmnoruvxyz])"
+ + "|d[ejkmoz]"
+ + "|(edu|e[cegrstu])"
+ + "|f[ijkmor]"
+ + "|(gov|g[abdefghilmnpqrstuwy])"
+ + "|h[kmnrtu]"
+ + "|(info|int|i[delmnoqrst])"
+ + "|(jobs|j[emop])"
+ + "|k[eghimnrwyz]"
+ + "|l[abcikrstuvy]"
+ + "|(mil|mobi|museum|m[acdghklmnopqrstuvwxyz])"
+ + "|(name|net|n[acefgilopruz])"
+ + "|(org|om)"
+ + "|(pro|p[aefghklmnrstwy])"
+ + "|qa"
+ + "|r[eouw]"
+ + "|s[abcdeghijklmnortuvyz]"
+ + "|(tel|travel|t[cdfghjklmnoprtvwz])"
+ + "|u[agkmsyz]"
+ + "|v[aceginu]"
+ + "|w[fs]"
+ + "|y[etu]"
+ + "|z[amw])");
+
+ /**
+ * Regular expression pattern to match web URLs (described by the original
+ * author as RFC 1738 URLs).
+ *
+ * <p>In order, the pattern accepts:
+ * <ul>
+ * <li>optionally, an http, https or rtsp scheme (all lower case, or with a
+ * capital first letter) followed by "://" and an optional
+ * user[:password]@ part (user up to 64 characters, password up to 25);
+ * <li>a host: either dot-separated labels ending in one of the top-level
+ * domains listed inline, or a dotted-quad IP address;
+ * <li>optionally, a ":" and a port of one to five digits;
+ * <li>optionally, a path/query part starting with "/";
+ * <li>a word boundary or the end of input.
+ * </ul>
+ *
+ * <p>Capturing groups: 1 = the optional scheme and user part together with
+ * the host and port; 2 = the scheme; 3 = the host; 4 = the path/query part.
+ *
+ * <p>The inline top-level domain list has the same entries as
+ * {@link #TOP_LEVEL_DOMAIN}: accurate as of 2007/06/15, taken from
+ * http://data.iana.org/TLD/tlds-alpha-by-domain.txt and auto-generated by
+ * //device/tools/make-iana-tld-pattern.py
+ */
+ public static final Pattern WEB_URL
+ = Pattern.compile(
+ "((?:(http|https|Http|Https|rtsp|Rtsp):\\/\\/(?:(?:[a-zA-Z0-9\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)"
+ + "\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,64}(?:\\:(?:[a-zA-Z0-9\\$\\-\\_"
+ + "\\.\\+\\!\\*\\'\\(\\)\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,25})?\\@)?)?"
+ + "((?:(?:[a-zA-Z0-9][a-zA-Z0-9\\-]{0,64}\\.)+" // named host
+ + "(?:" // plus top level domain
+ + "(?:aero|arpa|asia|a[cdefgilmnoqrstuwxz])"
+ + "|(?:biz|b[abdefghijmnorstvwyz])"
+ + "|(?:cat|com|coop|c[acdfghiklmnoruvxyz])"
+ + "|d[ejkmoz]"
+ + "|(?:edu|e[cegrstu])"
+ + "|f[ijkmor]"
+ + "|(?:gov|g[abdefghilmnpqrstuwy])"
+ + "|h[kmnrtu]"
+ + "|(?:info|int|i[delmnoqrst])"
+ + "|(?:jobs|j[emop])"
+ + "|k[eghimnrwyz]"
+ + "|l[abcikrstuvy]"
+ + "|(?:mil|mobi|museum|m[acdghklmnopqrstuvwxyz])"
+ + "|(?:name|net|n[acefgilopruz])"
+ + "|(?:org|om)"
+ + "|(?:pro|p[aefghklmnrstwy])"
+ + "|qa"
+ + "|r[eouw]"
+ + "|s[abcdeghijklmnortuvyz]"
+ + "|(?:tel|travel|t[cdfghjklmnoprtvwz])"
+ + "|u[agkmsyz]"
+ + "|v[aceginu]"
+ + "|w[fs]"
+ + "|y[etu]"
+ + "|z[amw]))"
+ + "|(?:(?:25[0-5]|2[0-4]" // or ip address
+ + "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(?:25[0-5]|2[0-4][0-9]"
+ + "|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1]"
+ + "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
+ + "|[1-9][0-9]|[0-9])))"
+ + "(?:\\:\\d{1,5})?)" // plus optional port number
+ + "(\\/(?:(?:[a-zA-Z0-9\\;\\/\\?\\:\\@\\&\\=\\#\\~" // plus optional path and query params
+ + "\\-\\.\\+\\!\\*\\'\\(\\)\\,\\_])|(?:\\%[a-fA-F0-9]{2}))*)?"
+ + "(?:\\b|$)"); // and finally, a word boundary or end of
+ // input. This is to stop foo.sure from
+ // matching as foo.su
+
+ /**
+ * Regular expression pattern to match a dotted-quad IPv4 address: four
+ * decimal numbers in the range 0-255 separated by dots.
+ *
+ * <p>Details visible in the expression: the first number cannot be a bare
+ * "0"; three-digit forms with leading zeros such as "001" are accepted;
+ * and the pattern is not anchored, so use {@link Matcher#matches()} to
+ * validate a whole string.
+ *
+ * <p>Capturing groups: 1 = the whole address; 2 to 5 = the four numbers.
+ */
+ public static final Pattern IP_ADDRESS
+ = Pattern.compile(
+ "((25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(25[0-5]|2[0-4]"
+ + "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1]"
+ + "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
+ + "|[1-9][0-9]|[0-9]))");
+
+ /**
+  * Regular expression pattern to match a host: either one or more
+  * dot-terminated labels followed by a top-level domain from
+  * {@link #TOP_LEVEL_DOMAIN}, or an address matched by {@link #IP_ADDRESS}.
+  *
+  * <p>Each label starts and ends with a letter or digit and may contain
+  * dashes in between. The pattern is not anchored; use
+  * {@link Matcher#matches()} to validate a whole string.
+  *
+  * <p>The two other patterns are spliced in through string concatenation
+  * (which uses {@link Pattern#toString()}, i.e. their source expression),
+  * so this constant must be declared after them.
+  */
+ public static final Pattern DOMAIN_NAME
+     = Pattern.compile(
+         // The label prefix is optional ("?") rather than repeated ("*"):
+         // a repeated group that itself ends in "*" accepts exactly the same
+         // strings but lets a backtracking engine split a long run of letters
+         // in exponentially many ways before giving up on a non-match.
+         "(((([a-zA-Z0-9][a-zA-Z0-9\\-]*)?[a-zA-Z0-9]\\.)+"
+         + TOP_LEVEL_DOMAIN + ")|"
+         + IP_ADDRESS + ")");
+
+ /**
+ * Regular expression pattern to find things that look like e-mail
+ * addresses. In order, it accepts:
+ * <ul>
+ * <li>a local part of 1 to 256 letters, digits, or any of + . _ % -
+ * <li>an "@" sign
+ * <li>a first domain label: a letter or digit followed by up to 64
+ * letters, digits, or dashes
+ * <li>one or more further labels, each a "." followed by a letter or
+ * digit and then up to 25 letters, digits, or dashes
+ * </ul>
+ *
+ * <p>The last label is not checked against the top-level domain list, and
+ * the pattern is not anchored.
+ */
+ public static final Pattern EMAIL_ADDRESS
+ = Pattern.compile(
+ "[a-zA-Z0-9\\+\\.\\_\\%\\-]{1,256}" +
+ "\\@" +
+ "[a-zA-Z0-9][a-zA-Z0-9\\-]{0,64}" +
+ "(" +
+ "\\." +
+ "[a-zA-Z0-9][a-zA-Z0-9\\-]{0,25}" +
+ ")+"
+ );
+
+ /**
+ * This pattern is intended for searching for things that look like they
+ * might be phone numbers in arbitrary text, not for validating whether
+ * something is in fact a phone number. It will miss many things that
+ * are legitimate phone numbers.
+ *
+ * <p> The pattern matches the following, in order:
+ * <ul>
+ * <li>Optionally, a + sign followed immediately by one or more digits. Spaces, dots, or dashes
+ * may follow.
+ * <li>Optionally, one set of digits in parentheses. Spaces, dots, or dashes may follow.
+ * <li>A string starting and ending with a digit, containing digits, spaces, dots, and/or dashes,
+ * and at least four characters long.
+ * </ul>
+ *
+ * <p> Capturing groups 1, 2 and 3 correspond to the three parts above; groups 1 and 2 include
+ * any separators that follow them.
+ */
+ public static final Pattern PHONE
+ = Pattern.compile( // sdd = space, dot, or dash
+ "(\\+[0-9]+[\\- \\.]*)?" // +<digits><sdd>*
+ + "(\\([0-9]+\\)[\\- \\.]*)?" // (<digits>)<sdd>*
+ + "([0-9][0-9\\- \\.][0-9\\- \\.]+[0-9])"); // <digit><digit|sdd><digit|sdd>+<digit>
+
+ /**
+  * Concatenates the text of every capturing group of a successful match,
+  * in group order, skipping groups that did not take part in the match.
+  *
+  * <p>Group 0 (the whole match) is not included. If the pattern nests
+  * capturing groups, the nested text appears once for each group that
+  * contains it.
+  *
+  * @param matcher The Matcher object from which grouped text will
+  *                be extracted; it must hold a successful match
+  *
+  * @return A String comprising all of the non-null matched
+  *         groups concatenated together
+  *
+  * @throws IllegalStateException if no match has been attempted yet, or
+  *         the previous match operation failed
+  */
+ public static final String concatGroups(Matcher matcher) {
+     final StringBuilder joined = new StringBuilder();
+     final int groupCount = matcher.groupCount();
+
+     // Start at 1: index 0 is the entire match, not a capturing group.
+     for (int index = 1; index <= groupCount; index++) {
+         final String text = matcher.group(index);
+
+         // group(n) returns null for a group that did not participate.
+         if (text != null) {
+             joined.append(text);
+         }
+     }
+
+     return joined.toString();
+ }
+
+ /**
+  * Strips everything except digits and plus signs from the text of the
+  * current match.
+  *
+  * <p>Only the whole match ({@code matcher.group()}) is examined; the
+  * characters that are kept stay in their original order.
+  *
+  * @param matcher The Matcher object from which digits and plus will
+  *                be extracted
+  *
+  * @return A String comprising all of the digits and plus in
+  *         the match
+  */
+ public static final String digitsAndPlusOnly(Matcher matcher) {
+     final String matched = matcher.group();
+     final StringBuilder kept = new StringBuilder();
+
+     for (char c : matched.toCharArray()) {
+         final boolean wanted = Character.isDigit(c) || c == '+';
+         if (wanted) {
+             kept.append(c);
+         }
+     }
+     return kept.toString();
+ }
+
+ /**
+ * Private constructor: this is a static utility class and is not meant
+ * to be instantiated.
+ */
+ private Patterns() {}
+}