summaryrefslogtreecommitdiffstats
path: root/third_party
diff options
context:
space:
mode:
authorgeorgey@chromium.org <georgey@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-05-27 01:56:59 +0000
committergeorgey@chromium.org <georgey@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-05-27 01:56:59 +0000
commit866cd68e4fd9d4195026109b2e6d285b50334218 (patch)
tree6e3ddb634b1c85d9ffd4723303b53535a22eeccc /third_party
parentb53979fd573dc0041df20282789e65315000bac4 (diff)
downloadchromium_src-866cd68e4fd9d4195026109b2e6d285b50334218.zip
chromium_src-866cd68e4fd9d4195026109b2e6d285b50334218.tar.gz
chromium_src-866cd68e4fd9d4195026109b2e6d285b50334218.tar.bz2
Change UTF-8 charcters in constants into escape sequences
BUG=none TEST=unit-tested Review URL: http://codereview.chromium.org/7077021 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@86950 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'third_party')
-rw-r--r--third_party/libphonenumber/README.chromium3
-rw-r--r--third_party/libphonenumber/cpp/src/phonenumberutil.cc44
-rw-r--r--third_party/libphonenumber/cpp/src/phonenumberutil_test.cc36
-rw-r--r--third_party/libphonenumber/patches/utf8_v186.patch251
4 files changed, 293 insertions, 41 deletions
diff --git a/third_party/libphonenumber/README.chromium b/third_party/libphonenumber/README.chromium
index 9cf38ee..608a0a5 100644
--- a/third_party/libphonenumber/README.chromium
+++ b/third_party/libphonenumber/README.chromium
@@ -22,7 +22,8 @@ Additional files, not in the original library:
Until the changes are upstreamed library is included directly, with a patch
in patches/version186.patch applied. The patch adds plugability to the regular
expression engine, its RE2 (default) implementation, and the unit-test for the
-changes.
+changes. The second patch utf8_v186.patch is applied to allow compilation on
+some multi-byte locales.
The folders included in our repository for now are
cpp/
diff --git a/third_party/libphonenumber/cpp/src/phonenumberutil.cc b/third_party/libphonenumber/cpp/src/phonenumberutil.cc
index 75ef374..b0201f2 100644
--- a/third_party/libphonenumber/cpp/src/phonenumberutil.cc
+++ b/third_party/libphonenumber/cpp/src/phonenumberutil.cc
@@ -72,7 +72,7 @@ scoped_ptr<map<char32, char> > all_plus_number_grouping_symbols;
// The kPlusSign signifies the international prefix.
const char kPlusSign[] = "+";
-const char kPlusChars[] = "++";
+const char kPlusChars[] = "+\xEF\xBC\x8B";
scoped_ptr<const reg_exp::RegularExpression> plus_chars_pattern;
const char kRfc3966ExtnPrefix[] = ";ext=";
@@ -88,7 +88,7 @@ scoped_ptr<const reg_exp::RegularExpression> unique_international_prefix;
// Digits accepted in phone numbers.
// Both Arabic-Indic and Eastern Arabic-Indic are supported.
-const char kValidDigits[] = "0-90-9٠-٩۰-۹";
+const char kValidDigits[] = "0-9\xEF\xBC\x90-\xEF\xBC\x99\xD9\xA0-\xD9\xA9\xDB\xB0-\xDB\xB9";
// We accept alpha characters in phone numbers, ASCII only. We store lower-case
// here only since our regular expressions are case-insensitive.
const char kValidAlpha[] = "a-z";
@@ -140,7 +140,7 @@ scoped_ptr<const reg_exp::RegularExpression> unwanted_end_char_pattern;
// itself. In emacs, you can use M-x unicode-what to query information about the
// unicode character.
const char kValidPunctuation[] =
- "-x‐-―−ー--/  ​⁠ ()()[].\\[\\]/~⁓∼~";
+ "-x\xE2\x80\x90-\xE2\x80\x95\xE2\x88\x92\xE3\x83\xBC\xEF\xBC\x8D-\xEF\xBC\x8F \xC2\xA0\xE2\x80\x8B\xE2\x81\xA0\xE3\x80\x80()\xEF\xBC\x88\xEF\xBC\x89\xEF\xBC\xBB\xEF\xBC\xBD.\\[\\]/~\xE2\x81\x93\xE2\x88\xBC\xEF\xBD\x9E";
// Regular expression of viable phone numbers. This is location independent.
// Checks we have at least three leading digits, and only valid punctuation,
@@ -454,7 +454,7 @@ char32 ToUnicodeCodepoint(const char* unicode_char) {
// defined order.
void CreateRegularExpressions() {
unique_international_prefix.reset(
- reg_exp::CreateRegularExpression("[\\d]+(?:[~⁓∼~][\\d]+)?"));
+ reg_exp::CreateRegularExpression("[\\d]+(?:[~\xE2\x81\x93\xE2\x88\xBC\xEF\xBD\x9E][\\d]+)?"));
first_group_capturing_pattern.reset(
reg_exp::CreateRegularExpression("(\\$1)"));
carrier_code_pattern.reset(
@@ -476,16 +476,16 @@ void CreateRegularExpressions() {
StrCat("[", kPlusChars, "]*(?:[", kValidPunctuation, "]*[", kValidDigits,
"]){3,}[", kValidAlpha, kValidPunctuation, kValidDigits, "]*")));
// Canonical-equivalence doesn't seem to be an option with RE2, so we allow
- // two options for representing the ó - the character itself, and one in the
+ // two options for representing the \xC3\xB3 - the character itself, and one in the
// unicode decomposed form with the combining acute accent. Note that there
// are currently three capturing groups for the extension itself - if this
// number is changed, MaybeStripExtension needs to be updated.
const string capturing_extn_digits = StrCat("([", kValidDigits, "]{1,7})");
known_extn_patterns.reset(new string(
StrCat(kRfc3966ExtnPrefix, capturing_extn_digits, "|"
- "[  \\t,]*(?:ext(?:ensi(?:ó?|ó))?n?|extn?|[,xx##~~]|"
- "int|int|anexo)"
- "[:\\..]?[  \\t,-]*", capturing_extn_digits, "#?|"
+ "[ \xC2\xA0\\t,]*(?:ext(?:ensi(?:o\xCC\x81?|\xC3\xB3))?n?|\xEF\xBD\x85\xEF\xBD\x98\xEF\xBD\x94\xEF\xBD\x8E?|[,x\xEF\xBD\x98#\xEF\xBC\x83~\xEF\xBD\x9E]|"
+ "int|\xEF\xBD\x89\xEF\xBD\x8E\xEF\xBD\x94|anexo)"
+ "[:\\.\xEF\xBC\x8E]?[ \xC2\xA0\\t,-]*", capturing_extn_digits, "#?|"
"[- ]+([", kValidDigits, "]{1,5})#")));
extn_pattern.reset(reg_exp::CreateRegularExpression(
StrCat("(?i)(?:", *known_extn_patterns, ")$").c_str()));
@@ -509,35 +509,35 @@ void InitializeStaticMapsAndSets() {
all_plus_number_grouping_symbols->insert(
make_pair(ToUnicodeCodepoint("-"), '-'));
all_plus_number_grouping_symbols->insert(
- make_pair(ToUnicodeCodepoint("-"), '-'));
+ make_pair(ToUnicodeCodepoint("\xEF\xBC\x8D"), '-'));
all_plus_number_grouping_symbols->insert(
- make_pair(ToUnicodeCodepoint("‐"), '-'));
+ make_pair(ToUnicodeCodepoint("\xE2\x80\x90"), '-'));
all_plus_number_grouping_symbols->insert(
- make_pair(ToUnicodeCodepoint("‑"), '-'));
+ make_pair(ToUnicodeCodepoint("\xE2\x80\x91"), '-'));
all_plus_number_grouping_symbols->insert(
- make_pair(ToUnicodeCodepoint("‒"), '-'));
+ make_pair(ToUnicodeCodepoint("\xE2\x80\x92"), '-'));
all_plus_number_grouping_symbols->insert(
- make_pair(ToUnicodeCodepoint("–"), '-'));
+ make_pair(ToUnicodeCodepoint("\xE2\x80\x93"), '-'));
all_plus_number_grouping_symbols->insert(
- make_pair(ToUnicodeCodepoint("—"), '-'));
+ make_pair(ToUnicodeCodepoint("\xE2\x80\x94"), '-'));
all_plus_number_grouping_symbols->insert(
- make_pair(ToUnicodeCodepoint("―"), '-'));
+ make_pair(ToUnicodeCodepoint("\xE2\x80\x95"), '-'));
all_plus_number_grouping_symbols->insert(
- make_pair(ToUnicodeCodepoint("−"), '-'));
+ make_pair(ToUnicodeCodepoint("\xE2\x88\x92"), '-'));
all_plus_number_grouping_symbols->insert(
make_pair(ToUnicodeCodepoint("/"), '/'));
all_plus_number_grouping_symbols->insert(
- make_pair(ToUnicodeCodepoint("/"), '/'));
+ make_pair(ToUnicodeCodepoint("\xEF\xBC\x8F"), '/'));
all_plus_number_grouping_symbols->insert(
make_pair(ToUnicodeCodepoint(" "), ' '));
all_plus_number_grouping_symbols->insert(
- make_pair(ToUnicodeCodepoint(" "), ' '));
+ make_pair(ToUnicodeCodepoint("\xE3\x80\x80"), ' '));
all_plus_number_grouping_symbols->insert(
- make_pair(ToUnicodeCodepoint("⁠"), ' '));
+ make_pair(ToUnicodeCodepoint("\xE2\x81\xA0"), ' '));
all_plus_number_grouping_symbols->insert(
make_pair(ToUnicodeCodepoint("."), '.'));
all_plus_number_grouping_symbols->insert(
- make_pair(ToUnicodeCodepoint("."), '.'));
+ make_pair(ToUnicodeCodepoint("\xEF\xBC\x8E"), '.'));
// Only the upper-case letters are added here - the lower-case versions are
// added programmatically.
alpha_mappings->insert(make_pair(ToUnicodeCodepoint("A"), '2'));
@@ -849,7 +849,7 @@ void PhoneNumberUtil::Format(const PhoneNumber& number,
// Note here that all NANPA formatting rules are contained by US, so we use
// that to format NANPA numbers. The same applies to Russian Fed regions -
// rules are contained by Russia. French Indian Ocean country rules are
- // contained by Réunion.
+ // contained by R\xC3\xA9union.
string region_code;
GetRegionCodeForCountryCode(country_calling_code, &region_code);
if (!HasValidRegionCode(region_code, country_calling_code,
@@ -1015,7 +1015,7 @@ void PhoneNumberUtil::FormatOutOfCountryCallingNumber(
// For regions that share a country calling code, the country calling code
// need not be dialled. This also applies when dialling within a region, so
// this if clause covers both these cases.
- // Technically this is the case for dialling from la Réunion to other
+ // Technically this is the case for dialling from la R\xC3\xA9union to other
// overseas departments of France (French Guiana, Martinique, Guadeloupe),
// but not vice versa - so we don't cover this edge case for now and for
// those cases return the version including country calling code.
diff --git a/third_party/libphonenumber/cpp/src/phonenumberutil_test.cc b/third_party/libphonenumber/cpp/src/phonenumberutil_test.cc
index 3ef3fa7..9eadaf9 100644
--- a/third_party/libphonenumber/cpp/src/phonenumberutil_test.cc
+++ b/third_party/libphonenumber/cpp/src/phonenumberutil_test.cc
@@ -1101,11 +1101,11 @@ TEST_F(PhoneNumberUtilTest, ExtractPossibleNumber) {
ExtractPossibleNumber("Tel:+800-345-600", &extracted_number);
EXPECT_EQ("+800-345-600", extracted_number);
// Should recognise wide digits as possible start values.
- ExtractPossibleNumber("023", &extracted_number);
- EXPECT_EQ("023", extracted_number);
+ ExtractPossibleNumber("\xEF\xBC\x90\xEF\xBC\x92\xEF\xBC\x93", &extracted_number);
+ EXPECT_EQ("\xEF\xBC\x90\xEF\xBC\x92\xEF\xBC\x93", extracted_number);
// Dashes are not possible start values and should be removed.
- ExtractPossibleNumber("Num-123", &extracted_number);
- EXPECT_EQ("123", extracted_number);
+ ExtractPossibleNumber("Num-\xEF\xBC\x91\xEF\xBC\x92\xEF\xBC\x93", &extracted_number);
+ EXPECT_EQ("\xEF\xBC\x91\xEF\xBC\x92\xEF\xBC\x93", extracted_number);
// If not possible number present, return empty string.
ExtractPossibleNumber("Num-....", &extracted_number);
EXPECT_EQ("", extracted_number);
@@ -1119,7 +1119,7 @@ TEST_F(PhoneNumberUtilTest, ExtractPossibleNumber) {
ExtractPossibleNumber("(650) 253-0000.", &extracted_number);
EXPECT_EQ("650) 253-0000", extracted_number);
// This case has a trailing RTL char.
- ExtractPossibleNumber("(650) 253-0000‏", &extracted_number);
+ ExtractPossibleNumber("(650) 253-0000\xE2\x80\x8F", &extracted_number);
EXPECT_EQ("650) 253-0000", extracted_number);
}
@@ -1163,7 +1163,7 @@ TEST_F(PhoneNumberUtilTest, IsValidForRegion) {
// This number is no longer valid.
EXPECT_FALSE(phone_util_.IsValidNumber(bs_number));
- // La Mayotte and Réunion use 'leadingDigits' to differentiate them.
+ // La Mayotte and R\xC3\xA9union use 'leadingDigits' to differentiate them.
PhoneNumber re_number;
re_number.set_country_code(262);
re_number.set_national_number(262123456ULL);
@@ -1631,13 +1631,13 @@ TEST_F(PhoneNumberUtilTest, IsViablePhoneNumber) {
EXPECT_TRUE(IsViablePhoneNumber("0800-4-PIZZA"));
// Only one or two digits before possible punctuation followed by more digits.
// The punctuation used here is the unicode character u+3000.
- EXPECT_TRUE(IsViablePhoneNumber("1 34"));
- EXPECT_FALSE(IsViablePhoneNumber("1 3+4"));
+ EXPECT_TRUE(IsViablePhoneNumber("1\xE3\x80\x80" "34"));
+ EXPECT_FALSE(IsViablePhoneNumber("1\xE3\x80\x80" "3+4"));
// Unicode variants of possible starting character and other allowed
// punctuation/digits.
- EXPECT_TRUE(IsViablePhoneNumber("(1) 3456789"));
+ EXPECT_TRUE(IsViablePhoneNumber("\xEF\xBC\x88" "1\xEF\xBC\x89\xE3\x80\x80" "3456789"));
// Testing a leading + is okay.
- EXPECT_TRUE(IsViablePhoneNumber("+1) 3456789"));
+ EXPECT_TRUE(IsViablePhoneNumber("+1\xEF\xBC\x89\xE3\x80\x80" "3456789"));
}
TEST_F(PhoneNumberUtilTest, NormaliseRemovePunctuation) {
@@ -1659,13 +1659,13 @@ TEST_F(PhoneNumberUtilTest, NormaliseReplaceAlphaCharacters) {
TEST_F(PhoneNumberUtilTest, NormaliseOtherDigits) {
// The first digit is a full-width 2, the last digit is an Arabic-indic digit
// 5.
- string input_number("25٥");
+ string input_number("\xEF\xBC\x92" "5\xD9\xA5");
Normalize(&input_number);
static const string kExpectedOutput("255");
EXPECT_EQ(kExpectedOutput, input_number)
<< "Conversion did not correctly replace non-latin digits";
// The first digit is an Eastern-Arabic 5, the latter an Eastern-Arabic 0.
- string eastern_arabic_input_number("۵2۰");
+ string eastern_arabic_input_number("\xDB\xB5" "2\xDB\xB0");
Normalize(&eastern_arabic_input_number);
static const string kExpectedOutput2("520");
EXPECT_EQ(kExpectedOutput2, eastern_arabic_input_number)
@@ -2321,21 +2321,21 @@ TEST_F(PhoneNumberUtilTest, ParseWithInternationalPrefixes) {
// Using a full-width plus sign.
test_number.Clear();
EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,
- phone_util_.Parse("+1 (650) 333-6000",
+ phone_util_.Parse("\xEF\xBC\x8B" "1 (650) 333-6000",
RegionCode::SG(), &test_number));
EXPECT_EQ(us_number, test_number);
// The whole number, including punctuation, is here represented in full-width
// form.
test_number.Clear();
EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,
- phone_util_.Parse("+1 (650) 333-6000",
+ phone_util_.Parse("\xEF\xBC\x8B\xEF\xBC\x91\xE3\x80\x80\xEF\xBC\x88\xEF\xBC\x96\xEF\xBC\x95\xEF\xBC\x90\xEF\xBC\x89\xE3\x80\x80\xEF\xBC\x93\xEF\xBC\x93\xEF\xBC\x93\xEF\xBC\x8D\xEF\xBC\x96\xEF\xBC\x90\xEF\xBC\x90\xEF\xBC\x90",
RegionCode::SG(), &test_number));
EXPECT_EQ(us_number, test_number);
// Using the U+30FC dash.
test_number.Clear();
EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,
- phone_util_.Parse("+1 (650) 333ー6000",
+ phone_util_.Parse("\xEF\xBC\x8B\xEF\xBC\x91\xE3\x80\x80\xEF\xBC\x88\xEF\xBC\x96\xEF\xBC\x95\xEF\xBC\x90\xEF\xBC\x89\xE3\x80\x80\xEF\xBC\x93\xEF\xBC\x93\xEF\xBC\x93\xE3\x83\xBC\xEF\xBC\x96\xEF\xBC\x90\xEF\xBC\x90\xEF\xBC\x90",
RegionCode::SG(), &test_number));
EXPECT_EQ(us_number, test_number);
}
@@ -2575,7 +2575,7 @@ TEST_F(PhoneNumberUtilTest, ParseNumbersWithPlusWithNoRegion) {
// Test with full-width plus.
result_proto.Clear();
EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,
- phone_util_.Parse("+64 3 331 6005", RegionCode::ZZ(),
+ phone_util_.Parse("\xEF\xBC\x8B" "64 3 331 6005", RegionCode::ZZ(),
&result_proto));
EXPECT_EQ(nz_number, result_proto);
// Test with normal plus but leading characters that need to be stripped.
@@ -2733,7 +2733,7 @@ TEST_F(PhoneNumberUtilTest, ParseExtensions) {
EXPECT_EQ(us_with_extension, test_number);
test_number.Clear();
EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,
- phone_util_.Parse("(800) 901-3355 ,extensión 7246433",
+ phone_util_.Parse("(800) 901-3355 ,extensi\xC3\xB3n 7246433",
RegionCode::US(),
&test_number));
EXPECT_EQ(us_with_extension, test_number);
@@ -2741,7 +2741,7 @@ TEST_F(PhoneNumberUtilTest, ParseExtensions) {
// Repeat with the small letter o with acute accent created by combining
// characters.
EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,
- phone_util_.Parse("(800) 901-3355 ,extensión 7246433",
+ phone_util_.Parse("(800) 901-3355 ,extensio\xCC\x81n 7246433",
RegionCode::US(),
&test_number));
EXPECT_EQ(us_with_extension, test_number);
diff --git a/third_party/libphonenumber/patches/utf8_v186.patch b/third_party/libphonenumber/patches/utf8_v186.patch
new file mode 100644
index 0000000..9545bab
--- /dev/null
+++ b/third_party/libphonenumber/patches/utf8_v186.patch
@@ -0,0 +1,251 @@
+Index: D:/src/src/third_party/libphonenumber/cpp/src/phonenumberutil_test.cc
+===================================================================
+--- phonenumberutil_test.cc (revision 186)
++++ phonenumberutil_test.cc (working copy)
+@@ -1101,11 +1101,11 @@
+ ExtractPossibleNumber("Tel:+800-345-600", &extracted_number);
+ EXPECT_EQ("+800-345-600", extracted_number);
+ // Should recognise wide digits as possible start values.
+- ExtractPossibleNumber("023", &extracted_number);
+- EXPECT_EQ("023", extracted_number);
++ ExtractPossibleNumber("\xEF\xBC\x90\xEF\xBC\x92\xEF\xBC\x93", &extracted_number);
++ EXPECT_EQ("\xEF\xBC\x90\xEF\xBC\x92\xEF\xBC\x93", extracted_number);
+ // Dashes are not possible start values and should be removed.
+- ExtractPossibleNumber("Num-123", &extracted_number);
+- EXPECT_EQ("123", extracted_number);
++ ExtractPossibleNumber("Num-\xEF\xBC\x91\xEF\xBC\x92\xEF\xBC\x93", &extracted_number);
++ EXPECT_EQ("\xEF\xBC\x91\xEF\xBC\x92\xEF\xBC\x93", extracted_number);
+ // If not possible number present, return empty string.
+ ExtractPossibleNumber("Num-....", &extracted_number);
+ EXPECT_EQ("", extracted_number);
+@@ -1119,7 +1119,7 @@
+ ExtractPossibleNumber("(650) 253-0000.", &extracted_number);
+ EXPECT_EQ("650) 253-0000", extracted_number);
+ // This case has a trailing RTL char.
+- ExtractPossibleNumber("(650) 253-0000‏", &extracted_number);
++ ExtractPossibleNumber("(650) 253-0000\xE2\x80\x8F", &extracted_number);
+ EXPECT_EQ("650) 253-0000", extracted_number);
+ }
+
+@@ -1163,7 +1163,7 @@
+ // This number is no longer valid.
+ EXPECT_FALSE(phone_util_.IsValidNumber(bs_number));
+
+- // La Mayotte and Réunion use 'leadingDigits' to differentiate them.
++ // La Mayotte and R\xC3\xA9union use 'leadingDigits' to differentiate them.
+ PhoneNumber re_number;
+ re_number.set_country_code(262);
+ re_number.set_national_number(262123456ULL);
+@@ -1631,13 +1631,13 @@
+ EXPECT_TRUE(IsViablePhoneNumber("0800-4-PIZZA"));
+ // Only one or two digits before possible punctuation followed by more digits.
+ // The punctuation used here is the unicode character u+3000.
+- EXPECT_TRUE(IsViablePhoneNumber("1 34"));
+- EXPECT_FALSE(IsViablePhoneNumber("1 3+4"));
++ EXPECT_TRUE(IsViablePhoneNumber("1\xE3\x80\x80" "34"));
++ EXPECT_FALSE(IsViablePhoneNumber("1\xE3\x80\x80" "3+4"));
+ // Unicode variants of possible starting character and other allowed
+ // punctuation/digits.
+- EXPECT_TRUE(IsViablePhoneNumber("(1) 3456789"));
++ EXPECT_TRUE(IsViablePhoneNumber("\xEF\xBC\x88" "1\xEF\xBC\x89\xE3\x80\x80" "3456789"));
+ // Testing a leading + is okay.
+- EXPECT_TRUE(IsViablePhoneNumber("+1) 3456789"));
++ EXPECT_TRUE(IsViablePhoneNumber("+1\xEF\xBC\x89\xE3\x80\x80" "3456789"));
+ }
+
+ TEST_F(PhoneNumberUtilTest, NormaliseRemovePunctuation) {
+@@ -1659,13 +1659,13 @@
+ TEST_F(PhoneNumberUtilTest, NormaliseOtherDigits) {
+ // The first digit is a full-width 2, the last digit is an Arabic-indic digit
+ // 5.
+- string input_number("25٥");
++ string input_number("\xEF\xBC\x92" "5\xD9\xA5");
+ Normalize(&input_number);
+ static const string kExpectedOutput("255");
+ EXPECT_EQ(kExpectedOutput, input_number)
+ << "Conversion did not correctly replace non-latin digits";
+ // The first digit is an Eastern-Arabic 5, the latter an Eastern-Arabic 0.
+- string eastern_arabic_input_number("۵2۰");
++ string eastern_arabic_input_number("\xDB\xB5" "2\xDB\xB0");
+ Normalize(&eastern_arabic_input_number);
+ static const string kExpectedOutput2("520");
+ EXPECT_EQ(kExpectedOutput2, eastern_arabic_input_number)
+@@ -2321,21 +2321,21 @@
+ // Using a full-width plus sign.
+ test_number.Clear();
+ EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,
+- phone_util_.Parse("+1 (650) 333-6000",
++ phone_util_.Parse("\xEF\xBC\x8B" "1 (650) 333-6000",
+ RegionCode::SG(), &test_number));
+ EXPECT_EQ(us_number, test_number);
+ // The whole number, including punctuation, is here represented in full-width
+ // form.
+ test_number.Clear();
+ EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,
+- phone_util_.Parse("+1 (650) 333-6000",
++ phone_util_.Parse("\xEF\xBC\x8B\xEF\xBC\x91\xE3\x80\x80\xEF\xBC\x88\xEF\xBC\x96\xEF\xBC\x95\xEF\xBC\x90\xEF\xBC\x89\xE3\x80\x80\xEF\xBC\x93\xEF\xBC\x93\xEF\xBC\x93\xEF\xBC\x8D\xEF\xBC\x96\xEF\xBC\x90\xEF\xBC\x90\xEF\xBC\x90",
+ RegionCode::SG(), &test_number));
+ EXPECT_EQ(us_number, test_number);
+
+ // Using the U+30FC dash.
+ test_number.Clear();
+ EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,
+- phone_util_.Parse("+1 (650) 333ー6000",
++ phone_util_.Parse("\xEF\xBC\x8B\xEF\xBC\x91\xE3\x80\x80\xEF\xBC\x88\xEF\xBC\x96\xEF\xBC\x95\xEF\xBC\x90\xEF\xBC\x89\xE3\x80\x80\xEF\xBC\x93\xEF\xBC\x93\xEF\xBC\x93\xE3\x83\xBC\xEF\xBC\x96\xEF\xBC\x90\xEF\xBC\x90\xEF\xBC\x90",
+ RegionCode::SG(), &test_number));
+ EXPECT_EQ(us_number, test_number);
+ }
+@@ -2575,7 +2575,7 @@
+ // Test with full-width plus.
+ result_proto.Clear();
+ EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,
+- phone_util_.Parse("+64 3 331 6005", RegionCode::ZZ(),
++ phone_util_.Parse("\xEF\xBC\x8B" "64 3 331 6005", RegionCode::ZZ(),
+ &result_proto));
+ EXPECT_EQ(nz_number, result_proto);
+ // Test with normal plus but leading characters that need to be stripped.
+@@ -2733,7 +2733,7 @@
+ EXPECT_EQ(us_with_extension, test_number);
+ test_number.Clear();
+ EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,
+- phone_util_.Parse("(800) 901-3355 ,extensión 7246433",
++ phone_util_.Parse("(800) 901-3355 ,extensi\xC3\xB3n 7246433",
+ RegionCode::US(),
+ &test_number));
+ EXPECT_EQ(us_with_extension, test_number);
+@@ -2741,7 +2741,7 @@
+ // Repeat with the small letter o with acute accent created by combining
+ // characters.
+ EXPECT_EQ(PhoneNumberUtil::NO_PARSING_ERROR,
+- phone_util_.Parse("(800) 901-3355 ,extensión 7246433",
++ phone_util_.Parse("(800) 901-3355 ,extensio\xCC\x81n 7246433",
+ RegionCode::US(),
+ &test_number));
+ EXPECT_EQ(us_with_extension, test_number);
+Index: D:/src/src/third_party/libphonenumber/cpp/src/phonenumberutil.cc
+===================================================================
+--- phonenumberutil.cc (revision 186)
++++ phonenumberutil.cc (working copy)
+@@ -72,7 +72,7 @@
+ // The kPlusSign signifies the international prefix.
+ const char kPlusSign[] = "+";
+
+-const char kPlusChars[] = "++";
++const char kPlusChars[] = "+\xEF\xBC\x8B";
+ scoped_ptr<const reg_exp::RegularExpression> plus_chars_pattern;
+
+ const char kRfc3966ExtnPrefix[] = ";ext=";
+@@ -88,7 +88,7 @@
+
+ // Digits accepted in phone numbers.
+ // Both Arabic-Indic and Eastern Arabic-Indic are supported.
+-const char kValidDigits[] = "0-90-9٠-٩۰-۹";
++const char kValidDigits[] = "0-9\xEF\xBC\x90-\xEF\xBC\x99\xD9\xA0-\xD9\xA9\xDB\xB0-\xDB\xB9";
+ // We accept alpha characters in phone numbers, ASCII only. We store lower-case
+ // here only since our regular expressions are case-insensitive.
+ const char kValidAlpha[] = "a-z";
+@@ -140,7 +140,7 @@
+ // itself. In emacs, you can use M-x unicode-what to query information about the
+ // unicode character.
+ const char kValidPunctuation[] =
+- "-x‐-―−ー--/  ​⁠ ()()[].\\[\\]/~⁓∼~";
++ "-x\xE2\x80\x90-\xE2\x80\x95\xE2\x88\x92\xE3\x83\xBC\xEF\xBC\x8D-\xEF\xBC\x8F \xC2\xA0\xE2\x80\x8B\xE2\x81\xA0\xE3\x80\x80()\xEF\xBC\x88\xEF\xBC\x89\xEF\xBC\xBB\xEF\xBC\xBD.\\[\\]/~\xE2\x81\x93\xE2\x88\xBC\xEF\xBD\x9E";
+
+ // Regular expression of viable phone numbers. This is location independent.
+ // Checks we have at least three leading digits, and only valid punctuation,
+@@ -454,7 +454,7 @@
+ // defined order.
+ void CreateRegularExpressions() {
+ unique_international_prefix.reset(
+- reg_exp::CreateRegularExpression("[\\d]+(?:[~⁓∼~][\\d]+)?"));
++ reg_exp::CreateRegularExpression("[\\d]+(?:[~\xE2\x81\x93\xE2\x88\xBC\xEF\xBD\x9E][\\d]+)?"));
+ first_group_capturing_pattern.reset(
+ reg_exp::CreateRegularExpression("(\\$1)"));
+ carrier_code_pattern.reset(
+@@ -476,16 +476,16 @@
+ StrCat("[", kPlusChars, "]*(?:[", kValidPunctuation, "]*[", kValidDigits,
+ "]){3,}[", kValidAlpha, kValidPunctuation, kValidDigits, "]*")));
+ // Canonical-equivalence doesn't seem to be an option with RE2, so we allow
+- // two options for representing the ó - the character itself, and one in the
++ // two options for representing the \xC3\xB3 - the character itself, and one in the
+ // unicode decomposed form with the combining acute accent. Note that there
+ // are currently three capturing groups for the extension itself - if this
+ // number is changed, MaybeStripExtension needs to be updated.
+ const string capturing_extn_digits = StrCat("([", kValidDigits, "]{1,7})");
+ known_extn_patterns.reset(new string(
+ StrCat(kRfc3966ExtnPrefix, capturing_extn_digits, "|"
+- "[  \\t,]*(?:ext(?:ensi(?:ó?|ó))?n?|extn?|[,xx##~~]|"
+- "int|int|anexo)"
+- "[:\\..]?[  \\t,-]*", capturing_extn_digits, "#?|"
++ "[ \xC2\xA0\\t,]*(?:ext(?:ensi(?:o\xCC\x81?|\xC3\xB3))?n?|\xEF\xBD\x85\xEF\xBD\x98\xEF\xBD\x94\xEF\xBD\x8E?|[,x\xEF\xBD\x98#\xEF\xBC\x83~\xEF\xBD\x9E]|"
++ "int|\xEF\xBD\x89\xEF\xBD\x8E\xEF\xBD\x94|anexo)"
++ "[:\\.\xEF\xBC\x8E]?[ \xC2\xA0\\t,-]*", capturing_extn_digits, "#?|"
+ "[- ]+([", kValidDigits, "]{1,5})#")));
+ extn_pattern.reset(reg_exp::CreateRegularExpression(
+ StrCat("(?i)(?:", *known_extn_patterns, ")$").c_str()));
+@@ -509,35 +509,35 @@
+ all_plus_number_grouping_symbols->insert(
+ make_pair(ToUnicodeCodepoint("-"), '-'));
+ all_plus_number_grouping_symbols->insert(
+- make_pair(ToUnicodeCodepoint("-"), '-'));
++ make_pair(ToUnicodeCodepoint("\xEF\xBC\x8D"), '-'));
+ all_plus_number_grouping_symbols->insert(
+- make_pair(ToUnicodeCodepoint("‐"), '-'));
++ make_pair(ToUnicodeCodepoint("\xE2\x80\x90"), '-'));
+ all_plus_number_grouping_symbols->insert(
+- make_pair(ToUnicodeCodepoint("‑"), '-'));
++ make_pair(ToUnicodeCodepoint("\xE2\x80\x91"), '-'));
+ all_plus_number_grouping_symbols->insert(
+- make_pair(ToUnicodeCodepoint("‒"), '-'));
++ make_pair(ToUnicodeCodepoint("\xE2\x80\x92"), '-'));
+ all_plus_number_grouping_symbols->insert(
+- make_pair(ToUnicodeCodepoint("–"), '-'));
++ make_pair(ToUnicodeCodepoint("\xE2\x80\x93"), '-'));
+ all_plus_number_grouping_symbols->insert(
+- make_pair(ToUnicodeCodepoint("—"), '-'));
++ make_pair(ToUnicodeCodepoint("\xE2\x80\x94"), '-'));
+ all_plus_number_grouping_symbols->insert(
+- make_pair(ToUnicodeCodepoint("―"), '-'));
++ make_pair(ToUnicodeCodepoint("\xE2\x80\x95"), '-'));
+ all_plus_number_grouping_symbols->insert(
+- make_pair(ToUnicodeCodepoint("−"), '-'));
++ make_pair(ToUnicodeCodepoint("\xE2\x88\x92"), '-'));
+ all_plus_number_grouping_symbols->insert(
+ make_pair(ToUnicodeCodepoint("/"), '/'));
+ all_plus_number_grouping_symbols->insert(
+- make_pair(ToUnicodeCodepoint("/"), '/'));
++ make_pair(ToUnicodeCodepoint("\xEF\xBC\x8F"), '/'));
+ all_plus_number_grouping_symbols->insert(
+ make_pair(ToUnicodeCodepoint(" "), ' '));
+ all_plus_number_grouping_symbols->insert(
+- make_pair(ToUnicodeCodepoint(" "), ' '));
++ make_pair(ToUnicodeCodepoint("\xE3\x80\x80"), ' '));
+ all_plus_number_grouping_symbols->insert(
+- make_pair(ToUnicodeCodepoint("⁠"), ' '));
++ make_pair(ToUnicodeCodepoint("\xE2\x81\xA0"), ' '));
+ all_plus_number_grouping_symbols->insert(
+ make_pair(ToUnicodeCodepoint("."), '.'));
+ all_plus_number_grouping_symbols->insert(
+- make_pair(ToUnicodeCodepoint("."), '.'));
++ make_pair(ToUnicodeCodepoint("\xEF\xBC\x8E"), '.'));
+ // Only the upper-case letters are added here - the lower-case versions are
+ // added programmatically.
+ alpha_mappings->insert(make_pair(ToUnicodeCodepoint("A"), '2'));
+@@ -849,7 +849,7 @@
+ // Note here that all NANPA formatting rules are contained by US, so we use
+ // that to format NANPA numbers. The same applies to Russian Fed regions -
+ // rules are contained by Russia. French Indian Ocean country rules are
+- // contained by Réunion.
++ // contained by R\xC3\xA9union.
+ string region_code;
+ GetRegionCodeForCountryCode(country_calling_code, &region_code);
+ if (!HasValidRegionCode(region_code, country_calling_code,
+@@ -1015,7 +1015,7 @@
+ // For regions that share a country calling code, the country calling code
+ // need not be dialled. This also applies when dialling within a region, so
+ // this if clause covers both these cases.
+- // Technically this is the case for dialling from la Réunion to other
++ // Technically this is the case for dialling from la R\xC3\xA9union to other
+ // overseas departments of France (French Guiana, Martinique, Guadeloupe),
+ // but not vice versa - so we don't cover this edge case for now and for
+ // those cases return the version including country calling code.