diff options
Diffstat (limited to 'third_party/libphonenumber/cpp/src/phonenumberutil.cc')
-rw-r--r-- | third_party/libphonenumber/cpp/src/phonenumberutil.cc | 44 |
1 files changed, 22 insertions, 22 deletions
diff --git a/third_party/libphonenumber/cpp/src/phonenumberutil.cc b/third_party/libphonenumber/cpp/src/phonenumberutil.cc index 75ef374..b0201f2 100644 --- a/third_party/libphonenumber/cpp/src/phonenumberutil.cc +++ b/third_party/libphonenumber/cpp/src/phonenumberutil.cc @@ -72,7 +72,7 @@ scoped_ptr<map<char32, char> > all_plus_number_grouping_symbols; // The kPlusSign signifies the international prefix. const char kPlusSign[] = "+"; -const char kPlusChars[] = "++"; +const char kPlusChars[] = "+\xEF\xBC\x8B"; scoped_ptr<const reg_exp::RegularExpression> plus_chars_pattern; const char kRfc3966ExtnPrefix[] = ";ext="; @@ -88,7 +88,7 @@ scoped_ptr<const reg_exp::RegularExpression> unique_international_prefix; // Digits accepted in phone numbers. // Both Arabic-Indic and Eastern Arabic-Indic are supported. -const char kValidDigits[] = "0-90-9٠-٩۰-۹"; +const char kValidDigits[] = "0-9\xEF\xBC\x90-\xEF\xBC\x99\xD9\xA0-\xD9\xA9\xDB\xB0-\xDB\xB9"; // We accept alpha characters in phone numbers, ASCII only. We store lower-case // here only since our regular expressions are case-insensitive. const char kValidAlpha[] = "a-z"; @@ -140,7 +140,7 @@ scoped_ptr<const reg_exp::RegularExpression> unwanted_end_char_pattern; // itself. In emacs, you can use M-x unicode-what to query information about the // unicode character. const char kValidPunctuation[] = - "-x‐-―−ー--/ ()()[].\\[\\]/~⁓∼~"; + "-x\xE2\x80\x90-\xE2\x80\x95\xE2\x88\x92\xE3\x83\xBC\xEF\xBC\x8D-\xEF\xBC\x8F \xC2\xA0\xE2\x80\x8B\xE2\x81\xA0\xE3\x80\x80()\xEF\xBC\x88\xEF\xBC\x89\xEF\xBC\xBB\xEF\xBC\xBD.\\[\\]/~\xE2\x81\x93\xE2\x88\xBC\xEF\xBD\x9E"; // Regular expression of viable phone numbers. This is location independent. // Checks we have at least three leading digits, and only valid punctuation, @@ -454,7 +454,7 @@ char32 ToUnicodeCodepoint(const char* unicode_char) { // defined order. 
void CreateRegularExpressions() { unique_international_prefix.reset( - reg_exp::CreateRegularExpression("[\\d]+(?:[~⁓∼~][\\d]+)?")); + reg_exp::CreateRegularExpression("[\\d]+(?:[~\xE2\x81\x93\xE2\x88\xBC\xEF\xBD\x9E][\\d]+)?")); first_group_capturing_pattern.reset( reg_exp::CreateRegularExpression("(\\$1)")); carrier_code_pattern.reset( @@ -476,16 +476,16 @@ void CreateRegularExpressions() { StrCat("[", kPlusChars, "]*(?:[", kValidPunctuation, "]*[", kValidDigits, "]){3,}[", kValidAlpha, kValidPunctuation, kValidDigits, "]*"))); // Canonical-equivalence doesn't seem to be an option with RE2, so we allow - // two options for representing the ó - the character itself, and one in the + // two options for representing the \xC3\xB3 - the character itself, and one in the // unicode decomposed form with the combining acute accent. Note that there // are currently three capturing groups for the extension itself - if this // number is changed, MaybeStripExtension needs to be updated. const string capturing_extn_digits = StrCat("([", kValidDigits, "]{1,7})"); known_extn_patterns.reset(new string( StrCat(kRfc3966ExtnPrefix, capturing_extn_digits, "|" - "[ \\t,]*(?:ext(?:ensi(?:ó?|ó))?n?|extn?|[,xx##~~]|" - "int|int|anexo)" - "[:\\..]?[ \\t,-]*", capturing_extn_digits, "#?|" + "[ \xC2\xA0\\t,]*(?:ext(?:ensi(?:o\xCC\x81?|\xC3\xB3))?n?|\xEF\xBD\x85\xEF\xBD\x98\xEF\xBD\x94\xEF\xBD\x8E?|[,x\xEF\xBD\x98#\xEF\xBC\x83~\xEF\xBD\x9E]|" + "int|\xEF\xBD\x89\xEF\xBD\x8E\xEF\xBD\x94|anexo)" + "[:\\.\xEF\xBC\x8E]?[ \xC2\xA0\\t,-]*", capturing_extn_digits, "#?|" "[- ]+([", kValidDigits, "]{1,5})#"))); extn_pattern.reset(reg_exp::CreateRegularExpression( StrCat("(?i)(?:", *known_extn_patterns, ")$").c_str())); @@ -509,35 +509,35 @@ void InitializeStaticMapsAndSets() { all_plus_number_grouping_symbols->insert( make_pair(ToUnicodeCodepoint("-"), '-')); all_plus_number_grouping_symbols->insert( - make_pair(ToUnicodeCodepoint("-"), '-')); + make_pair(ToUnicodeCodepoint("\xEF\xBC\x8D"), '-')); 
all_plus_number_grouping_symbols->insert( - make_pair(ToUnicodeCodepoint("‐"), '-')); + make_pair(ToUnicodeCodepoint("\xE2\x80\x90"), '-')); all_plus_number_grouping_symbols->insert( - make_pair(ToUnicodeCodepoint("‑"), '-')); + make_pair(ToUnicodeCodepoint("\xE2\x80\x91"), '-')); all_plus_number_grouping_symbols->insert( - make_pair(ToUnicodeCodepoint("‒"), '-')); + make_pair(ToUnicodeCodepoint("\xE2\x80\x92"), '-')); all_plus_number_grouping_symbols->insert( - make_pair(ToUnicodeCodepoint("–"), '-')); + make_pair(ToUnicodeCodepoint("\xE2\x80\x93"), '-')); all_plus_number_grouping_symbols->insert( - make_pair(ToUnicodeCodepoint("—"), '-')); + make_pair(ToUnicodeCodepoint("\xE2\x80\x94"), '-')); all_plus_number_grouping_symbols->insert( - make_pair(ToUnicodeCodepoint("―"), '-')); + make_pair(ToUnicodeCodepoint("\xE2\x80\x95"), '-')); all_plus_number_grouping_symbols->insert( - make_pair(ToUnicodeCodepoint("−"), '-')); + make_pair(ToUnicodeCodepoint("\xE2\x88\x92"), '-')); all_plus_number_grouping_symbols->insert( make_pair(ToUnicodeCodepoint("/"), '/')); all_plus_number_grouping_symbols->insert( - make_pair(ToUnicodeCodepoint("/"), '/')); + make_pair(ToUnicodeCodepoint("\xEF\xBC\x8F"), '/')); all_plus_number_grouping_symbols->insert( make_pair(ToUnicodeCodepoint(" "), ' ')); all_plus_number_grouping_symbols->insert( - make_pair(ToUnicodeCodepoint(" "), ' ')); + make_pair(ToUnicodeCodepoint("\xE3\x80\x80"), ' ')); all_plus_number_grouping_symbols->insert( - make_pair(ToUnicodeCodepoint(""), ' ')); + make_pair(ToUnicodeCodepoint("\xE2\x81\xA0"), ' ')); all_plus_number_grouping_symbols->insert( make_pair(ToUnicodeCodepoint("."), '.')); all_plus_number_grouping_symbols->insert( - make_pair(ToUnicodeCodepoint("."), '.')); + make_pair(ToUnicodeCodepoint("\xEF\xBC\x8E"), '.')); // Only the upper-case letters are added here - the lower-case versions are // added programmatically. 
alpha_mappings->insert(make_pair(ToUnicodeCodepoint("A"), '2')); @@ -849,7 +849,7 @@ void PhoneNumberUtil::Format(const PhoneNumber& number, // Note here that all NANPA formatting rules are contained by US, so we use // that to format NANPA numbers. The same applies to Russian Fed regions - // rules are contained by Russia. French Indian Ocean country rules are - // contained by Réunion. + // contained by R\xC3\xA9union. string region_code; GetRegionCodeForCountryCode(country_calling_code, &region_code); if (!HasValidRegionCode(region_code, country_calling_code, @@ -1015,7 +1015,7 @@ void PhoneNumberUtil::FormatOutOfCountryCallingNumber( // For regions that share a country calling code, the country calling code // need not be dialled. This also applies when dialling within a region, so // this if clause covers both these cases. - // Technically this is the case for dialling from la Réunion to other + // Technically this is the case for dialling from la R\xC3\xA9union to other // overseas departments of France (French Guiana, Martinique, Guadeloupe), // but not vice versa - so we don't cover this edge case for now and for // those cases return the version including country calling code. |