diff options
author | mmenke@chromium.org <mmenke@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2014-04-15 14:48:31 +0000 |
---|---|---|
committer | mmenke@chromium.org <mmenke@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2014-04-15 14:48:31 +0000 |
commit | 3454c167fc6b3cc9fb43348685ba977dccdd743c (patch) | |
tree | a248925b16e86a39370b5b34459fb64fb368b313 /net | |
parent | daa82051edc3ec197f698a58815274f4ffe9c2ba (diff) | |
download | chromium_src-3454c167fc6b3cc9fb43348685ba977dccdd743c.zip chromium_src-3454c167fc6b3cc9fb43348685ba977dccdd743c.tar.gz chromium_src-3454c167fc6b3cc9fb43348685ba977dccdd743c.tar.bz2 |
Only allow certain files in net to depend on icu.
Also split out functions that depend on icu from net_util.cc.
This is a first step in allowing net/ to be built without icu,
using platform functions instead, to get binary size down when
used as a library on mobile.
BUG=362608
R=mef@chromium.org
Review URL: https://codereview.chromium.org/235373003
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@263850 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net')
-rw-r--r-- | net/DEPS | 60 | ||||
-rw-r--r-- | net/base/net_util.cc | 850 | ||||
-rw-r--r-- | net/base/net_util_icu.cc | 866 | ||||
-rw-r--r-- | net/net.gypi | 1 | ||||
-rw-r--r-- | net/tools/DEPS | 4 |
5 files changed, 931 insertions, 850 deletions
@@ -8,8 +8,68 @@ include_rules = [ "+third_party/zlib", "+sdch/open-vcdiff", "+v8", + + # Most of net should not depend on icu, to keep size down when built as a + # library. + "-base/i18n", + "-third_party/icu", ] +specific_include_rules = { + # Within net, only used by file: requests. + "directory_lister(\.cc|_unittest\.cc)": [ + "+base/i18n", + ], + + # Within net, only used by file: requests. + "filename_util\.cc": [ + "+base/i18n", + ], + + # Functions largely not used by the rest of net. + "net_util_icu\.cc": [ + "+base/i18n", + "+third_party/icu", + ], + + # Uses icu for debug logging only. + "network_time_notifier\.cc": [ + "+base/i18n", + ], + + # Only use icu for string conversions. + "escape_unittest\.cc": [ + "+base/i18n/icu_string_conversions.h", + ], + "http_auth_handler_basic\.cc": [ + "+base/i18n/icu_string_conversions.h", + ], + "http_auth_handler_digest\.cc": [ + "+base/i18n/icu_string_conversions.h", + ], + "proxy_script_fetcher_impl\.cc": [ + "+base/i18n/icu_string_conversions.h", + ], + "x509_cert_types_mac\.cc": [ + "+base/i18n/icu_string_conversions.h", + ], + "http_content_disposition\.cc": [ + "+base/i18n/icu_string_conversions.h", + "+third_party/icu", + ], + "websocket_channel\.h": [ + "+base/i18n", + ], + + "ftp_util\.cc": [ + "+base/i18n", + "+third_party/icu", + ], + "ftp_directory_listing_parser\.cc": [ + "+base/i18n", + ], +} + skip_child_includes = [ "third_party", "tools/flip_server", diff --git a/net/base/net_util.cc b/net/base/net_util.cc index 106486d..d0355df 100644 --- a/net/base/net_util.cc +++ b/net/base/net_util.cc @@ -8,7 +8,6 @@ #include <algorithm> #include <iterator> -#include <map> #include <set> #include "build/build_config.h" @@ -31,26 +30,16 @@ #endif // defined(OS_POSIX) #include "base/basictypes.h" -#include "base/i18n/time_formatting.h" #include "base/json/string_escape.h" #include "base/lazy_instance.h" #include "base/logging.h" -#include "base/memory/singleton.h" -#include "base/message_loop/message_loop.h" 
-#include "base/metrics/histogram.h" -#include "base/stl_util.h" #include "base/strings/string_number_conversions.h" #include "base/strings/string_piece.h" #include "base/strings/string_split.h" -#include "base/strings/string_tokenizer.h" #include "base/strings/string_util.h" #include "base/strings/stringprintf.h" -#include "base/strings/sys_string_conversions.h" -#include "base/strings/utf_offset_string_conversions.h" #include "base/strings/utf_string_conversions.h" -#include "base/synchronization/lock.h" #include "base/sys_byteorder.h" -#include "base/time/time.h" #include "base/values.h" #include "grit/net_resources.h" #include "url/gurl.h" @@ -58,17 +47,9 @@ #include "url/url_canon_ip.h" #include "url/url_parse.h" #include "net/base/dns_util.h" -#include "net/base/escape.h" #include "net/base/net_module.h" #include "net/base/registry_controlled_domains/registry_controlled_domain.h" #include "net/http/http_content_disposition.h" -#include "third_party/icu/source/common/unicode/uidna.h" -#include "third_party/icu/source/common/unicode/uniset.h" -#include "third_party/icu/source/common/unicode/uscript.h" -#include "third_party/icu/source/common/unicode/uset.h" -#include "third_party/icu/source/i18n/unicode/datefmt.h" -#include "third_party/icu/source/i18n/unicode/regex.h" -#include "third_party/icu/source/i18n/unicode/ulocdata.h" #if defined(OS_ANDROID) #include "net/android/network_library.h" @@ -77,14 +58,10 @@ #include "net/base/winsock_init.h" #endif -using base::Time; - namespace net { namespace { -typedef std::vector<size_t> Offsets; - // The general list of blocked ports. Will be blocked unless a specific // protocol overrides it. (Ex: ftp can use ports 20 and 21) static const int kRestrictedPorts[] = { @@ -163,595 +140,6 @@ static const int kAllowedFtpPorts[] = { 22, // ssh }; -// Does some simple normalization of scripts so we can allow certain scripts -// to exist together. 
-// TODO(brettw) bug 880223: we should allow some other languages to be -// oombined such as Chinese and Latin. We will probably need a more -// complicated system of language pairs to have more fine-grained control. -UScriptCode NormalizeScript(UScriptCode code) { - switch (code) { - case USCRIPT_KATAKANA: - case USCRIPT_HIRAGANA: - case USCRIPT_KATAKANA_OR_HIRAGANA: - case USCRIPT_HANGUL: // This one is arguable. - return USCRIPT_HAN; - default: - return code; - } -} - -bool IsIDNComponentInSingleScript(const base::char16* str, int str_len) { - UScriptCode first_script = USCRIPT_INVALID_CODE; - bool is_first = true; - - int i = 0; - while (i < str_len) { - unsigned code_point; - U16_NEXT(str, i, str_len, code_point); - - UErrorCode err = U_ZERO_ERROR; - UScriptCode cur_script = uscript_getScript(code_point, &err); - if (err != U_ZERO_ERROR) - return false; // Report mixed on error. - cur_script = NormalizeScript(cur_script); - - // TODO(brettw) We may have to check for USCRIPT_INHERENT as well. - if (is_first && cur_script != USCRIPT_COMMON) { - first_script = cur_script; - is_first = false; - } else { - if (cur_script != USCRIPT_COMMON && cur_script != first_script) - return false; - } - } - return true; -} - -// Check if the script of a language can be 'safely' mixed with -// Latin letters in the ASCII range. -bool IsCompatibleWithASCIILetters(const std::string& lang) { - // For now, just list Chinese, Japanese and Korean (positive list). - // An alternative is negative-listing (languages using Greek and - // Cyrillic letters), but it can be more dangerous. 
- return !lang.substr(0, 2).compare("zh") || - !lang.substr(0, 2).compare("ja") || - !lang.substr(0, 2).compare("ko"); -} - -typedef std::map<std::string, icu::UnicodeSet*> LangToExemplarSetMap; - -class LangToExemplarSet { - public: - static LangToExemplarSet* GetInstance() { - return Singleton<LangToExemplarSet>::get(); - } - - private: - LangToExemplarSetMap map; - LangToExemplarSet() { } - ~LangToExemplarSet() { - STLDeleteContainerPairSecondPointers(map.begin(), map.end()); - } - - friend class Singleton<LangToExemplarSet>; - friend struct DefaultSingletonTraits<LangToExemplarSet>; - friend bool GetExemplarSetForLang(const std::string&, icu::UnicodeSet**); - friend void SetExemplarSetForLang(const std::string&, icu::UnicodeSet*); - - DISALLOW_COPY_AND_ASSIGN(LangToExemplarSet); -}; - -bool GetExemplarSetForLang(const std::string& lang, - icu::UnicodeSet** lang_set) { - const LangToExemplarSetMap& map = LangToExemplarSet::GetInstance()->map; - LangToExemplarSetMap::const_iterator pos = map.find(lang); - if (pos != map.end()) { - *lang_set = pos->second; - return true; - } - return false; -} - -void SetExemplarSetForLang(const std::string& lang, - icu::UnicodeSet* lang_set) { - LangToExemplarSetMap& map = LangToExemplarSet::GetInstance()->map; - map.insert(std::make_pair(lang, lang_set)); -} - -static base::LazyInstance<base::Lock>::Leaky - g_lang_set_lock = LAZY_INSTANCE_INITIALIZER; - -// Returns true if all the characters in component_characters are used by -// the language |lang|. -bool IsComponentCoveredByLang(const icu::UnicodeSet& component_characters, - const std::string& lang) { - CR_DEFINE_STATIC_LOCAL( - const icu::UnicodeSet, kASCIILetters, ('a', 'z')); - icu::UnicodeSet* lang_set = NULL; - // We're called from both the UI thread and the history thread. 
- { - base::AutoLock lock(g_lang_set_lock.Get()); - if (!GetExemplarSetForLang(lang, &lang_set)) { - UErrorCode status = U_ZERO_ERROR; - ULocaleData* uld = ulocdata_open(lang.c_str(), &status); - // TODO(jungshik) Turn this check on when the ICU data file is - // rebuilt with the minimal subset of locale data for languages - // to which Chrome is not localized but which we offer in the list - // of languages selectable for Accept-Languages. With the rebuilt ICU - // data, ulocdata_open never should fall back to the default locale. - // (issue 2078) - // DCHECK(U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING); - if (U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING) { - lang_set = reinterpret_cast<icu::UnicodeSet *>( - ulocdata_getExemplarSet(uld, NULL, 0, - ULOCDATA_ES_STANDARD, &status)); - // If |lang| is compatible with ASCII Latin letters, add them. - if (IsCompatibleWithASCIILetters(lang)) - lang_set->addAll(kASCIILetters); - } else { - lang_set = new icu::UnicodeSet(1, 0); - } - lang_set->freeze(); - SetExemplarSetForLang(lang, lang_set); - ulocdata_close(uld); - } - } - return !lang_set->isEmpty() && lang_set->containsAll(component_characters); -} - -// Returns true if the given Unicode host component is safe to display to the -// user. -bool IsIDNComponentSafe(const base::char16* str, - int str_len, - const std::string& languages) { - // Most common cases (non-IDN) do not reach here so that we don't - // need a fast return path. - // TODO(jungshik) : Check if there's any character inappropriate - // (although allowed) for domain names. - // See http://www.unicode.org/reports/tr39/#IDN_Security_Profiles and - // http://www.unicode.org/reports/tr39/data/xidmodifications.txt - // For now, we borrow the list from Mozilla and tweaked it slightly. - // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because - // they're gonna be canonicalized to U+0020 and full stop before - // reaching here.) 
- // The original list is available at - // http://kb.mozillazine.org/Network.IDN.blacklist_chars and - // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703 - - UErrorCode status = U_ZERO_ERROR; -#ifdef U_WCHAR_IS_UTF16 - icu::UnicodeSet dangerous_characters(icu::UnicodeString( - L"[[\\ \u00ad\u00bc\u00bd\u01c3\u0337\u0338" - L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" - L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" - L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" - L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" - L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" - L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" - L"[\ufffa-\ufffd]]"), status); - DCHECK(U_SUCCESS(status)); - icu::RegexMatcher dangerous_patterns(icu::UnicodeString( - // Lone katakana no, so, or n - L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]" - // Repeating Japanese accent characters - L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"), - 0, status); -#else - icu::UnicodeSet dangerous_characters(icu::UnicodeString( - "[[\\u0020\\u00ad\\u00bc\\u00bd\\u01c3\\u0337\\u0338" - "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" - "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" - "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" - "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" - "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" - "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" - "[\\ufffa-\\ufffd]]", -1, US_INV), status); - DCHECK(U_SUCCESS(status)); - icu::RegexMatcher dangerous_patterns(icu::UnicodeString( - // Lone katakana no, so, or n - "[^\\p{Katakana}][\\u30ce\\u30f3\u30bd][^\\p{Katakana}]" - // Repeating Japanese accent characters - "|[\\u3099\\u309a\\u309b\\u309c][\\u3099\\u309a\\u309b\\u309c]"), - 0, status); -#endif - DCHECK(U_SUCCESS(status)); - icu::UnicodeSet component_characters; - 
icu::UnicodeString component_string(str, str_len); - component_characters.addAll(component_string); - if (dangerous_characters.containsSome(component_characters)) - return false; - - DCHECK(U_SUCCESS(status)); - dangerous_patterns.reset(component_string); - if (dangerous_patterns.find()) - return false; - - // If the language list is empty, the result is completely determined - // by whether a component is a single script or not. This will block - // even "safe" script mixing cases like <Chinese, Latin-ASCII> that are - // allowed with |languages| (while it blocks Chinese + Latin letters with - // an accent as should be the case), but we want to err on the safe side - // when |languages| is empty. - if (languages.empty()) - return IsIDNComponentInSingleScript(str, str_len); - - // |common_characters| is made up of ASCII numbers, hyphen, plus and - // underscore that are used across scripts and allowed in domain names. - // (sync'd with characters allowed in url_canon_host with square - // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc. - icu::UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"), - status); - DCHECK(U_SUCCESS(status)); - // Subtract common characters because they're always allowed so that - // we just have to check if a language-specific set contains - // the remainder. - component_characters.removeAll(common_characters); - - base::StringTokenizer t(languages, ","); - while (t.GetNext()) { - if (IsComponentCoveredByLang(component_characters, t.token())) - return true; - } - return false; -} - -// A wrapper to use LazyInstance<>::Leaky with ICU's UIDNA, a C pointer to -// a UTS46/IDNA 2008 handling object opened with uidna_openUTS46(). -// -// We use UTS46 with BiDiCheck to migrate from IDNA 2003 to IDNA 2008 with -// the backward compatibility in mind. What it does: -// -// 1. Use the up-to-date Unicode data. -// 2. Define a case folding/mapping with the up-to-date Unicode data as -// in IDNA 2003. -// 3. 
Use transitional mechanism for 4 deviation characters (sharp-s, -// final sigma, ZWJ and ZWNJ) for now. -// 4. Continue to allow symbols and punctuations. -// 5. Apply new BiDi check rules more permissive than the IDNA 2003 BiDI rules. -// 6. Do not apply STD3 rules -// 7. Do not allow unassigned code points. -// -// It also closely matches what IE 10 does except for the BiDi check ( -// http://goo.gl/3XBhqw ). -// See http://http://unicode.org/reports/tr46/ and references therein -// for more details. -struct UIDNAWrapper { - UIDNAWrapper() { - UErrorCode err = U_ZERO_ERROR; - // TODO(jungshik): Change options as different parties (browsers, - // registrars, search engines) converge toward a consensus. - value = uidna_openUTS46(UIDNA_CHECK_BIDI, &err); - if (U_FAILURE(err)) - value = NULL; - } - - UIDNA* value; -}; - -static base::LazyInstance<UIDNAWrapper>::Leaky - g_uidna = LAZY_INSTANCE_INITIALIZER; - -// Converts one component of a host (between dots) to IDN if safe. The result -// will be APPENDED to the given output string and will be the same as the input -// if it is not IDN or the IDN is unsafe to display. Returns whether any -// conversion was performed. -bool IDNToUnicodeOneComponent(const base::char16* comp, - size_t comp_len, - const std::string& languages, - base::string16* out) { - DCHECK(out); - if (comp_len == 0) - return false; - - // Only transform if the input can be an IDN component. - static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'}; - if ((comp_len > arraysize(kIdnPrefix)) && - !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(base::char16))) { - UIDNA* uidna = g_uidna.Get().value; - DCHECK(uidna != NULL); - size_t original_length = out->length(); - int output_length = 64; - UIDNAInfo info = UIDNA_INFO_INITIALIZER; - UErrorCode status; - do { - out->resize(original_length + output_length); - status = U_ZERO_ERROR; - // This returns the actual length required. 
If this is more than 64 - // code units, |status| will be U_BUFFER_OVERFLOW_ERROR and we'll try - // the conversion again, but with a sufficiently large buffer. - output_length = uidna_labelToUnicode( - uidna, comp, static_cast<int32_t>(comp_len), &(*out)[original_length], - output_length, &info, &status); - } while ((status == U_BUFFER_OVERFLOW_ERROR && info.errors == 0)); - - if (U_SUCCESS(status) && info.errors == 0) { - // Converted successfully. Ensure that the converted component - // can be safely displayed to the user. - out->resize(original_length + output_length); - if (IsIDNComponentSafe(out->data() + original_length, output_length, - languages)) - return true; - } - - // Something went wrong. Revert to original string. - out->resize(original_length); - } - - // We get here with no IDN or on error, in which case we just append the - // literal input. - out->append(comp, comp_len); - return false; -} - -// Clamps the offsets in |offsets_for_adjustment| to the length of |str|. -void LimitOffsets(const base::string16& str, Offsets* offsets_for_adjustment) { - if (offsets_for_adjustment) { - std::for_each(offsets_for_adjustment->begin(), - offsets_for_adjustment->end(), - base::LimitOffset<base::string16>(str.length())); - } -} - -// TODO(brettw) bug 734373: check the scripts for each host component and -// don't un-IDN-ize if there is more than one. Alternatively, only IDN for -// scripts that the user has installed. For now, just put the entire -// path through IDN. Maybe this feature can be implemented in ICU itself? -// -// We may want to skip this step in the case of file URLs to allow unicode -// UNC hostnames regardless of encodings. -base::string16 IDNToUnicodeWithOffsets(const std::string& host, - const std::string& languages, - Offsets* offsets_for_adjustment) { - // Convert the ASCII input to a base::string16 for ICU. 
- base::string16 input16; - input16.reserve(host.length()); - input16.insert(input16.end(), host.begin(), host.end()); - - // Do each component of the host separately, since we enforce script matching - // on a per-component basis. - base::string16 out16; - { - base::OffsetAdjuster offset_adjuster(offsets_for_adjustment); - for (size_t component_start = 0, component_end; - component_start < input16.length(); - component_start = component_end + 1) { - // Find the end of the component. - component_end = input16.find('.', component_start); - if (component_end == base::string16::npos) - component_end = input16.length(); // For getting the last component. - size_t component_length = component_end - component_start; - size_t new_component_start = out16.length(); - bool converted_idn = false; - if (component_end > component_start) { - // Add the substring that we just found. - converted_idn = IDNToUnicodeOneComponent( - input16.data() + component_start, component_length, languages, - &out16); - } - size_t new_component_length = out16.length() - new_component_start; - - if (converted_idn && offsets_for_adjustment) { - offset_adjuster.Add(base::OffsetAdjuster::Adjustment(component_start, - component_length, new_component_length)); - } - - // Need to add the dot we just found (if we found one). - if (component_end < input16.length()) - out16.push_back('.'); - } - } - - LimitOffsets(out16, offsets_for_adjustment); - return out16; -} - -// Called after transforming a component to set all affected elements in -// |offsets_for_adjustment| to the correct new values. |original_offsets| -// represents the offsets before the transform; |original_component_begin| and -// |original_component_end| represent the pre-transform boundaries of the -// affected component. 
|transformed_offsets| should be a vector created by -// adjusting |original_offsets| to be relative to the beginning of the component -// in question (via an OffsetAdjuster) and then transformed along with the -// component. Note that any elements in this vector which didn't originally -// point into the component may contain arbitrary values and should be ignored. -// |transformed_component_begin| and |transformed_component_end| are the -// endpoints of the transformed component and are used in combination with the -// two offset vectors to calculate the resulting absolute offsets, which are -// stored in |offsets_for_adjustment|. -void AdjustForComponentTransform(const Offsets& original_offsets, - size_t original_component_begin, - size_t original_component_end, - const Offsets& transformed_offsets, - size_t transformed_component_begin, - size_t transformed_component_end, - Offsets* offsets_for_adjustment) { - if (!offsets_for_adjustment) - return; // Nothing to do. - - for (size_t i = 0; i < original_offsets.size(); ++i) { - size_t original_offset = original_offsets[i]; - if ((original_offset >= original_component_begin) && - (original_offset < original_component_end)) { - // This offset originally pointed into the transformed component. - // Adjust the transformed relative offset by the new beginning point of - // the transformed component. - size_t transformed_offset = transformed_offsets[i]; - (*offsets_for_adjustment)[i] = - (transformed_offset == base::string16::npos) ? - base::string16::npos : - (transformed_offset + transformed_component_begin); - } else if ((original_offset >= original_component_end) && - (original_offset != std::string::npos)) { - // This offset pointed after the transformed component. Adjust the - // original absolute offset by the difference between the new and old - // component lengths. 
- (*offsets_for_adjustment)[i] = - original_offset - original_component_end + transformed_component_end; - } - } -} - -// If |component| is valid, its begin is incremented by |delta|. -void AdjustComponent(int delta, url_parse::Component* component) { - if (!component->is_valid()) - return; - - DCHECK(delta >= 0 || component->begin >= -delta); - component->begin += delta; -} - -// Adjusts all the components of |parsed| by |delta|, except for the scheme. -void AdjustAllComponentsButScheme(int delta, url_parse::Parsed* parsed) { - AdjustComponent(delta, &(parsed->username)); - AdjustComponent(delta, &(parsed->password)); - AdjustComponent(delta, &(parsed->host)); - AdjustComponent(delta, &(parsed->port)); - AdjustComponent(delta, &(parsed->path)); - AdjustComponent(delta, &(parsed->query)); - AdjustComponent(delta, &(parsed->ref)); -} - -// Helper for FormatUrlWithOffsets(). -base::string16 FormatViewSourceUrl(const GURL& url, - const Offsets& original_offsets, - const std::string& languages, - FormatUrlTypes format_types, - UnescapeRule::Type unescape_rules, - url_parse::Parsed* new_parsed, - size_t* prefix_end, - Offsets* offsets_for_adjustment) { - DCHECK(new_parsed); - const char kViewSource[] = "view-source:"; - const size_t kViewSourceLength = arraysize(kViewSource) - 1; - - // Format the underlying URL and adjust offsets. 
- const std::string& url_str(url.possibly_invalid_spec()); - Offsets offsets_into_underlying_url(original_offsets); - { - base::OffsetAdjuster adjuster(&offsets_into_underlying_url); - adjuster.Add(base::OffsetAdjuster::Adjustment(0, kViewSourceLength, 0)); - } - base::string16 result(base::ASCIIToUTF16(kViewSource) + - FormatUrlWithOffsets(GURL(url_str.substr(kViewSourceLength)), languages, - format_types, unescape_rules, new_parsed, prefix_end, - &offsets_into_underlying_url)); - AdjustForComponentTransform(original_offsets, kViewSourceLength, - url_str.length(), offsets_into_underlying_url, - kViewSourceLength, result.length(), - offsets_for_adjustment); - LimitOffsets(result, offsets_for_adjustment); - - // Adjust positions of the parsed components. - if (new_parsed->scheme.is_nonempty()) { - // Assume "view-source:real-scheme" as a scheme. - new_parsed->scheme.len += kViewSourceLength; - } else { - new_parsed->scheme.begin = 0; - new_parsed->scheme.len = kViewSourceLength - 1; - } - AdjustAllComponentsButScheme(kViewSourceLength, new_parsed); - - if (prefix_end) - *prefix_end += kViewSourceLength; - - return result; -} - -class AppendComponentTransform { - public: - AppendComponentTransform() {} - virtual ~AppendComponentTransform() {} - - virtual base::string16 Execute(const std::string& component_text, - Offsets* offsets_into_component) const = 0; - - // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an - // accessible copy constructor in order to call AppendFormattedComponent() - // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ). 
-}; - -class HostComponentTransform : public AppendComponentTransform { - public: - explicit HostComponentTransform(const std::string& languages) - : languages_(languages) { - } - - private: - virtual base::string16 Execute( - const std::string& component_text, - Offsets* offsets_into_component) const OVERRIDE { - return IDNToUnicodeWithOffsets(component_text, languages_, - offsets_into_component); - } - - const std::string& languages_; -}; - -class NonHostComponentTransform : public AppendComponentTransform { - public: - explicit NonHostComponentTransform(UnescapeRule::Type unescape_rules) - : unescape_rules_(unescape_rules) { - } - - private: - virtual base::string16 Execute( - const std::string& component_text, - Offsets* offsets_into_component) const OVERRIDE { - return (unescape_rules_ == UnescapeRule::NONE) ? - base::UTF8ToUTF16AndAdjustOffsets(component_text, - offsets_into_component) : - UnescapeAndDecodeUTF8URLComponentWithOffsets(component_text, - unescape_rules_, offsets_into_component); - } - - const UnescapeRule::Type unescape_rules_; -}; - -// Transforms the portion of |spec| covered by |original_component| according to -// |transform|. Appends the result to |output|. If |output_component| is -// non-NULL, its start and length are set to the transformed component's new -// start and length. For each element in |original_offsets| which is at least -// as large as original_component.begin, the corresponding element of -// |offsets_for_adjustment| is transformed appropriately. 
-void AppendFormattedComponent(const std::string& spec, - const url_parse::Component& original_component, - const Offsets& original_offsets, - const AppendComponentTransform& transform, - base::string16* output, - url_parse::Component* output_component, - Offsets* offsets_for_adjustment) { - DCHECK(output); - if (original_component.is_nonempty()) { - size_t original_component_begin = - static_cast<size_t>(original_component.begin); - size_t output_component_begin = output->length(); - std::string component_str(spec, original_component_begin, - static_cast<size_t>(original_component.len)); - - // Transform |component_str| and adjust the offsets accordingly. - Offsets offsets_into_component(original_offsets); - { - base::OffsetAdjuster adjuster(&offsets_into_component); - adjuster.Add(base::OffsetAdjuster::Adjustment(0, original_component_begin, - 0)); - } - output->append(transform.Execute(component_str, &offsets_into_component)); - AdjustForComponentTransform(original_offsets, original_component_begin, - static_cast<size_t>(original_component.end()), - offsets_into_component, output_component_begin, - output->length(), offsets_for_adjustment); - - // Set positions of the parsed component. 
- if (output_component) { - output_component->begin = static_cast<int>(output_component_begin); - output_component->len = - static_cast<int>(output->length() - output_component_begin); - } - } else if (output_component) { - output_component->reset(); - } -} - bool IPNumberPrefixCheck(const IPAddressNumber& ip_number, const unsigned char* ip_prefix, size_t prefix_length_in_bits) { @@ -776,13 +164,6 @@ bool IPNumberPrefixCheck(const IPAddressNumber& ip_number, } // namespace -const FormatUrlType kFormatUrlOmitNothing = 0; -const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0; -const FormatUrlType kFormatUrlOmitHTTP = 1 << 1; -const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2; -const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword | - kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname; - static base::LazyInstance<std::multiset<int> >::Leaky g_explicitly_allowed_ports = LAZY_INSTANCE_INITIALIZER; @@ -821,11 +202,6 @@ std::string GetSpecificHeader(const std::string& headers, return ret; } -base::string16 IDNToUnicode(const std::string& host, - const std::string& languages) { - return IDNToUnicodeWithOffsets(host, languages, NULL); -} - std::string CanonicalizeHost(const std::string& host, url_canon::CanonHostInfo* host_info) { // Try to canonicalize the host. 
@@ -908,46 +284,6 @@ bool IsCanonicalizedHostCompliant(const std::string& host, (!desired_tld.empty() && IsHostCharAlphanumeric(desired_tld[0])); } -std::string GetDirectoryListingEntry(const base::string16& name, - const std::string& raw_bytes, - bool is_dir, - int64 size, - Time modified) { - std::string result; - result.append("<script>addRow("); - base::EscapeJSONString(name, true, &result); - result.append(","); - if (raw_bytes.empty()) { - base::EscapeJSONString(EscapePath(base::UTF16ToUTF8(name)), true, &result); - } else { - base::EscapeJSONString(EscapePath(raw_bytes), true, &result); - } - if (is_dir) { - result.append(",1,"); - } else { - result.append(",0,"); - } - - // Negative size means unknown or not applicable (e.g. directory). - base::string16 size_string; - if (size >= 0) - size_string = FormatBytesUnlocalized(size); - base::EscapeJSONString(size_string, true, &result); - - result.append(","); - - base::string16 modified_str; - // |modified| can be NULL in FTP listings. - if (!modified.is_null()) { - modified_str = base::TimeFormatShortDateAndTime(modified); - } - base::EscapeJSONString(modified_str, true, &result); - - result.append(");</script>\n"); - - return result; -} - base::string16 StripWWW(const base::string16& text) { const base::string16 www(base::ASCIIToUTF16("www.")); return StartsWith(text, www, true) ? text.substr(www.length()) : text; @@ -958,7 +294,6 @@ base::string16 StripWWWFromHost(const GURL& url) { return StripWWW(base::ASCIIToUTF16(url.host())); } - bool IsPortAllowedByDefault(int port) { int array_size = arraysize(kRestrictedPorts); for (int i = 0; i < array_size; i++) { @@ -1299,191 +634,6 @@ std::string GetHostOrSpecFromURL(const GURL& url) { return url.has_host() ? 
TrimEndingDot(url.host()) : url.spec(); } -void AppendFormattedHost(const GURL& url, - const std::string& languages, - base::string16* output) { - Offsets offsets; - AppendFormattedComponent(url.possibly_invalid_spec(), - url.parsed_for_possibly_invalid_spec().host, offsets, - HostComponentTransform(languages), output, NULL, NULL); -} - -base::string16 FormatUrlWithOffsets( - const GURL& url, - const std::string& languages, - FormatUrlTypes format_types, - UnescapeRule::Type unescape_rules, - url_parse::Parsed* new_parsed, - size_t* prefix_end, - Offsets* offsets_for_adjustment) { - url_parse::Parsed parsed_temp; - if (!new_parsed) - new_parsed = &parsed_temp; - else - *new_parsed = url_parse::Parsed(); - Offsets original_offsets; - if (offsets_for_adjustment) - original_offsets = *offsets_for_adjustment; - - // Special handling for view-source:. Don't use content::kViewSourceScheme - // because this library shouldn't depend on chrome. - const char* const kViewSource = "view-source"; - // Reject "view-source:view-source:..." to avoid deep recursion. - const char* const kViewSourceTwice = "view-source:view-source:"; - if (url.SchemeIs(kViewSource) && - !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) { - return FormatViewSourceUrl(url, original_offsets, languages, format_types, - unescape_rules, new_parsed, prefix_end, - offsets_for_adjustment); - } - - // We handle both valid and invalid URLs (this will give us the spec - // regardless of validity). - const std::string& spec = url.possibly_invalid_spec(); - const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); - - // Scheme & separators. These are ASCII. - base::string16 url_string; - url_string.insert(url_string.end(), spec.begin(), - spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, - true)); - const char kHTTP[] = "http://"; - const char kFTP[] = "ftp."; - // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. 
This - // means that if we trim "http://" off a URL whose host starts with "ftp." and - // the user inputs this into any field subject to fixup (which is basically - // all input fields), the meaning would be changed. (In fact, often the - // formatted URL is directly pre-filled into an input field.) For this reason - // we avoid stripping "http://" in this case. - bool omit_http = (format_types & kFormatUrlOmitHTTP) && - EqualsASCII(url_string, kHTTP) && - !StartsWithASCII(url.host(), kFTP, true); - new_parsed->scheme = parsed.scheme; - - // Username & password. - if ((format_types & kFormatUrlOmitUsernamePassword) != 0) { - // Remove the username and password fields. We don't want to display those - // to the user since they can be used for attacks, - // e.g. "http://google.com:search@evil.ru/" - new_parsed->username.reset(); - new_parsed->password.reset(); - // Update the offsets based on removed username and/or password. - if (offsets_for_adjustment && !offsets_for_adjustment->empty() && - (parsed.username.is_nonempty() || parsed.password.is_nonempty())) { - base::OffsetAdjuster offset_adjuster(offsets_for_adjustment); - if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) { - // The seeming off-by-one and off-by-two in these first two lines are to - // account for the ':' after the username and '@' after the password. - offset_adjuster.Add(base::OffsetAdjuster::Adjustment( - static_cast<size_t>(parsed.username.begin), - static_cast<size_t>(parsed.username.len + parsed.password.len + 2), - 0)); - } else { - const url_parse::Component* nonempty_component = - parsed.username.is_nonempty() ? &parsed.username : &parsed.password; - // The seeming off-by-one in below is to account for the '@' after the - // username/password. 
- offset_adjuster.Add(base::OffsetAdjuster::Adjustment( - static_cast<size_t>(nonempty_component->begin), - static_cast<size_t>(nonempty_component->len + 1), 0)); - } - } - } else { - AppendFormattedComponent(spec, parsed.username, original_offsets, - NonHostComponentTransform(unescape_rules), &url_string, - &new_parsed->username, offsets_for_adjustment); - if (parsed.password.is_valid()) - url_string.push_back(':'); - AppendFormattedComponent(spec, parsed.password, original_offsets, - NonHostComponentTransform(unescape_rules), &url_string, - &new_parsed->password, offsets_for_adjustment); - if (parsed.username.is_valid() || parsed.password.is_valid()) - url_string.push_back('@'); - } - if (prefix_end) - *prefix_end = static_cast<size_t>(url_string.length()); - - // Host. - AppendFormattedComponent(spec, parsed.host, original_offsets, - HostComponentTransform(languages), &url_string, &new_parsed->host, - offsets_for_adjustment); - - // Port. - if (parsed.port.is_nonempty()) { - url_string.push_back(':'); - new_parsed->port.begin = url_string.length(); - url_string.insert(url_string.end(), - spec.begin() + parsed.port.begin, - spec.begin() + parsed.port.end()); - new_parsed->port.len = url_string.length() - new_parsed->port.begin; - } else { - new_parsed->port.reset(); - } - - // Path & query. Both get the same general unescape & convert treatment. 
- if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) || - !CanStripTrailingSlash(url)) { - AppendFormattedComponent(spec, parsed.path, original_offsets, - NonHostComponentTransform(unescape_rules), &url_string, - &new_parsed->path, offsets_for_adjustment); - } else { - base::OffsetAdjuster offset_adjuster(offsets_for_adjustment); - offset_adjuster.Add(base::OffsetAdjuster::Adjustment( - url_string.length(), parsed.path.len, 0)); - } - if (parsed.query.is_valid()) - url_string.push_back('?'); - AppendFormattedComponent(spec, parsed.query, original_offsets, - NonHostComponentTransform(unescape_rules), &url_string, - &new_parsed->query, offsets_for_adjustment); - - // Ref. This is valid, unescaped UTF-8, so we can just convert. - if (parsed.ref.is_valid()) - url_string.push_back('#'); - AppendFormattedComponent(spec, parsed.ref, original_offsets, - NonHostComponentTransform(UnescapeRule::NONE), &url_string, - &new_parsed->ref, offsets_for_adjustment); - - // If we need to strip out http do it after the fact. This way we don't need - // to worry about how offset_for_adjustment is interpreted. - if (omit_http && StartsWith(url_string, base::ASCIIToUTF16(kHTTP), true)) { - const size_t kHTTPSize = arraysize(kHTTP) - 1; - url_string = url_string.substr(kHTTPSize); - if (offsets_for_adjustment && !offsets_for_adjustment->empty()) { - base::OffsetAdjuster offset_adjuster(offsets_for_adjustment); - offset_adjuster.Add(base::OffsetAdjuster::Adjustment(0, kHTTPSize, 0)); - } - if (prefix_end) - *prefix_end -= kHTTPSize; - - // Adjust new_parsed. - DCHECK(new_parsed->scheme.is_valid()); - int delta = -(new_parsed->scheme.len + 3); // +3 for ://. 
- new_parsed->scheme.reset(); - AdjustAllComponentsButScheme(delta, new_parsed); - } - - LimitOffsets(url_string, offsets_for_adjustment); - return url_string; -} - -base::string16 FormatUrl(const GURL& url, - const std::string& languages, - FormatUrlTypes format_types, - UnescapeRule::Type unescape_rules, - url_parse::Parsed* new_parsed, - size_t* prefix_end, - size_t* offset_for_adjustment) { - Offsets offsets; - if (offset_for_adjustment) - offsets.push_back(*offset_for_adjustment); - base::string16 result = FormatUrlWithOffsets(url, languages, format_types, - unescape_rules, new_parsed, prefix_end, &offsets); - if (offset_for_adjustment) - *offset_for_adjustment = offsets[0]; - return result; -} - bool CanStripTrailingSlash(const GURL& url) { // Omit the path only for standard, non-file URLs with nothing but "/" after // the hostname. diff --git a/net/base/net_util_icu.cc b/net/base/net_util_icu.cc new file mode 100644 index 0000000..2a4f6ff --- /dev/null +++ b/net/base/net_util_icu.cc @@ -0,0 +1,866 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "net/base/net_util.h" + +#include <map> +#include <vector> + +#include "base/i18n/time_formatting.h" +#include "base/json/string_escape.h" +#include "base/lazy_instance.h" +#include "base/logging.h" +#include "base/memory/singleton.h" +#include "base/stl_util.h" +#include "base/strings/string_tokenizer.h" +#include "base/strings/string_util.h" +#include "base/strings/utf_offset_string_conversions.h" +#include "base/strings/utf_string_conversions.h" +#include "base/time/time.h" +#include "url/gurl.h" +#include "third_party/icu/source/common/unicode/uidna.h" +#include "third_party/icu/source/common/unicode/uniset.h" +#include "third_party/icu/source/common/unicode/uscript.h" +#include "third_party/icu/source/common/unicode/uset.h" +#include "third_party/icu/source/i18n/unicode/datefmt.h" +#include "third_party/icu/source/i18n/unicode/regex.h" +#include "third_party/icu/source/i18n/unicode/ulocdata.h" + +using base::Time; + +namespace net { + +namespace { + +typedef std::vector<size_t> Offsets; + +// Does some simple normalization of scripts so we can allow certain scripts +// to exist together. +// TODO(brettw) bug 880223: we should allow some other languages to be +// combined such as Chinese and Latin. We will probably need a more +// complicated system of language pairs to have more fine-grained control. +UScriptCode NormalizeScript(UScriptCode code) { + switch (code) { + case USCRIPT_KATAKANA: + case USCRIPT_HIRAGANA: + case USCRIPT_KATAKANA_OR_HIRAGANA: + case USCRIPT_HANGUL: // This one is arguable. 
+ return USCRIPT_HAN; + default: + return code; + } +} + +bool IsIDNComponentInSingleScript(const base::char16* str, int str_len) { + UScriptCode first_script = USCRIPT_INVALID_CODE; + bool is_first = true; + + int i = 0; + while (i < str_len) { + unsigned code_point; + U16_NEXT(str, i, str_len, code_point); + + UErrorCode err = U_ZERO_ERROR; + UScriptCode cur_script = uscript_getScript(code_point, &err); + if (err != U_ZERO_ERROR) + return false; // Report mixed on error. + cur_script = NormalizeScript(cur_script); + + // TODO(brettw) We may have to check for USCRIPT_INHERENT as well. + if (is_first && cur_script != USCRIPT_COMMON) { + first_script = cur_script; + is_first = false; + } else { + if (cur_script != USCRIPT_COMMON && cur_script != first_script) + return false; + } + } + return true; +} + +// Check if the script of a language can be 'safely' mixed with +// Latin letters in the ASCII range. +bool IsCompatibleWithASCIILetters(const std::string& lang) { + // For now, just list Chinese, Japanese and Korean (positive list). + // An alternative is negative-listing (languages using Greek and + // Cyrillic letters), but it can be more dangerous. 
+ return !lang.substr(0, 2).compare("zh") || + !lang.substr(0, 2).compare("ja") || + !lang.substr(0, 2).compare("ko"); +} + +typedef std::map<std::string, icu::UnicodeSet*> LangToExemplarSetMap; + +class LangToExemplarSet { + public: + static LangToExemplarSet* GetInstance() { + return Singleton<LangToExemplarSet>::get(); + } + + private: + LangToExemplarSetMap map; + LangToExemplarSet() { } + ~LangToExemplarSet() { + STLDeleteContainerPairSecondPointers(map.begin(), map.end()); + } + + friend class Singleton<LangToExemplarSet>; + friend struct DefaultSingletonTraits<LangToExemplarSet>; + friend bool GetExemplarSetForLang(const std::string&, icu::UnicodeSet**); + friend void SetExemplarSetForLang(const std::string&, icu::UnicodeSet*); + + DISALLOW_COPY_AND_ASSIGN(LangToExemplarSet); +}; + +bool GetExemplarSetForLang(const std::string& lang, + icu::UnicodeSet** lang_set) { + const LangToExemplarSetMap& map = LangToExemplarSet::GetInstance()->map; + LangToExemplarSetMap::const_iterator pos = map.find(lang); + if (pos != map.end()) { + *lang_set = pos->second; + return true; + } + return false; +} + +void SetExemplarSetForLang(const std::string& lang, + icu::UnicodeSet* lang_set) { + LangToExemplarSetMap& map = LangToExemplarSet::GetInstance()->map; + map.insert(std::make_pair(lang, lang_set)); +} + +static base::LazyInstance<base::Lock>::Leaky + g_lang_set_lock = LAZY_INSTANCE_INITIALIZER; + +// Returns true if all the characters in component_characters are used by +// the language |lang|. +bool IsComponentCoveredByLang(const icu::UnicodeSet& component_characters, + const std::string& lang) { + CR_DEFINE_STATIC_LOCAL( + const icu::UnicodeSet, kASCIILetters, ('a', 'z')); + icu::UnicodeSet* lang_set = NULL; + // We're called from both the UI thread and the history thread. 
+ { + base::AutoLock lock(g_lang_set_lock.Get()); + if (!GetExemplarSetForLang(lang, &lang_set)) { + UErrorCode status = U_ZERO_ERROR; + ULocaleData* uld = ulocdata_open(lang.c_str(), &status); + // TODO(jungshik) Turn this check on when the ICU data file is + // rebuilt with the minimal subset of locale data for languages + // to which Chrome is not localized but which we offer in the list + // of languages selectable for Accept-Languages. With the rebuilt ICU + // data, ulocdata_open never should fall back to the default locale. + // (issue 2078) + // DCHECK(U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING); + if (U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING) { + lang_set = reinterpret_cast<icu::UnicodeSet *>( + ulocdata_getExemplarSet(uld, NULL, 0, + ULOCDATA_ES_STANDARD, &status)); + // If |lang| is compatible with ASCII Latin letters, add them. + if (IsCompatibleWithASCIILetters(lang)) + lang_set->addAll(kASCIILetters); + } else { + lang_set = new icu::UnicodeSet(1, 0); + } + lang_set->freeze(); + SetExemplarSetForLang(lang, lang_set); + ulocdata_close(uld); + } + } + return !lang_set->isEmpty() && lang_set->containsAll(component_characters); +} + +// Returns true if the given Unicode host component is safe to display to the +// user. +bool IsIDNComponentSafe(const base::char16* str, + int str_len, + const std::string& languages) { + // Most common cases (non-IDN) do not reach here so that we don't + // need a fast return path. + // TODO(jungshik) : Check if there's any character inappropriate + // (although allowed) for domain names. + // See http://www.unicode.org/reports/tr39/#IDN_Security_Profiles and + // http://www.unicode.org/reports/tr39/data/xidmodifications.txt + // For now, we borrowed the list from Mozilla and tweaked it slightly. + // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because + // they're gonna be canonicalized to U+0020 and full stop before + // reaching here.) 
+ // The original list is available at + // http://kb.mozillazine.org/Network.IDN.blacklist_chars and + // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703 + + UErrorCode status = U_ZERO_ERROR; +#ifdef U_WCHAR_IS_UTF16 + icu::UnicodeSet dangerous_characters(icu::UnicodeString( + L"[[\\ \u00ad\u00bc\u00bd\u01c3\u0337\u0338" + L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]" + L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]" + L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae" + L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014" + L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14" + L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]" + L"[\ufffa-\ufffd]]"), status); + DCHECK(U_SUCCESS(status)); + icu::RegexMatcher dangerous_patterns(icu::UnicodeString( + // Lone katakana no, so, or n + L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]" + // Repeating Japanese accent characters + L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"), + 0, status); +#else + icu::UnicodeSet dangerous_characters(icu::UnicodeString( + "[[\\u0020\\u00ad\\u00bc\\u00bd\\u01c3\\u0337\\u0338" + "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]" + "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]" + "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae" + "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014" + "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14" + "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]" + "[\\ufffa-\\ufffd]]", -1, US_INV), status); + DCHECK(U_SUCCESS(status)); + icu::RegexMatcher dangerous_patterns(icu::UnicodeString( + // Lone katakana no, so, or n + "[^\\p{Katakana}][\\u30ce\\u30f3\u30bd][^\\p{Katakana}]" + // Repeating Japanese accent characters + "|[\\u3099\\u309a\\u309b\\u309c][\\u3099\\u309a\\u309b\\u309c]"), + 0, status); +#endif + DCHECK(U_SUCCESS(status)); + icu::UnicodeSet component_characters; + 
icu::UnicodeString component_string(str, str_len); + component_characters.addAll(component_string); + if (dangerous_characters.containsSome(component_characters)) + return false; + + DCHECK(U_SUCCESS(status)); + dangerous_patterns.reset(component_string); + if (dangerous_patterns.find()) + return false; + + // If the language list is empty, the result is completely determined + // by whether a component is a single script or not. This will block + // even "safe" script mixing cases like <Chinese, Latin-ASCII> that are + // allowed with |languages| (while it blocks Chinese + Latin letters with + // an accent as should be the case), but we want to err on the safe side + // when |languages| is empty. + if (languages.empty()) + return IsIDNComponentInSingleScript(str, str_len); + + // |common_characters| is made up of ASCII numbers, hyphen, plus and + // underscore that are used across scripts and allowed in domain names. + // (sync'd with characters allowed in url_canon_host with square + // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc. + icu::UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"), + status); + DCHECK(U_SUCCESS(status)); + // Subtract common characters because they're always allowed so that + // we just have to check if a language-specific set contains + // the remainder. + component_characters.removeAll(common_characters); + + base::StringTokenizer t(languages, ","); + while (t.GetNext()) { + if (IsComponentCoveredByLang(component_characters, t.token())) + return true; + } + return false; +} + +// A wrapper to use LazyInstance<>::Leaky with ICU's UIDNA, a C pointer to +// a UTS46/IDNA 2008 handling object opened with uidna_openUTS46(). +// +// We use UTS46 with BiDiCheck to migrate from IDNA 2003 to IDNA 2008 with +// the backward compatibility in mind. What it does: +// +// 1. Use the up-to-date Unicode data. +// 2. Define a case folding/mapping with the up-to-date Unicode data as +// in IDNA 2003. +// 3. 
Use transitional mechanism for 4 deviation characters (sharp-s, +// final sigma, ZWJ and ZWNJ) for now. +// 4. Continue to allow symbols and punctuations. +// 5. Apply new BiDi check rules more permissive than the IDNA 2003 BiDi rules. +// 6. Do not apply STD3 rules. +// 7. Do not allow unassigned code points. +// +// It also closely matches what IE 10 does except for the BiDi check ( +// http://goo.gl/3XBhqw ). +// See http://unicode.org/reports/tr46/ and references therein +// for more details. +struct UIDNAWrapper { + UIDNAWrapper() { + UErrorCode err = U_ZERO_ERROR; + // TODO(jungshik): Change options as different parties (browsers, + // registrars, search engines) converge toward a consensus. + value = uidna_openUTS46(UIDNA_CHECK_BIDI, &err); + if (U_FAILURE(err)) + value = NULL; + } + + UIDNA* value; +}; + +static base::LazyInstance<UIDNAWrapper>::Leaky + g_uidna = LAZY_INSTANCE_INITIALIZER; + +// Converts one component of a host (between dots) to IDN if safe. The result +// will be APPENDED to the given output string and will be the same as the input +// if it is not IDN or the IDN is unsafe to display. Returns whether any +// conversion was performed. +bool IDNToUnicodeOneComponent(const base::char16* comp, + size_t comp_len, + const std::string& languages, + base::string16* out) { + DCHECK(out); + if (comp_len == 0) + return false; + + // Only transform if the input can be an IDN component. + static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'}; + if ((comp_len > arraysize(kIdnPrefix)) && + !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(base::char16))) { + UIDNA* uidna = g_uidna.Get().value; + DCHECK(uidna != NULL); + size_t original_length = out->length(); + int output_length = 64; + UIDNAInfo info = UIDNA_INFO_INITIALIZER; + UErrorCode status; + do { + out->resize(original_length + output_length); + status = U_ZERO_ERROR; + // This returns the actual length required. 
If this is more than 64 + // code units, |status| will be U_BUFFER_OVERFLOW_ERROR and we'll try + // the conversion again, but with a sufficiently large buffer. + output_length = uidna_labelToUnicode( + uidna, comp, static_cast<int32_t>(comp_len), &(*out)[original_length], + output_length, &info, &status); + } while ((status == U_BUFFER_OVERFLOW_ERROR && info.errors == 0)); + + if (U_SUCCESS(status) && info.errors == 0) { + // Converted successfully. Ensure that the converted component + // can be safely displayed to the user. + out->resize(original_length + output_length); + if (IsIDNComponentSafe(out->data() + original_length, output_length, + languages)) + return true; + } + + // Something went wrong. Revert to original string. + out->resize(original_length); + } + + // We get here with no IDN or on error, in which case we just append the + // literal input. + out->append(comp, comp_len); + return false; +} + +// Clamps the offsets in |offsets_for_adjustment| to the length of |str|. +void LimitOffsets(const base::string16& str, Offsets* offsets_for_adjustment) { + if (offsets_for_adjustment) { + std::for_each(offsets_for_adjustment->begin(), + offsets_for_adjustment->end(), + base::LimitOffset<base::string16>(str.length())); + } +} + +// TODO(brettw) bug 734373: check the scripts for each host component and +// don't un-IDN-ize if there is more than one. Alternatively, only IDN for +// scripts that the user has installed. For now, just put the entire +// path through IDN. Maybe this feature can be implemented in ICU itself? +// +// We may want to skip this step in the case of file URLs to allow unicode +// UNC hostnames regardless of encodings. +base::string16 IDNToUnicodeWithOffsets(const std::string& host, + const std::string& languages, + Offsets* offsets_for_adjustment) { + // Convert the ASCII input to a base::string16 for ICU. 
+ base::string16 input16; + input16.reserve(host.length()); + input16.insert(input16.end(), host.begin(), host.end()); + + // Do each component of the host separately, since we enforce script matching + // on a per-component basis. + base::string16 out16; + { + base::OffsetAdjuster offset_adjuster(offsets_for_adjustment); + for (size_t component_start = 0, component_end; + component_start < input16.length(); + component_start = component_end + 1) { + // Find the end of the component. + component_end = input16.find('.', component_start); + if (component_end == base::string16::npos) + component_end = input16.length(); // For getting the last component. + size_t component_length = component_end - component_start; + size_t new_component_start = out16.length(); + bool converted_idn = false; + if (component_end > component_start) { + // Add the substring that we just found. + converted_idn = IDNToUnicodeOneComponent( + input16.data() + component_start, component_length, languages, + &out16); + } + size_t new_component_length = out16.length() - new_component_start; + + if (converted_idn && offsets_for_adjustment) { + offset_adjuster.Add(base::OffsetAdjuster::Adjustment(component_start, + component_length, new_component_length)); + } + + // Need to add the dot we just found (if we found one). + if (component_end < input16.length()) + out16.push_back('.'); + } + } + + LimitOffsets(out16, offsets_for_adjustment); + return out16; +} + +// Called after transforming a component to set all affected elements in +// |offsets_for_adjustment| to the correct new values. |original_offsets| +// represents the offsets before the transform; |original_component_begin| and +// |original_component_end| represent the pre-transform boundaries of the +// affected component. 
|transformed_offsets| should be a vector created by +// adjusting |original_offsets| to be relative to the beginning of the component +// in question (via an OffsetAdjuster) and then transformed along with the +// component. Note that any elements in this vector which didn't originally +// point into the component may contain arbitrary values and should be ignored. +// |transformed_component_begin| and |transformed_component_end| are the +// endpoints of the transformed component and are used in combination with the +// two offset vectors to calculate the resulting absolute offsets, which are +// stored in |offsets_for_adjustment|. +void AdjustForComponentTransform(const Offsets& original_offsets, + size_t original_component_begin, + size_t original_component_end, + const Offsets& transformed_offsets, + size_t transformed_component_begin, + size_t transformed_component_end, + Offsets* offsets_for_adjustment) { + if (!offsets_for_adjustment) + return; // Nothing to do. + + for (size_t i = 0; i < original_offsets.size(); ++i) { + size_t original_offset = original_offsets[i]; + if ((original_offset >= original_component_begin) && + (original_offset < original_component_end)) { + // This offset originally pointed into the transformed component. + // Adjust the transformed relative offset by the new beginning point of + // the transformed component. + size_t transformed_offset = transformed_offsets[i]; + (*offsets_for_adjustment)[i] = + (transformed_offset == base::string16::npos) ? + base::string16::npos : + (transformed_offset + transformed_component_begin); + } else if ((original_offset >= original_component_end) && + (original_offset != std::string::npos)) { + // This offset pointed after the transformed component. Adjust the + // original absolute offset by the difference between the new and old + // component lengths. 
+ (*offsets_for_adjustment)[i] = + original_offset - original_component_end + transformed_component_end; + } + } +} + +// If |component| is valid, its begin is incremented by |delta|. +void AdjustComponent(int delta, url_parse::Component* component) { + if (!component->is_valid()) + return; + + DCHECK(delta >= 0 || component->begin >= -delta); + component->begin += delta; +} + +// Adjusts all the components of |parsed| by |delta|, except for the scheme. +void AdjustAllComponentsButScheme(int delta, url_parse::Parsed* parsed) { + AdjustComponent(delta, &(parsed->username)); + AdjustComponent(delta, &(parsed->password)); + AdjustComponent(delta, &(parsed->host)); + AdjustComponent(delta, &(parsed->port)); + AdjustComponent(delta, &(parsed->path)); + AdjustComponent(delta, &(parsed->query)); + AdjustComponent(delta, &(parsed->ref)); +} + +// Helper for FormatUrlWithOffsets(). +base::string16 FormatViewSourceUrl(const GURL& url, + const Offsets& original_offsets, + const std::string& languages, + FormatUrlTypes format_types, + UnescapeRule::Type unescape_rules, + url_parse::Parsed* new_parsed, + size_t* prefix_end, + Offsets* offsets_for_adjustment) { + DCHECK(new_parsed); + const char kViewSource[] = "view-source:"; + const size_t kViewSourceLength = arraysize(kViewSource) - 1; + + // Format the underlying URL and adjust offsets. 
+ const std::string& url_str(url.possibly_invalid_spec()); + Offsets offsets_into_underlying_url(original_offsets); + { + base::OffsetAdjuster adjuster(&offsets_into_underlying_url); + adjuster.Add(base::OffsetAdjuster::Adjustment(0, kViewSourceLength, 0)); + } + base::string16 result(base::ASCIIToUTF16(kViewSource) + + FormatUrlWithOffsets(GURL(url_str.substr(kViewSourceLength)), languages, + format_types, unescape_rules, new_parsed, prefix_end, + &offsets_into_underlying_url)); + AdjustForComponentTransform(original_offsets, kViewSourceLength, + url_str.length(), offsets_into_underlying_url, + kViewSourceLength, result.length(), + offsets_for_adjustment); + LimitOffsets(result, offsets_for_adjustment); + + // Adjust positions of the parsed components. + if (new_parsed->scheme.is_nonempty()) { + // Assume "view-source:real-scheme" as a scheme. + new_parsed->scheme.len += kViewSourceLength; + } else { + new_parsed->scheme.begin = 0; + new_parsed->scheme.len = kViewSourceLength - 1; + } + AdjustAllComponentsButScheme(kViewSourceLength, new_parsed); + + if (prefix_end) + *prefix_end += kViewSourceLength; + + return result; +} + +class AppendComponentTransform { + public: + AppendComponentTransform() {} + virtual ~AppendComponentTransform() {} + + virtual base::string16 Execute(const std::string& component_text, + Offsets* offsets_into_component) const = 0; + + // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an + // accessible copy constructor in order to call AppendFormattedComponent() + // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ). 
+}; + +class HostComponentTransform : public AppendComponentTransform { + public: + explicit HostComponentTransform(const std::string& languages) + : languages_(languages) { + } + + private: + virtual base::string16 Execute( + const std::string& component_text, + Offsets* offsets_into_component) const OVERRIDE { + return IDNToUnicodeWithOffsets(component_text, languages_, + offsets_into_component); + } + + const std::string& languages_; +}; + +class NonHostComponentTransform : public AppendComponentTransform { + public: + explicit NonHostComponentTransform(UnescapeRule::Type unescape_rules) + : unescape_rules_(unescape_rules) { + } + + private: + virtual base::string16 Execute( + const std::string& component_text, + Offsets* offsets_into_component) const OVERRIDE { + return (unescape_rules_ == UnescapeRule::NONE) ? + base::UTF8ToUTF16AndAdjustOffsets(component_text, + offsets_into_component) : + UnescapeAndDecodeUTF8URLComponentWithOffsets(component_text, + unescape_rules_, offsets_into_component); + } + + const UnescapeRule::Type unescape_rules_; +}; + +// Transforms the portion of |spec| covered by |original_component| according to +// |transform|. Appends the result to |output|. If |output_component| is +// non-NULL, its start and length are set to the transformed component's new +// start and length. For each element in |original_offsets| which is at least +// as large as original_component.begin, the corresponding element of +// |offsets_for_adjustment| is transformed appropriately. 
+void AppendFormattedComponent(const std::string& spec,
+                              const url_parse::Component& original_component,
+                              const Offsets& original_offsets,
+                              const AppendComponentTransform& transform,
+                              base::string16* output,
+                              url_parse::Component* output_component,
+                              Offsets* offsets_for_adjustment) {
+  DCHECK(output);
+
+  // Nothing is appended for an empty component; only clear the output
+  // component when the caller supplied one.
+  if (!original_component.is_nonempty()) {
+    if (output_component)
+      output_component->reset();
+    return;
+  }
+
+  const size_t source_begin = static_cast<size_t>(original_component.begin);
+  const size_t output_begin = output->length();
+  std::string component_str(spec, source_begin,
+                            static_cast<size_t>(original_component.len));
+
+  // Transform |component_str| and adjust the offsets accordingly.  The
+  // adjuster applies its adjustment when it goes out of scope, hence the
+  // extra block.
+  Offsets offsets_into_component(original_offsets);
+  {
+    base::OffsetAdjuster adjuster(&offsets_into_component);
+    adjuster.Add(base::OffsetAdjuster::Adjustment(0, source_begin, 0));
+  }
+  output->append(transform.Execute(component_str, &offsets_into_component));
+  AdjustForComponentTransform(original_offsets, source_begin,
+                              static_cast<size_t>(original_component.end()),
+                              offsets_into_component, output_begin,
+                              output->length(), offsets_for_adjustment);
+
+  // Record where the transformed component ended up in |output|.
+  if (output_component) {
+    output_component->begin = static_cast<int>(output_begin);
+    output_component->len = static_cast<int>(output->length() - output_begin);
+  }
+}
+
+}  // namespace
+
+const FormatUrlType kFormatUrlOmitNothing = 0;
+const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0;
+const FormatUrlType kFormatUrlOmitHTTP = 1 << 1;
+const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2;
+const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword |
+    kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname;
+
+// Convenience wrapper around IDNToUnicodeWithOffsets() for callers that do
+// not track any offsets.
+base::string16 IDNToUnicode(const std::string& host,
+                            const std::string& languages) {
+  return IDNToUnicodeWithOffsets(host, languages, NULL);
+}
+
+// Builds one "<script>addRow(...);</script>\n" line for a directory listing.
+// The addRow() arguments are, in order: the JSON-escaped |name|, the
+// JSON-escaped path, 1 or 0 for |is_dir|, the formatted size, and the
+// formatted modification time.
+std::string GetDirectoryListingEntry(const base::string16& name,
+                                     const std::string& raw_bytes,
+                                     bool is_dir,
+                                     int64 size,
+                                     Time modified) {
+  std::string result("<script>addRow(");
+  base::EscapeJSONString(name, true, &result);
+  result.append(",");
+
+  // The path is built from |raw_bytes| when they are supplied and from the
+  // UTF-8 form of |name| otherwise.
+  const std::string path_source =
+      raw_bytes.empty() ? base::UTF16ToUTF8(name) : raw_bytes;
+  base::EscapeJSONString(EscapePath(path_source), true, &result);
+
+  result.append(is_dir ? ",1," : ",0,");
+
+  // Negative size means unknown or not applicable (e.g. directory); the size
+  // string is left empty in that case.
+  base::string16 size_string;
+  if (size >= 0)
+    size_string = FormatBytesUnlocalized(size);
+  base::EscapeJSONString(size_string, true, &result);
+
+  result.append(",");
+
+  // |modified| can be null in FTP listings; the date string stays empty then.
+  base::string16 modified_str;
+  if (!modified.is_null())
+    modified_str = base::TimeFormatShortDateAndTime(modified);
+  base::EscapeJSONString(modified_str, true, &result);
+
+  result.append(");</script>\n");
+  return result;
+}
+
+// Appends the host of |url|, run through HostComponentTransform, to |output|.
+// No offsets are tracked: an empty offsets vector and NULL outputs are passed.
+void AppendFormattedHost(const GURL& url,
+                         const std::string& languages,
+                         base::string16* output) {
+  Offsets empty_offsets;
+  AppendFormattedComponent(url.possibly_invalid_spec(),
+                           url.parsed_for_possibly_invalid_spec().host,
+                           empty_offsets, HostComponentTransform(languages),
+                           output, NULL, NULL);
+}
+
+// Builds the display form of |url| component by component.  |format_types|
+// selects which parts are omitted and |unescape_rules| is applied to the
+// non-host components.  |new_parsed|, |prefix_end| and
+// |offsets_for_adjustment| are optional outputs (each is checked for NULL
+// before being written) that are kept in step with the returned string.
+base::string16 FormatUrlWithOffsets(
+    const GURL& url,
+    const std::string& languages,
+    FormatUrlTypes format_types,
+    UnescapeRule::Type unescape_rules,
+    url_parse::Parsed* new_parsed,
+    size_t* prefix_end,
+    Offsets* offsets_for_adjustment) {
+  // When the caller does not want the parsed result, point |new_parsed| at a
+  // local scratch object so that the code below can write to it
+  // unconditionally.
+  url_parse::Parsed parsed_temp;
+  if (new_parsed)
+    *new_parsed = url_parse::Parsed();
+  else
+    new_parsed = &parsed_temp;
+
+  Offsets original_offsets;
+  if (offsets_for_adjustment)
+    original_offsets = *offsets_for_adjustment;
+
+  // Special handling for view-source:.  Don't use content::kViewSourceScheme
+  // because this library shouldn't depend on chrome.
+  const char* const kViewSource = "view-source";
+  // Reject "view-source:view-source:..." to avoid deep recursion.
+  const char* const kViewSourceTwice = "view-source:view-source:";
+  if (url.SchemeIs(kViewSource) &&
+      !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) {
+    return FormatViewSourceUrl(url, original_offsets, languages, format_types,
+                               unescape_rules, new_parsed, prefix_end,
+                               offsets_for_adjustment);
+  }
+
+  // We handle both valid and invalid URLs (this will give us the spec
+  // regardless of validity).
+  const std::string& spec = url.possibly_invalid_spec();
+  const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec();
+
+  // Scheme & separators.  These are ASCII.
+  base::string16 url_string;
+  url_string.insert(url_string.end(), spec.begin(),
+      spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME,
+                                                  true));
+  const char kHTTP[] = "http://";
+  const char kFTP[] = "ftp.";
+  // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com.  This
+  // means that if we trim "http://" off a URL whose host starts with "ftp." and
+  // the user inputs this into any field subject to fixup (which is basically
+  // all input fields), the meaning would be changed.  (In fact, often the
+  // formatted URL is directly pre-filled into an input field.)  For this reason
+  // we avoid stripping "http://" in this case.
+  bool omit_http = (format_types & kFormatUrlOmitHTTP) &&
+      EqualsASCII(url_string, kHTTP) &&
+      !StartsWithASCII(url.host(), kFTP, true);
+  new_parsed->scheme = parsed.scheme;
+
+  // Username & password.
+  if ((format_types & kFormatUrlOmitUsernamePassword) != 0) {
+    // Remove the username and password fields.  We don't want to display those
+    // to the user since they can be used for attacks,
+    // e.g. "http://google.com:search@evil.ru/"
+    new_parsed->username.reset();
+    new_parsed->password.reset();
+    // Update the offsets based on removed username and/or password.
+    if (offsets_for_adjustment && !offsets_for_adjustment->empty() &&
+        (parsed.username.is_nonempty() || parsed.password.is_nonempty())) {
+      base::OffsetAdjuster offset_adjuster(offsets_for_adjustment);
+      if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) {
+        // The seeming off-by-two in the length below accounts for the ':'
+        // after the username and the '@' after the password.
+        offset_adjuster.Add(base::OffsetAdjuster::Adjustment(
+            static_cast<size_t>(parsed.username.begin),
+            static_cast<size_t>(parsed.username.len + parsed.password.len + 2),
+            0));
+      } else {
+        const url_parse::Component* nonempty_component =
+            parsed.username.is_nonempty() ? &parsed.username : &parsed.password;
+        // The seeming off-by-one below accounts for the '@' after the
+        // username/password.
+        offset_adjuster.Add(base::OffsetAdjuster::Adjustment(
+            static_cast<size_t>(nonempty_component->begin),
+            static_cast<size_t>(nonempty_component->len + 1), 0));
+      }
+    }
+  } else {
+    AppendFormattedComponent(spec, parsed.username, original_offsets,
+                             NonHostComponentTransform(unescape_rules),
+                             &url_string, &new_parsed->username,
+                             offsets_for_adjustment);
+    if (parsed.password.is_valid())
+      url_string.push_back(':');
+    AppendFormattedComponent(spec, parsed.password, original_offsets,
+                             NonHostComponentTransform(unescape_rules),
+                             &url_string, &new_parsed->password,
+                             offsets_for_adjustment);
+    if (parsed.username.is_valid() || parsed.password.is_valid())
+      url_string.push_back('@');
+  }
+  if (prefix_end)
+    *prefix_end = static_cast<size_t>(url_string.length());
+
+  // Host.
+  AppendFormattedComponent(spec, parsed.host, original_offsets,
+                           HostComponentTransform(languages), &url_string,
+                           &new_parsed->host, offsets_for_adjustment);
+
+  // Port.  Copied through unchanged from the spec.
+  if (parsed.port.is_nonempty()) {
+    url_string.push_back(':');
+    new_parsed->port.begin = url_string.length();
+    url_string.insert(url_string.end(),
+                      spec.begin() + parsed.port.begin,
+                      spec.begin() + parsed.port.end());
+    new_parsed->port.len = url_string.length() - new_parsed->port.begin;
+  } else {
+    new_parsed->port.reset();
+  }
+
+  // Path & query.  Both get the same general unescape & convert treatment.
+  const bool strip_trailing_slash =
+      (format_types & kFormatUrlOmitTrailingSlashOnBareHostname) &&
+      CanStripTrailingSlash(url);
+  if (strip_trailing_slash) {
+    // The path is dropped from the output, so only the offsets need updating.
+    base::OffsetAdjuster offset_adjuster(offsets_for_adjustment);
+    offset_adjuster.Add(base::OffsetAdjuster::Adjustment(
+        url_string.length(), parsed.path.len, 0));
+  } else {
+    AppendFormattedComponent(spec, parsed.path, original_offsets,
+                             NonHostComponentTransform(unescape_rules),
+                             &url_string, &new_parsed->path,
+                             offsets_for_adjustment);
+  }
+  if (parsed.query.is_valid())
+    url_string.push_back('?');
+  AppendFormattedComponent(spec, parsed.query, original_offsets,
+                           NonHostComponentTransform(unescape_rules),
+                           &url_string, &new_parsed->query,
+                           offsets_for_adjustment);
+
+  // Ref.  This is valid, unescaped UTF-8, so we can just convert.
+  if (parsed.ref.is_valid())
+    url_string.push_back('#');
+  AppendFormattedComponent(spec, parsed.ref, original_offsets,
+                           NonHostComponentTransform(UnescapeRule::NONE),
+                           &url_string, &new_parsed->ref,
+                           offsets_for_adjustment);
+
+  // If we need to strip out http do it after the fact.  This way we don't need
+  // to worry about how |offsets_for_adjustment| is interpreted.
+  if (omit_http && StartsWith(url_string, base::ASCIIToUTF16(kHTTP), true)) {
+    const size_t kHTTPSize = arraysize(kHTTP) - 1;
+    url_string = url_string.substr(kHTTPSize);
+    if (offsets_for_adjustment && !offsets_for_adjustment->empty()) {
+      base::OffsetAdjuster offset_adjuster(offsets_for_adjustment);
+      offset_adjuster.Add(base::OffsetAdjuster::Adjustment(0, kHTTPSize, 0));
+    }
+    if (prefix_end)
+      *prefix_end -= kHTTPSize;
+
+    // Adjust new_parsed.
+    DCHECK(new_parsed->scheme.is_valid());
+    int delta = -(new_parsed->scheme.len + 3);  // +3 for ://.
+    new_parsed->scheme.reset();
+    AdjustAllComponentsButScheme(delta, new_parsed);
+  }
+
+  LimitOffsets(url_string, offsets_for_adjustment);
+  return url_string;
+}
+
+// Single-offset front end for FormatUrlWithOffsets(): wraps
+// |offset_for_adjustment| (when non-NULL) in a one-element vector and copies
+// the adjusted value back afterwards.
+base::string16 FormatUrl(const GURL& url,
+                         const std::string& languages,
+                         FormatUrlTypes format_types,
+                         UnescapeRule::Type unescape_rules,
+                         url_parse::Parsed* new_parsed,
+                         size_t* prefix_end,
+                         size_t* offset_for_adjustment) {
+  Offsets offsets;
+  if (offset_for_adjustment)
+    offsets.push_back(*offset_for_adjustment);
+  base::string16 result = FormatUrlWithOffsets(
+      url, languages, format_types, unescape_rules, new_parsed, prefix_end,
+      &offsets);
+  if (offset_for_adjustment)
+    *offset_for_adjustment = offsets[0];
+  return result;
+}
+
+}  // namespace net
diff --git a/net/net.gypi b/net/net.gypi index 2fec5b6..795236c 100644 --- a/net/net.gypi +++ b/net/net.gypi @@ -43,6 +43,7 @@ 'base/net_module.h', 'base/net_util.cc', 'base/net_util.h', + 'base/net_util_icu.cc', 'base/net_util_posix.cc', 'base/openssl_private_key_store.h', 'base/openssl_private_key_store_android.cc', diff --git a/net/tools/DEPS b/net/tools/DEPS index 1ea12e4..4648a84 100644 --- a/net/tools/DEPS +++ b/net/tools/DEPS @@ -1,3 +1,7 @@ +include_rules = [ + "+base/i18n", +] + skip_child_includes = [ "balsa", "flip_server", |