diff options
author | mmentovai@google.com <mmentovai@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-08-21 18:15:35 +0000 |
---|---|---|
committer | mmentovai@google.com <mmentovai@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-08-21 18:15:35 +0000 |
commit | d862fd9d396126f3f06a8061412def6d61d156fd (patch) | |
tree | 7052b96f95ac8b9b2f3d861daddbc82d4cdda99a /net/base/net_util.cc | |
parent | 4167c3a50e73968cd47e0eb2502f017f432d367a (diff) | |
download | chromium_src-d862fd9d396126f3f06a8061412def6d61d156fd.zip chromium_src-d862fd9d396126f3f06a8061412def6d61d156fd.tar.gz chromium_src-d862fd9d396126f3f06a8061412def6d61d156fd.tar.bz2 |
Port cookie_monster, net_util, and registry_controlled_domain to POSIXish platforms
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@1164 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net/base/net_util.cc')
-rw-r--r-- | net/base/net_util.cc | 73 |
1 file changed, 35 insertions, 38 deletions
diff --git a/net/base/net_util.cc b/net/base/net_util.cc index e0cbd04..c2f9c7c 100644 --- a/net/base/net_util.cc +++ b/net/base/net_util.cc @@ -451,8 +451,8 @@ UScriptCode NormalizeScript(UScriptCode code) { } } -bool IsIDNComponentInSingleScript(const wchar_t* str, int str_len) { - UScriptCode first_script; +bool IsIDNComponentInSingleScript(const char16* str, int str_len) { + UScriptCode first_script = USCRIPT_INVALID_CODE; bool is_first = true; int i = 0; @@ -491,7 +491,7 @@ bool IsCompatibleWithASCIILetters(const std::string& lang) { // Returns true if the given Unicode host component is safe to display to the // user. -bool IsIDNComponentSafe(const wchar_t* str, +bool IsIDNComponentSafe(const char16* str, int str_len, const std::wstring& languages) { // Most common cases (non-IDN) do not reach here so that we don't @@ -532,14 +532,7 @@ bool IsIDNComponentSafe(const wchar_t* str, #endif DCHECK(U_SUCCESS(status)); UnicodeSet component_characters; -#ifdef WCHAR_T_IS_UTF32 - std::string16 converted_str; - WideToUTF16(str, str_len, &converted_str); - component_characters.addAll(UnicodeString(converted_str.c_str(), - converted_str.length())); -#else component_characters.addAll(UnicodeString(str, str_len)); -#endif if (dangerous_characters.containsSome(component_characters)) return false; @@ -598,10 +591,10 @@ bool IsIDNComponentSafe(const wchar_t* str, // Converts one component of a host (between dots) to IDN if safe. The result // will be APPENDED to the given output string and will be the same as the // input if it is not IDN or the IDN is unsafe to display. 
-void IDNToUnicodeOneComponent(const wchar_t* comp, +void IDNToUnicodeOneComponent(const char16* comp, int comp_len, const std::wstring& languages, - std::wstring* out) { + std::string16* out) { DCHECK(comp_len >= 0); if (comp_len == 0) return; @@ -612,7 +605,8 @@ void IDNToUnicodeOneComponent(const wchar_t* comp, size_t host_begin_in_output = out->size(); // Just copy the input if it can't be an IDN component. - if (comp_len < 4 || wcsncmp(comp, L"xn--", 4)) { + if (comp_len < 4 || + comp[0] != 'x' || comp[1] != 'n' || comp[2] != '-' || comp[3] != '-') { out->resize(host_begin_in_output + comp_len); for (int i = 0; i < comp_len; i++) (*out)[host_begin_in_output + i] = comp[i]; @@ -621,23 +615,10 @@ void IDNToUnicodeOneComponent(const wchar_t* comp, while (true) { UErrorCode status = U_ZERO_ERROR; -#if defined(WCHAR_T_IS_UTF32) - std::string16 comp16; - WideToUTF16(comp, comp_len, &comp16); - std::string16 out16; - WideToUTF16(out->c_str(), out->length(), &out16); - out16.resize(out16.size() + extra_space); - int output_chars = - uidna_IDNToUnicode(comp16.data(), static_cast<int32>(comp16.length()), - &(out16)[host_begin_in_output], extra_space, - UIDNA_DEFAULT, NULL, &status); - *out = UTF16ToWide(out16); -#else out->resize(out->size() + extra_space); int output_chars = uidna_IDNToUnicode(comp, comp_len, &(*out)[host_begin_in_output], - extra_space, UIDNA_DEFAULT, NULL, &status); -#endif + extra_space, UIDNA_DEFAULT, NULL, &status); if (status == U_ZERO_ERROR) { // Converted successfully. out->resize(host_begin_in_output + output_chars); @@ -812,37 +793,53 @@ void IDNToUnicode(const char* host, const std::wstring& languages, std::wstring* out) { // Convert the ASCII input to a wide string for ICU. 
- std::wstring wide_input; - wide_input.reserve(host_len); + std::string16 input16; + input16.reserve(host_len); for (int i = 0; i < host_len; i++) - wide_input.push_back(host[i]); + input16.push_back(host[i]); + + std::string16 out16; + // The output string is appended to, so convert what's already there if + // needed. +#if defined(WCHAR_T_IS_UTF32) + WideToUTF16(out->data(), out->length(), &out16); + out->clear(); // for equivalence with the swap below +#elif defined(WCHAR_T_IS_UTF16) + out->swap(out16); +#endif // Do each component of the host separately, since we enforce script matching // on a per-component basis. size_t cur_begin = 0; // Beginning of the current component (inclusive). - while (cur_begin < wide_input.size()) { + while (cur_begin < input16.size()) { // Find the next dot or the end of the string. - size_t next_dot = wide_input.find_first_of('.', cur_begin); + size_t next_dot = input16.find_first_of('.', cur_begin); if (next_dot == std::wstring::npos) - next_dot = wide_input.size(); // For getting the last component. + next_dot = input16.size(); // For getting the last component. if (next_dot > cur_begin) { // Add the substring that we just found. - IDNToUnicodeOneComponent(&wide_input[cur_begin], + IDNToUnicodeOneComponent(&input16[cur_begin], static_cast<int>(next_dot - cur_begin), languages, - out); + &out16); } // Need to add the dot we just found (if we found one). This needs to be // done before we break out below in case the URL ends in a dot. - if (next_dot < wide_input.size()) - out->push_back('.'); + if (next_dot < input16.size()) + out16.push_back('.'); else break; // No more components left. 
cur_begin = next_dot + 1; } + +#if defined(WCHAR_T_IS_UTF32) + UTF16ToWide(out16.data(), out16.length(), out); +#elif defined(WCHAR_T_IS_UTF16) + out->swap(out16); +#endif } std::string CanonicalizeHost(const std::string& host, bool* is_ip_address) { @@ -872,7 +869,7 @@ std::string CanonicalizeHost(const std::string& host, bool* is_ip_address) { // Return the host as a string, stripping any unnecessary bits off the ends. if ((canon_host_component.begin == 0) && - (canon_host_component.len == canon_host.length())) + (static_cast<size_t>(canon_host_component.len) == canon_host.length())) return canon_host; return canon_host.substr(canon_host_component.begin, canon_host_component.len); |