Port cookie_monster, net_util, and registry_controlled_domain to POSIXish platforms

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@1164 0039d316-1c4b-4281-b951-d872f2087c98
author: mmentovai@google.com <mmentovai@google.com@0039d316-1c4b-4281-b951-d872f2087c98> 2008-08-21 18:15:35 +0000
committer: mmentovai@google.com <mmentovai@google.com@0039d316-1c4b-4281-b951-d872f2087c98> 2008-08-21 18:15:35 +0000
commit: d862fd9d396126f3f06a8061412def6d61d156fd (patch)
tree: 7052b96f95ac8b9b2f3d861daddbc82d4cdda99a /net/base/net_util.cc
parent: 4167c3a50e73968cd47e0eb2502f017f432d367a (diff)
download: chromium_src-d862fd9d396126f3f06a8061412def6d61d156fd.zip
chromium_src-d862fd9d396126f3f06a8061412def6d61d156fd.tar.gz
chromium_src-d862fd9d396126f3f06a8061412def6d61d156fd.tar.bz2
1 file changed, 35 insertions, 38 deletions
diff --git a/net/base/net_util.cc b/net/base/net_util.cc
index e0cbd04..c2f9c7c 100644
--- a/net/base/net_util.cc
+++ b/net/base/net_util.cc
@@ -451,8 +451,8 @@ UScriptCode NormalizeScript(UScriptCode code) {
   }
 }
 
-bool IsIDNComponentInSingleScript(const wchar_t* str, int str_len) {
-  UScriptCode first_script;
+bool IsIDNComponentInSingleScript(const char16* str, int str_len) {
+  UScriptCode first_script = USCRIPT_INVALID_CODE;
   bool is_first = true;
 
   int i = 0;
@@ -491,7 +491,7 @@ bool IsCompatibleWithASCIILetters(const std::string& lang) {
 
 // Returns true if the given Unicode host component is safe to display to the
 // user.
-bool IsIDNComponentSafe(const wchar_t* str,
+bool IsIDNComponentSafe(const char16* str,
                         int str_len,
                         const std::wstring& languages) {
   // Most common cases (non-IDN) do not reach here so that we don't
@@ -532,14 +532,7 @@ bool IsIDNComponentSafe(const wchar_t* str,
 #endif
   DCHECK(U_SUCCESS(status));
   UnicodeSet component_characters;
-#ifdef WCHAR_T_IS_UTF32
-  std::string16 converted_str;
-  WideToUTF16(str, str_len, &converted_str);
-  component_characters.addAll(UnicodeString(converted_str.c_str(),
-                                            converted_str.length()));
-#else
   component_characters.addAll(UnicodeString(str, str_len));
-#endif
   if (dangerous_characters.containsSome(component_characters))
     return false;
 
@@ -598,10 +591,10 @@ bool IsIDNComponentSafe(const wchar_t* str,
 // Converts one component of a host (between dots) to IDN if safe. The result
 // will be APPENDED to the given output string and  will be the same as the
 // input if it is not IDN or the IDN is unsafe to display.
-void IDNToUnicodeOneComponent(const wchar_t* comp,
+void IDNToUnicodeOneComponent(const char16* comp,
                               int comp_len,
                               const std::wstring& languages,
-                              std::wstring* out) {
+                              std::string16* out) {
   DCHECK(comp_len >= 0);
   if (comp_len == 0)
     return;
@@ -612,7 +605,8 @@ void IDNToUnicodeOneComponent(const wchar_t* comp,
   size_t host_begin_in_output = out->size();
 
   // Just copy the input if it can't be an IDN component.
-  if (comp_len < 4 || wcsncmp(comp, L"xn--", 4)) {
+  if (comp_len < 4 ||
+      comp[0] != 'x' || comp[1] != 'n' || comp[2] != '-' || comp[3] != '-') {
     out->resize(host_begin_in_output + comp_len);
     for (int i = 0; i < comp_len; i++)
       (*out)[host_begin_in_output + i] = comp[i];
@@ -621,23 +615,10 @@ void IDNToUnicodeOneComponent(const wchar_t* comp,
 
   while (true) {
     UErrorCode status = U_ZERO_ERROR;
-#if defined(WCHAR_T_IS_UTF32)
-    std::string16 comp16;
-    WideToUTF16(comp, comp_len, &comp16);
-    std::string16 out16;
-    WideToUTF16(out->c_str(), out->length(), &out16);
-    out16.resize(out16.size() + extra_space);
-    int output_chars =
-        uidna_IDNToUnicode(comp16.data(), static_cast<int32>(comp16.length()),
-                           &(out16)[host_begin_in_output], extra_space,
-                           UIDNA_DEFAULT, NULL, &status);
-    *out = UTF16ToWide(out16);
-#else
     out->resize(out->size() + extra_space);
     int output_chars =
         uidna_IDNToUnicode(comp, comp_len, &(*out)[host_begin_in_output],
-                          extra_space, UIDNA_DEFAULT, NULL, &status);
-#endif
+                           extra_space, UIDNA_DEFAULT, NULL, &status);
     if (status == U_ZERO_ERROR) {
       // Converted successfully.
       out->resize(host_begin_in_output + output_chars);
@@ -812,37 +793,53 @@ void IDNToUnicode(const char* host,
                   const std::wstring& languages,
                   std::wstring* out) {
   // Convert the ASCII input to a wide string for ICU.
-  std::wstring wide_input;
-  wide_input.reserve(host_len);
+  std::string16 input16;
+  input16.reserve(host_len);
   for (int i = 0; i < host_len; i++)
-    wide_input.push_back(host[i]);
+    input16.push_back(host[i]);
+
+  std::string16 out16;
+  // The output string is appended to, so convert what's already there if
+  // needed.
+#if defined(WCHAR_T_IS_UTF32)
+  WideToUTF16(out->data(), out->length(), &out16);
+  out->clear();  // for equivalence with the swap below
+#elif defined(WCHAR_T_IS_UTF16)
+  out->swap(out16);
+#endif
 
   // Do each component of the host separately, since we enforce script matching
   // on a per-component basis.
   size_t cur_begin = 0;  // Beginning of the current component (inclusive).
-  while (cur_begin < wide_input.size()) {
+  while (cur_begin < input16.size()) {
     // Find the next dot or the end of the string.
-    size_t next_dot = wide_input.find_first_of('.', cur_begin);
+    size_t next_dot = input16.find_first_of('.', cur_begin);
     if (next_dot == std::wstring::npos)
-      next_dot = wide_input.size();  // For getting the last component.
+      next_dot = input16.size();  // For getting the last component.
 
     if (next_dot > cur_begin) {
       // Add the substring that we just found.
-      IDNToUnicodeOneComponent(&wide_input[cur_begin],
+      IDNToUnicodeOneComponent(&input16[cur_begin],
                                static_cast<int>(next_dot - cur_begin),
                                languages,
-                               out);
+                               &out16);
     }
 
     // Need to add the dot we just found (if we found one). This needs to be
     // done before we break out below in case the URL ends in a dot.
-    if (next_dot < wide_input.size())
-      out->push_back('.');
+    if (next_dot < input16.size())
+      out16.push_back('.');
     else
       break;  // No more components left.
 
     cur_begin = next_dot + 1;
   }
+
+#if defined(WCHAR_T_IS_UTF32)
+  UTF16ToWide(out16.data(), out16.length(), out);
+#elif defined(WCHAR_T_IS_UTF16)
+  out->swap(out16);
+#endif
 }
 
 std::string CanonicalizeHost(const std::string& host, bool* is_ip_address) {
@@ -872,7 +869,7 @@ std::string CanonicalizeHost(const std::string& host, bool* is_ip_address) {
 
   // Return the host as a string, stripping any unnecessary bits off the ends.
   if ((canon_host_component.begin == 0) &&
-      (canon_host_component.len == canon_host.length()))
+      (static_cast<size_t>(canon_host_component.len) == canon_host.length()))
     return canon_host;
   return canon_host.substr(canon_host_component.begin,
                            canon_host_component.len);
author	mmentovai@google.com <mmentovai@google.com@0039d316-1c4b-4281-b951-d872f2087c98>	2008-08-21 18:15:35 +0000
committer	mmentovai@google.com <mmentovai@google.com@0039d316-1c4b-4281-b951-d872f2087c98>	2008-08-21 18:15:35 +0000
commit	d862fd9d396126f3f06a8061412def6d61d156fd (patch)
tree	7052b96f95ac8b9b2f3d861daddbc82d4cdda99a /net/base/net_util.cc
parent	4167c3a50e73968cd47e0eb2502f017f432d367a (diff)
download	chromium_src-d862fd9d396126f3f06a8061412def6d61d156fd.zip chromium_src-d862fd9d396126f3f06a8061412def6d61d156fd.tar.gz chromium_src-d862fd9d396126f3f06a8061412def6d61d156fd.tar.bz2