1 files changed, 272 insertions, 0 deletions
diff --git a/net/base/escape.cc b/net/base/escape.cc
new file mode 100644
index 0000000..bd4aa95
--- /dev/null
+++ b/net/base/escape.cc
@@ -0,0 +1,272 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <algorithm>
+
+#include "net/base/escape.h"
+
+#include "base/logging.h"
+#include "base/string_util.h"
+
+namespace {
+
+// Returns true if |ch| is an ASCII hexadecimal digit: 0-9, A-F or a-f.
+template <class char_type>
+inline bool IsHex(char_type ch) {
+  const bool is_decimal = ch >= '0' && ch <= '9';
+  return is_decimal || (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f');
+}
+
+// Converts the ASCII hex digit |ch| to its value (0-15), returned in the same
+// character type. Any other input hits NOTREACHED() and yields 0.
+template <class char_type>
+inline char_type HexToInt(char_type ch) {
+  if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
+  if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10;
+  if (ch >= '0' && ch <= '9') return ch - '0';
+
+  NOTREACHED();  // Not a hex digit; the caller in this file checks IsHex().
+  return 0;
+}
+
+static const char* const kHexString = "0123456789ABCDEF";  // Digit for 0..15.
+inline char IntToHex(int i) {  // Returns the upper-case hex digit for |i|.
+  DCHECK(0 <= i && i <= 15) << i << " not a hex value";
+  return *(kHexString + i);
+}
+
+// A fast bit-vector set of 8-bit characters, stored as eight 32-bit words.
+// Character c is bit (c & 31) of word (c >> 5): b0 covers 0-31, b1 32-63, etc.
+class Charmap {
+ public:
+  Charmap(uint32 b0, uint32 b1, uint32 b2, uint32 b3,
+          uint32 b4, uint32 b5, uint32 b6, uint32 b7) {
+    map_[0] = b0; map_[1] = b1; map_[2] = b2; map_[3] = b3;
+    map_[4] = b4; map_[5] = b5; map_[6] = b6; map_[7] = b7;
+  }
+
+  // Returns true if |c| is in the set. The mask starts from an unsigned 1:
+  // shifting a signed 1 into the sign bit is not portable before C++20.
+  bool Contains(unsigned char c) const {
+    return (map_[c >> 5] & (static_cast<uint32>(1) << (c & 31))) != 0;
+  }
+
+ private:
+  uint32 map_[8];
+};
+
+
+// Returns a copy of |text| in which every byte contained in |charmap| is
+// replaced by "%XX" (two upper-case hex digits). When |use_plus| is true a
+// space becomes '+' instead, whether or not |charmap| contains it. The index
+// is a size_t so it cannot wrap around before reaching text.length().
+const std::string Escape(const std::string& text, const Charmap& charmap,
+                         bool use_plus) {
+  std::string escaped;
+  escaped.reserve(text.length() * 3);  // Worst case: all bytes escaped.
+  for (size_t i = 0; i < text.length(); ++i) {
+    const unsigned char c = static_cast<unsigned char>(text[i]);
+    if (use_plus && ' ' == c) {
+      escaped.push_back('+');
+    } else if (charmap.Contains(c)) {
+      escaped.push_back('%');
+      escaped.push_back(IntToHex(c >> 4));   // High nibble.
+      escaped.push_back(IntToHex(c & 0xf));  // Low nibble.
+    } else {
+      escaped.push_back(c);
+    }
+  }
+  return escaped;
+}
+
+// Returns |escaped_text| with each "%XX" (XX being two hex digits) replaced by
+// the byte it encodes, as permitted by |rules|: an encoded '%' is decoded only
+// with UnescapeRule::PERCENTS, an encoded ' ' only with UnescapeRule::SPACES,
+// and a literal '+' becomes ' ' with UnescapeRule::REPLACE_PLUS_WITH_SPACE.
+std::string UnescapeURLImpl(const std::string& escaped_text,
+                            UnescapeRule::Type rules) {
+  // The output is never longer than the input, so reserving the input size
+  // guarantees the loop below does not have to reallocate.
+  std::string result;
+  result.reserve(escaped_text.length());
+
+  for (size_t i = 0, max = escaped_text.size(); i < max; ++i) {
+    // |max - i > 2| means two more characters exist. Comparing |i| against
+    // |max - 2| instead would wrap around for inputs shorter than two chars.
+    if (escaped_text[i] == '%' && max - i > 2 &&
+        IsHex(escaped_text[i + 1]) && IsHex(escaped_text[i + 2])) {
+      const unsigned char value = HexToInt(escaped_text[i + 1]) * 16 +
+                                  HexToInt(escaped_text[i + 2]);
+      if (((rules & UnescapeRule::PERCENTS) || value != '%') &&
+          ((rules & UnescapeRule::SPACES) || value != ' ')) {
+        result.push_back(value);  // Use the unescaped character.
+        i += 2;                   // Skip the two digits just consumed.
+      } else {
+        // Keep the escape: emit '%' now, the digits on the next iterations.
+        result.push_back('%');
+      }
+    } else if ((rules & UnescapeRule::REPLACE_PLUS_WITH_SPACE) &&
+               escaped_text[i] == '+') {
+      result.push_back(' ');
+    } else {
+      // Ordinary character, or a '%' not followed by two hex digits.
+      result.push_back(escaped_text[i]);
+    }
+  }
+
+  return result;
+}
+
+}  // namespace
+
+// Bytes to %-escape in query values: everything except alphanumerics and
+// !'()*-._~ (see RFC 2396 for the list of reserved characters).
+static const Charmap kQueryCharmap(
+  0xffffffffL, 0xfc00987dL, 0x78000001L, 0xb8000001L,
+  0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL);
+
+std::string EscapeQueryParamValue(const std::string& text) {
+  return Escape(text, kQueryCharmap, true);  // true: each space becomes '+'.
+}
+
+// Escapes the UTF-8 encoding of |text| with kQueryCharmap (' ' becomes '+').
+std::wstring EscapeQueryParamValueUTF8(const std::wstring& text) {
+  const std::string utf8_bytes = WideToUTF8(text);
+  return UTF8ToWide(Escape(utf8_bytes, kQueryCharmap, true));
+}
+
+// Escapes non-printable and non-7bit bytes, plus space and "#%:<>?[\]^`{|}
+static const Charmap kPathCharmap(
+  0xffffffffL, 0xd400002dL, 0x78000000L, 0xb8000001L,
+  0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL);
+
+std::string EscapePath(const std::string& path) {
+  return Escape(path, kPathCharmap, false);  // false: ' ' is escaped as %20.
+}
+
+// Escapes only non-7bit bytes (0x80-0xff); every ASCII byte is left as-is.
+static const Charmap kNonASCIICharmap(
+  0x00000000L, 0x00000000L, 0x00000000L, 0x00000000L,
+  0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL);
+
+std::string EscapeNonASCII(const std::string& input) {
+  return Escape(input, kNonASCIICharmap, false);  // false: spaces are kept.
+}
+
+// Documented as: all but alphanumerics, reserved (;/?:@&=+$,) and !'()*-._~%
+// NOTE(review): the bits below do escape '+' and do not escape '\' - confirm.
+static const Charmap kExternalHandlerCharmap(
+  0xffffffffL, 0x5000080dL, 0x68000000L, 0xb8000001L,
+  0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL);
+
+std::string EscapeExternalHandlerValue(const std::string& text) {
+  return Escape(text, kExternalHandlerCharmap, false);  // ' ' becomes %20.
+}
+
+// Converts |text| to |codepage| and query-escapes the bytes into |escaped|.
+bool EscapeQueryParamValue(const std::wstring& text, const char* codepage,
+                           std::wstring* escaped) {
+  // TODO(brettw) bug 1201094: this function should be removed, this "SKIP"
+  // behavior is wrong when the character can't be encoded properly.
+  std::string bytes;
+  const bool converted = WideToCodepage(
+      text, codepage, OnStringUtilConversionError::SKIP, &bytes);
+  if (!converted) return false;  // Conversion failed; |escaped| is untouched.
+  // Widening via UTF8ToWide is safe: with kQueryCharmap, Escape() emits only
+  // ASCII (alphanumerics, !'()*-._~, '+' for spaces, and %XX escapes).
+  *escaped = UTF8ToWide(Escape(bytes, kQueryCharmap, true));
+  return true;
+}
+
+std::wstring UnescapeAndDecodeURLComponent(const std::string& text,
+                                           const char* codepage,
+                                           UnescapeRule::Type rules) {
+  std::wstring decoded;  // Receives the unescaped bytes decoded as |codepage|.
+  if (!CodepageToWide(UnescapeURLImpl(text, rules), codepage,
+                      OnStringUtilConversionError::FAIL, &decoded))
+    return UTF8ToWide(text);  // Not valid in |codepage|: keep |text| escaped.
+  return decoded;
+}
+
+std::string UnescapeURLComponent(const std::string& escaped_text,
+                                 UnescapeRule::Type rules) {
+  return UnescapeURLImpl(escaped_text, rules);  // Forwards unchanged.
+}
+
+// Appends |c| to |output|. The HTML-special characters < > & " ' are replaced
+// by &lt; &gt; &amp; &quot; &#39; respectively; anything else is appended
+// unchanged. Works for both std::string and std::wstring via |str|.
+template <class str>
+void AppendEscapedCharForHTMLImpl(typename str::value_type c, str* output) {
+  // Replacement entity for |c|; set by every non-default case below.
+  const char* entity = 0;
+  switch (c) {
+    case '<':  entity = "&lt;";   break;
+    case '>':  entity = "&gt;";   break;
+    case '&':  entity = "&amp;";  break;
+    case '"':  entity = "&quot;"; break;
+    case '\'': entity = "&#39;";  break;
+    default:
+      // Not a special character: copy it through and stop.
+      output->push_back(c);
+      return;
+  }
+
+  // The entities are narrow literals, so push them one character at a time;
+  // push_back converts each char to the string's own character type.
+  for (; *entity; ++entity)
+    output->push_back(*entity);
+}
+
+void AppendEscapedCharForHTML(char c, std::string* output) {
+  AppendEscapedCharForHTMLImpl(c, output);  // Narrow-string instantiation.
+}
+
+void AppendEscapedCharForHTML(wchar_t c, std::wstring* output) {
+  AppendEscapedCharForHTMLImpl(c, output);  // Wide-string instantiation.
+}
+
+// Returns a copy of |input| with < > & " ' replaced by their HTML entities.
+template <class str>
+str EscapeForHTMLImpl(const str& input) {
+  str result;
+  result.reserve(input.size());  // Exact when nothing needs escaping.
+  const typename str::const_iterator end = input.end();  // Needs |typename|.
+  for (typename str::const_iterator it = input.begin(); it != end; ++it)
+    AppendEscapedCharForHTMLImpl(*it, &result);
+  return result;
+}
+
+std::string EscapeForHTML(const std::string& input) {
+  return EscapeForHTMLImpl(input);  // Narrow-string instantiation.
+}
+
+std::wstring EscapeForHTML(const std::wstring& input) {
+  return EscapeForHTMLImpl(input);  // Wide-string instantiation.
+}