summaryrefslogtreecommitdiffstats
path: root/net/base/escape.cc
diff options
context:
space:
mode:
Diffstat (limited to 'net/base/escape.cc')
-rw-r--r--net/base/escape.cc272
1 files changed, 272 insertions, 0 deletions
diff --git a/net/base/escape.cc b/net/base/escape.cc
new file mode 100644
index 0000000..bd4aa95
--- /dev/null
+++ b/net/base/escape.cc
@@ -0,0 +1,272 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <algorithm>
+
+#include "net/base/escape.h"
+
+#include "base/logging.h"
+#include "base/string_util.h"
+
+namespace {
+
+// Returns true when |ch| is an ASCII hexadecimal digit (0-9, A-F or a-f).
+// Works for any character type that can be compared against narrow character
+// literals.
+template <class char_type>
+inline bool IsHex(char_type ch) {
+  if (ch >= '0' && ch <= '9')
+    return true;
+  if (ch >= 'A' && ch <= 'F')
+    return true;
+  return ch >= 'a' && ch <= 'f';
+}
+
+// Converts one ASCII hex digit to its numeric value (0-15), returned in the
+// same character type as the input. The caller in this file checks IsHex()
+// before calling; any other input hits NOTREACHED() and yields 0.
+template <class char_type>
+inline char_type HexToInt(char_type ch) {
+  const bool is_decimal = ch >= '0' && ch <= '9';
+  const bool is_upper = ch >= 'A' && ch <= 'F';
+  const bool is_lower = ch >= 'a' && ch <= 'f';
+
+  if (is_decimal)
+    return ch - '0';
+  if (is_upper)
+    return ch - 'A' + 10;
+  if (is_lower)
+    return ch - 'a' + 10;
+
+  NOTREACHED();
+  return 0;
+}
+
+// Upper-case hex digits, indexed by nibble value.
+static const char* const kHexString = "0123456789ABCDEF";
+
+// Maps a nibble value in [0, 15] to its upper-case hex digit. Values outside
+// that range trip the DCHECK.
+inline char IntToHex(int i) {
+  DCHECK(0 <= i && i < 16) << i << " not a hex value";
+  const char digit = kHexString[i];
+  return digit;
+}
+
+// A fast bit-vector map for 8-bit characters.
+//
+// Internally stores 256 bits in an array of eight 32-bit words. Character |c|
+// lives in word (c >> 5) at bit position (c & 31), so word 0 covers 0x00-0x1f,
+// word 1 covers 0x20-0x3f, and so on up to word 7 for 0xe0-0xff.
+class Charmap {
+ public:
+  // Each argument supplies the 32 membership bits for one word, lowest
+  // character range first.
+  Charmap(uint32 b0, uint32 b1, uint32 b2, uint32 b3,
+          uint32 b4, uint32 b5, uint32 b6, uint32 b7) {
+    map_[0] = b0; map_[1] = b1; map_[2] = b2; map_[3] = b3;
+    map_[4] = b4; map_[5] = b5; map_[6] = b6; map_[7] = b7;
+  }
+
+  // Returns true if the bit for |c| is set in the map.
+  bool Contains(unsigned char c) const {
+    // Build the mask from an unsigned value: shifting a signed 1 left by 31
+    // would move a bit into the sign position.
+    const uint32 mask = static_cast<uint32>(1) << (c & 31);
+    return (map_[c >> 5] & mask) != 0;
+  }
+
+ private:
+  uint32 map_[8];
+};
+
+
+// Given text to escape and a Charmap defining which byte values to escape,
+// returns the escaped string. Each byte is handled as follows:
+//   - if use_plus is true and the byte is a space, it becomes '+';
+//   - otherwise, if the byte is in the charmap, it becomes "%XX" with
+//     upper-case hex digits (so a space in the charmap becomes "%20");
+//   - otherwise it is copied through unchanged.
+const std::string Escape(const std::string& text, const Charmap& charmap,
+                         bool use_plus) {
+  std::string escaped;
+  // Worst case every input byte expands to three output bytes.
+  escaped.reserve(text.length() * 3);
+  // Index with the string's own size type so the loop counter cannot be
+  // narrower than text.length().
+  for (std::string::size_type i = 0; i < text.length(); ++i) {
+    const unsigned char c = static_cast<unsigned char>(text[i]);
+    if (use_plus && ' ' == c) {
+      escaped.push_back('+');
+    } else if (charmap.Contains(c)) {
+      escaped.push_back('%');
+      escaped.push_back(IntToHex(c >> 4));   // High nibble.
+      escaped.push_back(IntToHex(c & 0xf));  // Low nibble.
+    } else {
+      escaped.push_back(c);
+    }
+  }
+  return escaped;
+}
+
+// Replaces "%XX" sequences in |escaped_text| with the byte they encode and
+// returns the result. |rules| is a bit mask that adjusts the behavior:
+//   - "%25" ('%') is only unescaped when UnescapeRule::PERCENTS is set;
+//   - "%20" (' ') is only unescaped when UnescapeRule::SPACES is set;
+//   - '+' is turned into a space when UnescapeRule::REPLACE_PLUS_WITH_SPACE
+//     is set.
+// A '%' that is not followed by two hex digits is copied through unchanged.
+std::string UnescapeURLImpl(const std::string& escaped_text,
+                            UnescapeRule::Type rules) {
+  // The output of the unescaping is never longer than the input, so we can
+  // reserve the input size to make sure we have enough buffer and don't have
+  // to allocate in the loop below.
+  std::string result;
+  result.reserve(escaped_text.length());
+
+  const size_t length = escaped_text.size();
+  for (size_t i = 0; i < length; ++i) {
+    const std::string::value_type current = escaped_text[i];
+    // A full escape needs two more characters after the '%'. Since
+    // i < length here, "length - i > 2" cannot wrap around, unlike computing
+    // "length - 2" up front, which underflows for inputs shorter than two
+    // characters (e.g. a lone "%") and lets the reads below run off the end.
+    if (current == '%' && length - i > 2) {
+      const std::string::value_type most_sig_digit(escaped_text[i + 1]);
+      const std::string::value_type least_sig_digit(escaped_text[i + 2]);
+      if (IsHex(most_sig_digit) && IsHex(least_sig_digit)) {
+        unsigned char value = HexToInt(most_sig_digit) * 16 +
+                              HexToInt(least_sig_digit);
+        if (((rules & UnescapeRule::PERCENTS) || value != '%') &&
+            ((rules & UnescapeRule::SPACES) || value != ' ')) {
+          // Use the unescaped version of the character and skip the two
+          // digits we just consumed.
+          result.push_back(value);
+          i += 2;
+        } else {
+          // Keep this escape as-is; the two digits are copied by the next
+          // loop iterations.
+          result.push_back('%');
+        }
+      } else {
+        // Not a valid escape sequence; keep the literal '%'.
+        result.push_back('%');
+      }
+    } else if ((rules & UnescapeRule::REPLACE_PLUS_WITH_SPACE) &&
+               current == '+') {
+      result.push_back(' ');
+    } else {
+      result.push_back(current);
+    }
+  }
+
+  return result;
+}
+
+} // namespace
+
+// Escapes everything except alphanumerics and !'()*-._~
+// See RFC 2396 for the list of reserved characters.
+static const Charmap kQueryCharmap(
+    0xffffffffL,  // 0x00-0x1f: every control character
+    0xfc00987dL,  // 0x20-0x3f: space " # $ % & + , / : ; < = > ?
+    0x78000001L,  // 0x40-0x5f: @ [ \ ] ^
+    0xb8000001L,  // 0x60-0x7f: ` { | } DEL
+    0xffffffffL, 0xffffffffL,   // 0x80-0xbf: every byte
+    0xffffffffL, 0xffffffffL);  // 0xc0-0xff: every byte
+
+// Escapes |text| for use as a query parameter value. Spaces become '+';
+// every other byte in kQueryCharmap becomes "%XX".
+std::string EscapeQueryParamValue(const std::string& text) {
+  const bool kSpacesToPlus = true;
+  return Escape(text, kQueryCharmap, kSpacesToPlus);
+}
+
+// Converts the wide string to UTF-8 bytes, escapes them with kQueryCharmap
+// (everything except alphanumerics and !'()*-._~ is %-escaped, spaces become
+// '+'), and converts the escaped bytes back to a wide string.
+std::wstring EscapeQueryParamValueUTF8(const std::wstring& text) {
+  const std::string utf8_text = WideToUTF8(text);
+  const std::string escaped_utf8 = Escape(utf8_text, kQueryCharmap, true);
+  return UTF8ToWide(escaped_utf8);
+}
+
+// Escapes non-printable, non-7bit, and (including space) "#%:<>?[\]^`{|}
+static const Charmap kPathCharmap(
+    0xffffffffL,  // 0x00-0x1f: every control character
+    0xd400002dL,  // 0x20-0x3f: space " # % : < > ?
+    0x78000000L,  // 0x40-0x5f: [ \ ] ^
+    0xb8000001L,  // 0x60-0x7f: ` { | } DEL
+    0xffffffffL, 0xffffffffL,   // 0x80-0xbf: every byte
+    0xffffffffL, 0xffffffffL);  // 0xc0-0xff: every byte
+
+// Escapes |path| using kPathCharmap. Spaces are emitted as "%20", never '+'.
+std::string EscapePath(const std::string& path) {
+  const bool kSpacesToPlus = false;
+  return Escape(path, kPathCharmap, kSpacesToPlus);
+}
+
+// Escapes only bytes with the high bit set (non-7bit).
+static const Charmap kNonASCIICharmap(
+    0x00000000L, 0x00000000L,   // 0x00-0x3f: nothing
+    0x00000000L, 0x00000000L,   // 0x40-0x7f: nothing
+    0xffffffffL, 0xffffffffL,   // 0x80-0xbf: every byte
+    0xffffffffL, 0xffffffffL);  // 0xc0-0xff: every byte
+
+// %-escapes every byte >= 0x80 in |input| and leaves 7-bit bytes untouched.
+std::string EscapeNonASCII(const std::string& input) {
+  const bool kSpacesToPlus = false;
+  return Escape(input, kNonASCIICharmap, kSpacesToPlus);
+}
+
+// Intended to escape everything except alphanumerics, the reserved
+// characters (;/?:@&=+$,) and !'()*-._~%
+//
+// NOTE(review): the bitmap below does not match that description exactly.
+// As written it escapes '+' and leaves '\' unescaped. Confirm which of the
+// two (comment or bitmap) reflects the intended behavior.
+static const Charmap kExternalHandlerCharmap(
+    0xffffffffL,  // 0x00-0x1f: every control character
+    0x5000080dL,  // 0x20-0x3f: space " # + < >
+    0x68000000L,  // 0x40-0x5f: [ ] ^
+    0xb8000001L,  // 0x60-0x7f: ` { | } DEL
+    0xffffffffL, 0xffffffffL,   // 0x80-0xbf: every byte
+    0xffffffffL, 0xffffffffL);  // 0xc0-0xff: every byte
+
+// Escapes |text| using kExternalHandlerCharmap. Spaces are emitted as "%20",
+// never '+'.
+std::string EscapeExternalHandlerValue(const std::string& text) {
+  const bool kSpacesToPlus = false;
+  return Escape(text, kExternalHandlerCharmap, kSpacesToPlus);
+}
+
+// Converts |text| to |codepage|, escapes the resulting bytes as a query
+// parameter value (spaces become '+'), and stores the result in |escaped|.
+// Returns false, leaving |escaped| untouched, if the codepage conversion
+// fails.
+bool EscapeQueryParamValue(const std::wstring& text, const char* codepage,
+                           std::wstring* escaped) {
+  // TODO(brettw) bug 1201094: this function should be removed, this "SKIP"
+  // behavior is wrong when the character can't be encoded properly.
+  std::string encoded;
+  const bool converted = WideToCodepage(
+      text, codepage, OnStringUtilConversionError::SKIP, &encoded);
+  if (!converted)
+    return false;
+
+  // It's safe to use UTF8ToWide here because Escape with kQueryCharmap only
+  // emits ASCII: alphanumerics, !'()*-._~, and the '%' and '+' it inserts.
+  const std::string escaped_bytes = Escape(encoded, kQueryCharmap, true);
+  *escaped = UTF8ToWide(escaped_bytes);
+  return true;
+}
+
+// Unescapes |text| according to |rules| and decodes the resulting bytes from
+// |codepage| into a wide string. If the unescaped bytes are not valid in that
+// codepage, the original (still escaped) |text| is returned instead,
+// converted with UTF8ToWide.
+std::wstring UnescapeAndDecodeURLComponent(const std::string& text,
+                                           const char* codepage,
+                                           UnescapeRule::Type rules) {
+  const std::string unescaped = UnescapeURLImpl(text, rules);
+
+  std::wstring decoded;
+  const bool valid_in_codepage = CodepageToWide(
+      unescaped, codepage, OnStringUtilConversionError::FAIL, &decoded);
+  if (!valid_in_codepage)
+    return UTF8ToWide(text);  // Fall back to the escaped version.
+
+  return decoded;  // Character set looks like it's valid.
+}
+
+// Public entry point for unescaping: forwards to UnescapeURLImpl and returns
+// the unescaped bytes without any character-set decoding.
+std::string UnescapeURLComponent(const std::string& escaped_text,
+                                 UnescapeRule::Type rules) {
+  const std::string unescaped = UnescapeURLImpl(escaped_text, rules);
+  return unescaped;
+}
+
+// Appends |c| to |output|, replacing the five HTML-significant characters
+// (< > & " ') with their entity or numeric references. Any other character
+// is appended unchanged. Works for both narrow and wide string types.
+template <class str>
+void AppendEscapedCharForHTMLImpl(typename str::value_type c, str* output) {
+  // Replacement text for |c|, or null when |c| needs no escaping.
+  const char* replacement = 0;
+  switch (c) {
+    case '<':
+      replacement = "&lt;";
+      break;
+    case '>':
+      replacement = "&gt;";
+      break;
+    case '&':
+      replacement = "&amp;";
+      break;
+    case '"':
+      replacement = "&quot;";
+      break;
+    case '\'':
+      replacement = "&#39;";
+      break;
+    default:
+      break;
+  }
+
+  if (!replacement) {
+    output->push_back(c);
+    return;
+  }
+
+  // Copy character by character so the narrow replacement text also works
+  // when |output| is a wide string.
+  for (const char* p = replacement; *p; ++p)
+    output->push_back(*p);
+}
+
+// Narrow-string overload: appends the HTML-escaped form of |c| to |output|.
+void AppendEscapedCharForHTML(char c, std::string* output) {
+  AppendEscapedCharForHTMLImpl<std::string>(c, output);
+}
+
+// Wide-string overload: appends the HTML-escaped form of |c| to |output|.
+void AppendEscapedCharForHTML(wchar_t c, std::wstring* output) {
+  AppendEscapedCharForHTMLImpl<std::wstring>(c, output);
+}
+
+// Returns a copy of |input| with every character passed through
+// AppendEscapedCharForHTMLImpl, i.e. with < > & " ' replaced by their HTML
+// references and all other characters copied unchanged.
+template <class str>
+str EscapeForHTMLImpl(const str& input) {
+  str result;
+  result.reserve(input.size());  // optimize for no escaping
+
+  // const_iterator is a dependent type, so it must be introduced with
+  // "typename" for the template to compile on conforming compilers.
+  for (typename str::const_iterator it = input.begin(); it != input.end();
+       ++it) {
+    AppendEscapedCharForHTMLImpl(*it, &result);
+  }
+
+  return result;
+}
+
+// Narrow-string overload: returns |input| with HTML-significant characters
+// escaped.
+std::string EscapeForHTML(const std::string& input) {
+  return EscapeForHTMLImpl<std::string>(input);
+}
+
+// Wide-string overload: returns |input| with HTML-significant characters
+// escaped.
+std::wstring EscapeForHTML(const std::wstring& input) {
+  return EscapeForHTMLImpl<std::wstring>(input);
+}