// Copyright 2008, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include "net/base/escape.h" #include "base/logging.h" #include "base/string_util.h" namespace { template inline bool IsHex(char_type ch) { return (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f'); } template inline char_type HexToInt(char_type ch) { if (ch >= '0' && ch <= '9') return ch - '0'; if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10; if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10; NOTREACHED(); return 0; } static const char* const kHexString = "0123456789ABCDEF"; inline char IntToHex(int i) { DCHECK(i >= 0 && i <= 15) << i << " not a hex value"; return kHexString[i]; } // A fast bit-vector map for ascii characters. // // Internally stores 256 bits in an array of 8 ints. // Does quick bit-flicking to lookup needed characters. class Charmap { public: Charmap(uint32 b0, uint32 b1, uint32 b2, uint32 b3, uint32 b4, uint32 b5, uint32 b6, uint32 b7) { map_[0] = b0; map_[1] = b1; map_[2] = b2; map_[3] = b3; map_[4] = b4; map_[5] = b5; map_[6] = b6; map_[7] = b7; } bool Contains(unsigned char c) const { return (map_[c >> 5] & (1 << (c & 31))) ? true : false; } private: uint32 map_[8]; }; // Given text to escape and a Charmap defining which values to escape, // return an escaped string. If use_plus is true, spaces are converted // to +, otherwise, if spaces are in the charmap, they are converted to // %20. const std::string Escape(const std::string& text, const Charmap& charmap, bool use_plus) { std::string escaped; escaped.reserve(text.length() * 3); for (unsigned int i = 0; i < text.length(); ++i) { unsigned char c = static_cast(text[i]); if (use_plus && ' ' == c) { escaped.push_back('+'); } else if (charmap.Contains(c)) { escaped.push_back('%'); escaped.push_back(IntToHex(c >> 4)); escaped.push_back(IntToHex(c & 0xf)); } else { escaped.push_back(c); } } return escaped; } std::string UnescapeURLImpl(const std::string& escaped_text, UnescapeRule::Type rules) { // The output of the unescaping is always smaller than the input, so we can // reserve the input size to make sure we have enough buffer and don't have // to allocate in the loop below. std::string result; result.reserve(escaped_text.length()); for (size_t i = 0, max = escaped_text.size(), max_digit_index = max - 2; i < max; ++i) { if (escaped_text[i] == '%' && i < max_digit_index) { const std::string::value_type most_sig_digit(escaped_text[i + 1]); const std::string::value_type least_sig_digit(escaped_text[i + 2]); if (IsHex(most_sig_digit) && IsHex(least_sig_digit)) { unsigned char value = HexToInt(most_sig_digit) * 16 + HexToInt(least_sig_digit); if (((rules & UnescapeRule::PERCENTS) || value != '%') && ((rules & UnescapeRule::SPACES) || value != ' ')) { // Use the unescaped version of the character. result.push_back(value); i += 2; } else { result.push_back('%'); } } else { result.push_back('%'); } } else if ((rules & UnescapeRule::REPLACE_PLUS_WITH_SPACE) && escaped_text[i] == '+') { result.push_back(' '); } else { result.push_back(escaped_text[i]); } } return result; } } // namespace // Everything except alphanumerics and !'()*-._~ // See RFC 2396 for the list of reserved characters. static const Charmap kQueryCharmap( 0xffffffffL, 0xfc00987dL, 0x78000001L, 0xb8000001L, 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL); std::string EscapeQueryParamValue(const std::string& text) { return Escape(text, kQueryCharmap, true); } // Convert the string to a sequence of bytes and then % escape anything // except alphanumerics and !'()*-._~ std::wstring EscapeQueryParamValueUTF8(const std::wstring& text) { return UTF8ToWide(Escape(WideToUTF8(text), kQueryCharmap, true)); } // non-printable, non-7bit, and (including space) "#%:<>?[\]^`{|} static const Charmap kPathCharmap( 0xffffffffL, 0xd400002dL, 0x78000000L, 0xb8000001L, 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL); std::string EscapePath(const std::string& path) { return Escape(path, kPathCharmap, false); } // non-7bit static const Charmap kNonASCIICharmap( 0x00000000L, 0x00000000L, 0x00000000L, 0x00000000L, 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL); std::string EscapeNonASCII(const std::string& input) { return Escape(input, kNonASCIICharmap, false); } // Everything except alphanumerics, the reserved characters(;/?:@&=+$,) and // !'()*-._~% static const Charmap kExternalHandlerCharmap( 0xffffffffL, 0x5000080dL, 0x68000000L, 0xb8000001L, 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL); std::string EscapeExternalHandlerValue(const std::string& text) { return Escape(text, kExternalHandlerCharmap, false); } bool EscapeQueryParamValue(const std::wstring& text, const char* codepage, std::wstring* escaped) { // TODO(brettw) bug 1201094: this function should be removed, this "SKIP" // behavior is wrong when the character can't be encoded properly. std::string encoded; if (!WideToCodepage(text, codepage, OnStringUtilConversionError::SKIP, &encoded)) return false; // It's safe to use UTF8ToWide here because Escape should only return // alphanumerics and !'()*-._~ escaped->assign(UTF8ToWide(Escape(encoded, kQueryCharmap, true))); return true; } std::wstring UnescapeAndDecodeURLComponent(const std::string& text, const char* codepage, UnescapeRule::Type rules) { std::wstring result; if (CodepageToWide(UnescapeURLImpl(text, rules), codepage, OnStringUtilConversionError::FAIL, &result)) return result; // Character set looks like it's valid. return UTF8ToWide(text); // Return the escaped version when it's not. } std::string UnescapeURLComponent(const std::string& escaped_text, UnescapeRule::Type rules) { return UnescapeURLImpl(escaped_text, rules); } template void AppendEscapedCharForHTMLImpl(typename str::value_type c, str* output) { static const struct { char key; const char *replacement; } kCharsToEscape[] = { { '<', "<" }, { '>', ">" }, { '&', "&" }, { '"', """ }, { '\'', "'" }, }; size_t k; for (k = 0; k < arraysize(kCharsToEscape); ++k) { if (c == kCharsToEscape[k].key) { const char* p = kCharsToEscape[k].replacement; while (*p) output->push_back(*p++); break; } } if (k == arraysize(kCharsToEscape)) output->push_back(c); } void AppendEscapedCharForHTML(char c, std::string* output) { AppendEscapedCharForHTMLImpl(c, output); } void AppendEscapedCharForHTML(wchar_t c, std::wstring* output) { AppendEscapedCharForHTMLImpl(c, output); } template str EscapeForHTMLImpl(const str& input) { str result; result.reserve(input.size()); // optimize for no escaping for (str::const_iterator it = input.begin(); it != input.end(); ++it) AppendEscapedCharForHTMLImpl(*it, &result); return result; } std::string EscapeForHTML(const std::string& input) { return EscapeForHTMLImpl(input); } std::wstring EscapeForHTML(const std::wstring& input) { return EscapeForHTMLImpl(input); }