diff options
Diffstat (limited to 'googleurl/src')
-rw-r--r-- | googleurl/src/gurl.h | 2 | ||||
-rw-r--r-- | googleurl/src/url_canon_etc.cc | 12 | ||||
-rw-r--r-- | googleurl/src/url_canon_internal.h | 21 | ||||
-rw-r--r-- | googleurl/src/url_canon_unittest.cc | 38 | ||||
-rw-r--r-- | googleurl/src/url_parse.cc | 4 | ||||
-rw-r--r-- | googleurl/src/url_test_utils.h | 7 |
6 files changed, 52 insertions, 32 deletions
diff --git a/googleurl/src/gurl.h b/googleurl/src/gurl.h index 29fea81..ba97191 100644 --- a/googleurl/src/gurl.h +++ b/googleurl/src/gurl.h @@ -30,7 +30,7 @@ #ifndef GOOGLEURL_SRC_GURL_H__ #define GOOGLEURL_SRC_GURL_H__ -#include <iostream> +#include <iosfwd> #include <string> #include "base/string16.h" diff --git a/googleurl/src/url_canon_etc.cc b/googleurl/src/url_canon_etc.cc index aea181a..318c906 100644 --- a/googleurl/src/url_canon_etc.cc +++ b/googleurl/src/url_canon_etc.cc @@ -213,9 +213,6 @@ bool DoUserInfo(const CHAR* username_spec, inline void WritePortInt(char* output, int output_len, int port) { _itoa_s(port, output, output_len, 10); } -inline void WritePortInt(char16* output, int output_len, int port) { - _itow_s(port, output, output_len, 10); -} // This function will prepend the colon if there will be a port. template<typename CHAR, typename UCHAR> @@ -290,12 +287,11 @@ void DoCanonicalizeRef(const CHAR* spec, } else { // Non-ASCII characters are appended unescaped, but only when they are // valid. Invalid Unicode characters are replaced with the "invalid - // character" as IE seems to. + // character" as IE seems to (ReadUTFChar puts the unicode replacement + // character in the output on failure for us). unsigned code_point; - if (!ReadUTFChar(spec, &i, end, &code_point)) - AppendUTF8Value(kUnicodeReplacementCharacter, output); - else - AppendUTF8Value(code_point, output); + ReadUTFChar(spec, &i, end, &code_point); + AppendUTF8Value(code_point, output); } } diff --git a/googleurl/src/url_canon_internal.h b/googleurl/src/url_canon_internal.h index 4b1e45a..6305647 100644 --- a/googleurl/src/url_canon_internal.h +++ b/googleurl/src/url_canon_internal.h @@ -37,6 +37,7 @@ #include <stdlib.h> +#include "base/logging.h" #include "googleurl/src/url_canon.h" namespace url_canon { @@ -173,6 +174,9 @@ bool ReadUTFChar(const char* str, int* begin, int length, // Generic To-UTF-8 converter. This will call the given append method for each // character that should be appended, with the given output method. Wrappers // are provided below for escaped and non-escaped versions of this. +// +// The char_value must have already been checked that it's a valid Unicode +// character. template<class Output, void Appender(unsigned char, Output*)> inline void DoAppendUTF8(unsigned char_value, Output* output) { if (char_value <= 0x7f) { @@ -191,7 +195,7 @@ inline void DoAppendUTF8(unsigned char_value, Output* output) { output); Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)), output); - } else if (char_value <= 0x1fffff) { + } else if (char_value <= 0x10FFFF) { // Max unicode code point. // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx Appender(static_cast<unsigned char>(0xf0 | (char_value >> 18)), output); @@ -201,20 +205,9 @@ inline void DoAppendUTF8(unsigned char_value, Output* output) { output); Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)), output); - } else if (char_value <= 0x10FFFF) { // Max unicode code point. - // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx - Appender(static_cast<unsigned char>(0xf8 | (char_value >> 24)), - output); - Appender(static_cast<unsigned char>(0x80 | ((char_value >> 18) & 0x3f)), - output); - Appender(static_cast<unsigned char>(0x80 | ((char_value >> 12) & 0x3f)), - output); - Appender(static_cast<unsigned char>(0x80 | ((char_value >> 6) & 0x3f)), - output); - Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)), - output); } else { - // Invalid UTF-8 character (>20 bits) + // Invalid UTF-8 character (>20 bits). + NOTREACHED(); } } diff --git a/googleurl/src/url_canon_unittest.cc b/googleurl/src/url_canon_unittest.cc index a3e43e2..1a3cd53 100644 --- a/googleurl/src/url_canon_unittest.cc +++ b/googleurl/src/url_canon_unittest.cc @@ -144,6 +144,44 @@ void SetupReplComp( } // namespace +TEST(URLCanonTest, DoAppendUTF8) { + struct UTF8Case { + unsigned input; + const char* output; + } utf_cases[] = { + // Valid code points. + {0x24, "\x24"}, + {0xA2, "\xC2\xA2"}, + {0x20AC, "\xE2\x82\xAC"}, + {0x24B62, "\xF0\xA4\xAD\xA2"}, + {0x10FFFF, "\xF4\x8F\xBF\xBF"}, + }; + std::string out_str; + for (size_t i = 0; i < ARRAYSIZE(utf_cases); i++) { + out_str.clear(); + url_canon::StdStringCanonOutput output(&out_str); + url_canon::AppendUTF8Value(utf_cases[i].input, &output); + output.Complete(); + EXPECT_EQ(utf_cases[i].output, out_str); + } +} + +// TODO(mattm): Can't run this in debug mode for now, since the DCHECK will +// cause the Chromium stacktrace dialog to appear and hang the test. +// See http://crbug.com/49580. +#ifdef NDEBUG +TEST(URLCanonTest, DoAppendUTF8Invalid) { + std::string out_str; + url_canon::StdStringCanonOutput output(&out_str); + // Invalid code point (too large). + ASSERT_DEBUG_DEATH({ + url_canon::AppendUTF8Value(0x110000, &output); + output.Complete(); + EXPECT_EQ("", out_str); + }, ""); +} +#endif + TEST(URLCanonTest, UTF) { // Low-level test that we handle reading, canonicalization, and writing // UTF-8/UTF-16 strings properly. diff --git a/googleurl/src/url_parse.cc b/googleurl/src/url_parse.cc index a08c4da..fa31210 100644 --- a/googleurl/src/url_parse.cc +++ b/googleurl/src/url_parse.cc @@ -324,7 +324,7 @@ void DoParseStandardURL(const CHAR* spec, int spec_len, Parsed* parsed) { if (DoExtractScheme(spec, spec_len, &parsed->scheme)) { after_scheme = parsed->scheme.end() + 1; // Skip past the colon. } else { - // Say there's no scheme when there is a colon. We could also say that + // Say there's no scheme when there is no colon. We could also say that // everything is the scheme. Both would produce an invalid URL, but this way // seems less wrong in more cases. parsed->scheme.reset(); @@ -645,7 +645,7 @@ bool ExtractScheme(const char16* url, int url_len, Component* scheme) { // This handles everything that may be an authority terminator, including // backslash. For special backslash handling see DoParseAfterScheme. bool IsAuthorityTerminator(char16 ch) { - return IsURLSlash(ch) || ch == '?' || ch == '#' || ch == ';'; + return IsURLSlash(ch) || ch == '?' || ch == '#'; } void ExtractFileName(const char* url, diff --git a/googleurl/src/url_test_utils.h b/googleurl/src/url_test_utils.h index 5294202..6278e3f 100644 --- a/googleurl/src/url_test_utils.h +++ b/googleurl/src/url_test_utils.h @@ -75,11 +75,4 @@ inline std::string ConvertUTF16ToUTF8(const string16& src) { } // namespace url_test_utils -// This operator allows EXPECT_EQ(astring16, anotherstring16); to work. -inline std::ostream& operator<<(std::ostream& os, - const string16& str) { - // Convert to UTF-8 and print the string - return os << url_test_utils::ConvertUTF16ToUTF8(str); -} - #endif // GOOGLEURL_SRC_URL_TEST_UTILS_H__ |