summary refs log tree commit diff stats
path: root/googleurl/src
diff options
context:
space:
mode:
Diffstat (limited to 'googleurl/src')
-rw-r--r-- googleurl/src/gurl.h 2
-rw-r--r-- googleurl/src/url_canon_etc.cc 12
-rw-r--r-- googleurl/src/url_canon_internal.h 21
-rw-r--r-- googleurl/src/url_canon_unittest.cc 38
-rw-r--r-- googleurl/src/url_parse.cc 4
-rw-r--r-- googleurl/src/url_test_utils.h 7
6 files changed, 52 insertions, 32 deletions
diff --git a/googleurl/src/gurl.h b/googleurl/src/gurl.h
index 29fea81..ba97191 100644
--- a/googleurl/src/gurl.h
+++ b/googleurl/src/gurl.h
@@ -30,7 +30,7 @@
#ifndef GOOGLEURL_SRC_GURL_H__
#define GOOGLEURL_SRC_GURL_H__
-#include <iostream>
+#include <iosfwd>
#include <string>
#include "base/string16.h"
diff --git a/googleurl/src/url_canon_etc.cc b/googleurl/src/url_canon_etc.cc
index aea181a..318c906 100644
--- a/googleurl/src/url_canon_etc.cc
+++ b/googleurl/src/url_canon_etc.cc
@@ -213,9 +213,6 @@ bool DoUserInfo(const CHAR* username_spec,
inline void WritePortInt(char* output, int output_len, int port) {
_itoa_s(port, output, output_len, 10);
}
-inline void WritePortInt(char16* output, int output_len, int port) {
- _itow_s(port, output, output_len, 10);
-}
// This function will prepend the colon if there will be a port.
template<typename CHAR, typename UCHAR>
@@ -290,12 +287,11 @@ void DoCanonicalizeRef(const CHAR* spec,
} else {
// Non-ASCII characters are appended unescaped, but only when they are
// valid. Invalid Unicode characters are replaced with the "invalid
- // character" as IE seems to.
+ // character" as IE seems to (on failure, ReadUTFChar stores the Unicode
+ // replacement character in code_point for us).
unsigned code_point;
- if (!ReadUTFChar(spec, &i, end, &code_point))
- AppendUTF8Value(kUnicodeReplacementCharacter, output);
- else
- AppendUTF8Value(code_point, output);
+ ReadUTFChar(spec, &i, end, &code_point);
+ AppendUTF8Value(code_point, output);
}
}
diff --git a/googleurl/src/url_canon_internal.h b/googleurl/src/url_canon_internal.h
index 4b1e45a..6305647 100644
--- a/googleurl/src/url_canon_internal.h
+++ b/googleurl/src/url_canon_internal.h
@@ -37,6 +37,7 @@
#include <stdlib.h>
+#include "base/logging.h"
#include "googleurl/src/url_canon.h"
namespace url_canon {
@@ -173,6 +174,9 @@ bool ReadUTFChar(const char* str, int* begin, int length,
// Generic To-UTF-8 converter. This will call the given append method for each
// character that should be appended, with the given output method. Wrappers
// are provided below for escaped and non-escaped versions of this.
+//
+// The caller must have already checked that char_value is a valid Unicode
+// code point.
template<class Output, void Appender(unsigned char, Output*)>
inline void DoAppendUTF8(unsigned char_value, Output* output) {
if (char_value <= 0x7f) {
@@ -191,7 +195,7 @@ inline void DoAppendUTF8(unsigned char_value, Output* output) {
output);
Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),
output);
- } else if (char_value <= 0x1fffff) {
+ } else if (char_value <= 0x10FFFF) { // Max unicode code point.
// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
Appender(static_cast<unsigned char>(0xf0 | (char_value >> 18)),
output);
@@ -201,20 +205,9 @@ inline void DoAppendUTF8(unsigned char_value, Output* output) {
output);
Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),
output);
- } else if (char_value <= 0x10FFFF) { // Max unicode code point.
- // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
- Appender(static_cast<unsigned char>(0xf8 | (char_value >> 24)),
- output);
- Appender(static_cast<unsigned char>(0x80 | ((char_value >> 18) & 0x3f)),
- output);
- Appender(static_cast<unsigned char>(0x80 | ((char_value >> 12) & 0x3f)),
- output);
- Appender(static_cast<unsigned char>(0x80 | ((char_value >> 6) & 0x3f)),
- output);
- Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),
- output);
} else {
- // Invalid UTF-8 character (>20 bits)
+ // Invalid Unicode code point (greater than 0x10FFFF).
+ NOTREACHED();
}
}
diff --git a/googleurl/src/url_canon_unittest.cc b/googleurl/src/url_canon_unittest.cc
index a3e43e2..1a3cd53 100644
--- a/googleurl/src/url_canon_unittest.cc
+++ b/googleurl/src/url_canon_unittest.cc
@@ -144,6 +144,44 @@ void SetupReplComp(
} // namespace
+TEST(URLCanonTest, DoAppendUTF8) {
+ struct UTF8Case {
+ unsigned input;
+ const char* output;
+ } utf_cases[] = {
+ // Valid code points.
+ {0x24, "\x24"},
+ {0xA2, "\xC2\xA2"},
+ {0x20AC, "\xE2\x82\xAC"},
+ {0x24B62, "\xF0\xA4\xAD\xA2"},
+ {0x10FFFF, "\xF4\x8F\xBF\xBF"},
+ };
+ std::string out_str;
+ for (size_t i = 0; i < ARRAYSIZE(utf_cases); i++) {
+ out_str.clear();
+ url_canon::StdStringCanonOutput output(&out_str);
+ url_canon::AppendUTF8Value(utf_cases[i].input, &output);
+ output.Complete();
+ EXPECT_EQ(utf_cases[i].output, out_str);
+ }
+}
+
+// TODO(mattm): Can't run this in debug mode for now, since the DCHECK will
+// cause the Chromium stacktrace dialog to appear and hang the test.
+// See http://crbug.com/49580.
+#ifdef NDEBUG
+TEST(URLCanonTest, DoAppendUTF8Invalid) {
+ std::string out_str;
+ url_canon::StdStringCanonOutput output(&out_str);
+ // Invalid code point (too large).
+ ASSERT_DEBUG_DEATH({
+ url_canon::AppendUTF8Value(0x110000, &output);
+ output.Complete();
+ EXPECT_EQ("", out_str);
+ }, "");
+}
+#endif
+
TEST(URLCanonTest, UTF) {
// Low-level test that we handle reading, canonicalization, and writing
// UTF-8/UTF-16 strings properly.
diff --git a/googleurl/src/url_parse.cc b/googleurl/src/url_parse.cc
index a08c4da..fa31210 100644
--- a/googleurl/src/url_parse.cc
+++ b/googleurl/src/url_parse.cc
@@ -324,7 +324,7 @@ void DoParseStandardURL(const CHAR* spec, int spec_len, Parsed* parsed) {
if (DoExtractScheme(spec, spec_len, &parsed->scheme)) {
after_scheme = parsed->scheme.end() + 1; // Skip past the colon.
} else {
- // Say there's no scheme when there is a colon. We could also say that
+ // Say there's no scheme when there is no colon. We could also say that
// everything is the scheme. Both would produce an invalid URL, but this way
// seems less wrong in more cases.
parsed->scheme.reset();
@@ -645,7 +645,7 @@ bool ExtractScheme(const char16* url, int url_len, Component* scheme) {
// This handles everything that may be an authority terminator, including
// backslash. For special backslash handling see DoParseAfterScheme.
bool IsAuthorityTerminator(char16 ch) {
- return IsURLSlash(ch) || ch == '?' || ch == '#' || ch == ';';
+ return IsURLSlash(ch) || ch == '?' || ch == '#';
}
void ExtractFileName(const char* url,
diff --git a/googleurl/src/url_test_utils.h b/googleurl/src/url_test_utils.h
index 5294202..6278e3f 100644
--- a/googleurl/src/url_test_utils.h
+++ b/googleurl/src/url_test_utils.h
@@ -75,11 +75,4 @@ inline std::string ConvertUTF16ToUTF8(const string16& src) {
} // namespace url_test_utils
-// This operator allows EXPECT_EQ(astring16, anotherstring16); to work.
-inline std::ostream& operator<<(std::ostream& os,
- const string16& str) {
- // Convert to UTF-8 and print the string
- return os << url_test_utils::ConvertUTF16ToUTF8(str);
-}
-
#endif // GOOGLEURL_SRC_URL_TEST_UTILS_H__