diff options
author | meacer <meacer@chromium.org> | 2014-10-21 17:44:52 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2014-10-22 04:03:12 +0000 |
commit | c62e2eeb225afbc123db27c2d3500a7bbbab6a48 (patch) | |
tree | c78a981e910f764292c09e2ee7e7da1eb49a504b /net/base/escape.cc | |
parent | 82f6f9885248ba9ece56af554fe8a087b74bdf02 (diff) | |
download | chromium_src-c62e2eeb225afbc123db27c2d3500a7bbbab6a48.zip chromium_src-c62e2eeb225afbc123db27c2d3500a7bbbab6a48.tar.gz chromium_src-c62e2eeb225afbc123db27c2d3500a7bbbab6a48.tar.bz2 |
Unescape BiDi control chars while parsing data: urls
The fix for bug 337746 prevented unescaping of BiDi control
characters in URLs. This breaks the loading of data: URLs
because BiDi control chars appear escaped in the loaded HTML.
This patch adds a special case: BiDi control chars are unescaped when
UnescapeRule::CONTROL_CHARS is set, and are kept escaped otherwise.
This shouldn't change the way URLs are shown in the omnibox
or any other UI. URLs with BiDi control characters are always
displayed as escaped in the omnibox. This behavior is also
consistent with Firefox.
BUG=423901
Review URL: https://codereview.chromium.org/643963004
Cr-Commit-Position: refs/heads/master@{#300584}
Diffstat (limited to 'net/base/escape.cc')
-rw-r--r-- | net/base/escape.cc | 74 |
1 files changed, 53 insertions, 21 deletions
diff --git a/net/base/escape.cc b/net/base/escape.cc index 1798b6c..3c8adc6 100644 --- a/net/base/escape.cc +++ b/net/base/escape.cc @@ -120,6 +120,44 @@ bool UnescapeUnsignedCharAtIndex(const STR& escaped_text, return false; } +// Returns true if there is an Arabic Language Mark at |index|. |first_byte| +// is the byte at |index|. +template<typename STR> +bool HasArabicLanguageMarkAtIndex(const STR& escaped_text, + unsigned char first_byte, + size_t index) { + if (first_byte != 0xD8) + return false; + unsigned char second_byte; + if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte)) + return false; + return second_byte == 0x9c; +} + +// Returns true if there is a BiDi control char at |index|. |first_byte| is the +// byte at |index|. +template<typename STR> +bool HasThreeByteBidiControlCharAtIndex(const STR& escaped_text, + unsigned char first_byte, + size_t index) { + if (first_byte != 0xE2) + return false; + unsigned char second_byte; + if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte)) + return false; + if (second_byte != 0x80 && second_byte != 0x81) + return false; + unsigned char third_byte; + if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte)) + return false; + if (second_byte == 0x80) { + return third_byte == 0x8E || + third_byte == 0x8F || + (third_byte >= 0xAA && third_byte <= 0xAE); + } + return third_byte >= 0xA6 && third_byte <= 0xA9; +} + // Unescapes |escaped_text| according to |rules|, returning the resulting // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects // the alterations done to the string that are not one-character-to-one- @@ -172,27 +210,21 @@ STR UnescapeURLWithAdjustmentsImpl( // U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7) // U+2068 FIRST STRONG ISOLATE (%E2%81%A8) // U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9) - - unsigned char second_byte; - // Check for ALM. 
- if ((first_byte == 0xD8) && - UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) && - (second_byte == 0x9c)) { - result.append(escaped_text, i, 6); - i += 5; - continue; - } - - // Check for other BiDi control characters. - if ((first_byte == 0xE2) && - UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) && - ((second_byte == 0x80) || (second_byte == 0x81))) { - unsigned char third_byte; - if (UnescapeUnsignedCharAtIndex(escaped_text, i + 6, &third_byte) && - ((second_byte == 0x80) ? - ((third_byte == 0x8E) || (third_byte == 0x8F) || - ((third_byte >= 0xAA) && (third_byte <= 0xAE))) : - ((third_byte >= 0xA6) && (third_byte <= 0xA9)))) { + // + // However, some schemes such as data: and file: need to parse the exact + // binary data when loading the URL. For that reason, CONTROL_CHARS allows + // unescaping BiDi control characters. + // DO NOT use CONTROL_CHARS if the parsed URL is going to be displayed + // in the UI. + if (!(rules & UnescapeRule::CONTROL_CHARS)) { + if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) { + // Keep Arabic Language Mark escaped. + result.append(escaped_text, i, 6); + i += 5; + continue; + } + if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) { + // Keep BiDi control char escaped. result.append(escaped_text, i, 9); i += 8; continue; |