diff options
author | joth@chromium.org <joth@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-11-23 01:53:52 +0000 |
---|---|---|
committer | joth@chromium.org <joth@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-11-23 01:53:52 +0000 |
commit | 369e84f70d256d188a1866d8cef52edf4468cd9b (patch) | |
tree | e7e90408125f4831ce7983fd37414ad763b965b1 /url/url_canon_pathurl.cc | |
parent | a7e3691579181327dc65b02d043e7c01d4b06cb9 (diff) | |
download | chromium_src-369e84f70d256d188a1866d8cef52edf4468cd9b.zip chromium_src-369e84f70d256d188a1866d8cef52edf4468cd9b.tar.gz chromium_src-369e84f70d256d188a1866d8cef52edf4468cd9b.tar.bz2 |
Support URL fragment resolution against non-hierarchical schemes
Support URL fragment resolution against non-hierarchical schemes
As a result, data: about: etc now have 'query' and 'ref' components
parsed; as a result a new GURL::GetContent() convenience is added to
retrieve the spec with the scheme stripped off.
A complication in supporting this is that we now need to allow trailing
whitespace to be preserved when transferring url_parse::Parsed
structs between KURL and GURL. Without this, the URL prior to the
#fragment can change (i.e. whitespace stripped) when following an anchor
link which breaks the page (causes reload from source). See
http://crbug.com/291747 for more details on this.
R=brettw@chromium.org
TBR=cbentzel@chromium.org
BUG=291747
Review URL: https://codereview.chromium.org/23835019
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@236917 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'url/url_canon_pathurl.cc')
-rw-r--r-- | url/url_canon_pathurl.cc | 64 |
1 files changed, 41 insertions, 23 deletions
diff --git a/url/url_canon_pathurl.cc b/url/url_canon_pathurl.cc index bc681f4..8f7dee4 100644 --- a/url/url_canon_pathurl.cc +++ b/url/url_canon_pathurl.cc @@ -13,6 +13,39 @@ namespace url_canon { namespace { +// Canonicalize the given |component| from |source| into |output| and +// |new_component|. If |separator| is non-zero, it is pre-pended to |ouput| +// prior to the canonicalized component; i.e. for the '?' or '#' characters. +template<typename CHAR, typename UCHAR> +bool DoCanonicalizePathComponent(const CHAR* source, + const url_parse::Component& component, + CHAR seperator, + CanonOutput* output, + url_parse::Component* new_component) { + bool success = true; + if (component.is_valid()) { + if (seperator) + output->push_back(seperator); + // Copy the path using path URL's more lax escaping rules (think for + // javascript:). We convert to UTF-8 and escape non-ASCII, but leave all + // ASCII characters alone. This helps readability of JavaStript. + new_component->begin = output->length(); + int end = component.end(); + for (int i = component.begin; i < end; i++) { + UCHAR uch = static_cast<UCHAR>(source[i]); + if (uch < 0x20 || uch >= 0x80) + success &= AppendUTF8EscapedChar(source, &i, end, output); + else + output->push_back(static_cast<char>(uch)); + } + new_component->len = output->length() - new_component->begin; + } else { + // Empty part. + new_component->reset(); + } + return success; +} + template<typename CHAR, typename UCHAR> bool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source, const url_parse::Parsed& parsed, @@ -28,29 +61,14 @@ bool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source, new_parsed->password.reset(); new_parsed->host.reset(); new_parsed->port.reset(); - - if (parsed.path.is_valid()) { - // Copy the path using path URL's more lax escaping rules (think for - // javascript:). We convert to UTF-8 and escape non-ASCII, but leave all - // ASCII characters alone. This helps readability of JavaStript. 
- new_parsed->path.begin = output->length(); - int end = parsed.path.end(); - for (int i = parsed.path.begin; i < end; i++) { - UCHAR uch = static_cast<UCHAR>(source.path[i]); - if (uch < 0x20 || uch >= 0x80) - success &= AppendUTF8EscapedChar(source.path, &i, end, output); - else - output->push_back(static_cast<char>(uch)); - } - new_parsed->path.len = output->length() - new_parsed->path.begin; - } else { - // Empty path. - new_parsed->path.reset(); - } - - // Assume there's no query or ref. - new_parsed->query.reset(); - new_parsed->ref.reset(); + // We allow path URLs to have the path, query and fragment components, but we + // will canonicalize each of the via the weaker path URL rules. + success &= DoCanonicalizePathComponent<CHAR, UCHAR>( + source.path, parsed.path, 0, output, &new_parsed->path); + success &= DoCanonicalizePathComponent<CHAR, UCHAR>( + source.query, parsed.query, '?', output, &new_parsed->query); + success &= DoCanonicalizePathComponent<CHAR, UCHAR>( + source.ref, parsed.ref, '#', output, &new_parsed->ref); return success; } |