summaryrefslogtreecommitdiffstats
path: root/url/url_canon_pathurl.cc
diff options
context:
space:
mode:
authorjoth@chromium.org <joth@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2013-11-23 01:53:52 +0000
committerjoth@chromium.org <joth@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2013-11-23 01:53:52 +0000
commit369e84f70d256d188a1866d8cef52edf4468cd9b (patch)
treee7e90408125f4831ce7983fd37414ad763b965b1 /url/url_canon_pathurl.cc
parenta7e3691579181327dc65b02d043e7c01d4b06cb9 (diff)
downloadchromium_src-369e84f70d256d188a1866d8cef52edf4468cd9b.zip
chromium_src-369e84f70d256d188a1866d8cef52edf4468cd9b.tar.gz
chromium_src-369e84f70d256d188a1866d8cef52edf4468cd9b.tar.bz2
Support URL fragment resolution against non-hierarchical schemes
Support URL fragment resolution against non-hierarchical schemes As a result, data: about: etc now have 'query' and 'ref' components parsed; as a result a new GURL::GetContent() convenience is added to retrieve the spec with the scheme stripped off. A complication in supporting this is that we now need to allow trailing whitespace to be preserved when transferring url_parse::Parsed structs between KURL and GURL. Without this, the URL prior to the #fragment can change (i.e. whitespace stripped) when following an anchor link which breaks the page (causes reload from source). See http://crbug.com/291747 for more details on this. R=brettw@chromium.org TBR=cbentzel@chromium.org BUG=291747 Review URL: https://codereview.chromium.org/23835019 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@236917 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'url/url_canon_pathurl.cc')
-rw-r--r--url/url_canon_pathurl.cc64
1 files changed, 41 insertions, 23 deletions
diff --git a/url/url_canon_pathurl.cc b/url/url_canon_pathurl.cc
index bc681f4..8f7dee4 100644
--- a/url/url_canon_pathurl.cc
+++ b/url/url_canon_pathurl.cc
@@ -13,6 +13,39 @@ namespace url_canon {
namespace {
+// Canonicalize the given |component| from |source| into |output| and
+// |new_component|. If |separator| is non-zero, it is prepended to |output|
+// prior to the canonicalized component; i.e. for the '?' or '#' characters.
+template<typename CHAR, typename UCHAR>
+bool DoCanonicalizePathComponent(const CHAR* source,
+ const url_parse::Component& component,
+ CHAR seperator,
+ CanonOutput* output,
+ url_parse::Component* new_component) {
+ bool success = true;
+ if (component.is_valid()) {
+ if (seperator)
+ output->push_back(seperator);
+ // Copy the path using path URL's more lax escaping rules (think for
+ // javascript:). We convert to UTF-8 and escape non-ASCII, but leave all
+ // ASCII characters alone. This helps readability of JavaScript.
+ new_component->begin = output->length();
+ int end = component.end();
+ for (int i = component.begin; i < end; i++) {
+ UCHAR uch = static_cast<UCHAR>(source[i]);
+ if (uch < 0x20 || uch >= 0x80)
+ success &= AppendUTF8EscapedChar(source, &i, end, output);
+ else
+ output->push_back(static_cast<char>(uch));
+ }
+ new_component->len = output->length() - new_component->begin;
+ } else {
+ // Empty part.
+ new_component->reset();
+ }
+ return success;
+}
+
template<typename CHAR, typename UCHAR>
bool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source,
const url_parse::Parsed& parsed,
@@ -28,29 +61,14 @@ bool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source,
new_parsed->password.reset();
new_parsed->host.reset();
new_parsed->port.reset();
-
- if (parsed.path.is_valid()) {
- // Copy the path using path URL's more lax escaping rules (think for
- // javascript:). We convert to UTF-8 and escape non-ASCII, but leave all
- // ASCII characters alone. This helps readability of JavaStript.
- new_parsed->path.begin = output->length();
- int end = parsed.path.end();
- for (int i = parsed.path.begin; i < end; i++) {
- UCHAR uch = static_cast<UCHAR>(source.path[i]);
- if (uch < 0x20 || uch >= 0x80)
- success &= AppendUTF8EscapedChar(source.path, &i, end, output);
- else
- output->push_back(static_cast<char>(uch));
- }
- new_parsed->path.len = output->length() - new_parsed->path.begin;
- } else {
- // Empty path.
- new_parsed->path.reset();
- }
-
- // Assume there's no query or ref.
- new_parsed->query.reset();
- new_parsed->ref.reset();
+ // We allow path URLs to have the path, query and fragment components, but we
+ // will canonicalize each of them via the weaker path URL rules.
+ success &= DoCanonicalizePathComponent<CHAR, UCHAR>(
+ source.path, parsed.path, 0, output, &new_parsed->path);
+ success &= DoCanonicalizePathComponent<CHAR, UCHAR>(
+ source.query, parsed.query, '?', output, &new_parsed->query);
+ success &= DoCanonicalizePathComponent<CHAR, UCHAR>(
+ source.ref, parsed.ref, '#', output, &new_parsed->ref);
return success;
}