diff options
Diffstat (limited to 'googleurl/src/url_parse_file.cc')
-rw-r--r-- | googleurl/src/url_parse_file.cc | 243 |
1 files changed, 243 insertions, 0 deletions
diff --git a/googleurl/src/url_parse_file.cc b/googleurl/src/url_parse_file.cc new file mode 100644 index 0000000..2e8429f --- /dev/null +++ b/googleurl/src/url_parse_file.cc @@ -0,0 +1,243 @@ +// Copyright 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "base/logging.h" +#include "googleurl/src/url_file.h" +#include "googleurl/src/url_parse.h" +#include "googleurl/src/url_parse_internal.h" + +// Interesting IE file:isms... +// +// INPUT OUTPUT +// ========================= ============================== +// file:/foo/bar file:///foo/bar +// The result here seems totally invalid!?!? This isn't UNC. +// +// file:/ +// file:// or any other number of slashes +// IE6 doesn't do anything at all if you click on this link. No error: +// nothing. IE6's history system seems to always color this link, so I'm +// guessing that it maps internally to the empty URL. +// +// C:\ file:///C:/ +// When on a file: URL source page, this link will work. When over HTTP, +// the file: URL will appear in the status bar but the link will not work +// (security restriction for all file URLs). +// +// file:foo/ file:foo/ (invalid?!?!?) +// file:/foo/ file:///foo/ (invalid?!?!?) +// file://foo/ file://foo/ (UNC to server "foo") +// file:///foo/ file:///foo/ (invalid, seems to be a file) +// file:////foo/ file://foo/ (UNC to server "foo") +// Any more than four slashes is also treated as UNC. +// +// file:C:/ file://C:/ +// file:/C:/ file://C:/ +// The number of slashes after "file:" don't matter if the thing following +// it looks like an absolute drive path. Also, slashes and backslashes are +// equally valid here. + +namespace url_parse { + +namespace { + +// A subcomponent of DoInitFileURL, the input of this function should be a UNC +// path name, with the index of the first character after the slashes following +// the scheme given in |after_slashes|. This will initialize the host, path, +// query, and ref, and leave the other output components untouched +// (DoInitFileURL handles these for us). +template<typename CHAR> +void DoParseUNC(const CHAR* spec, + int after_slashes, + int spec_len, + Parsed* parsed) { + int next_slash = FindNextSlash(spec, after_slashes, spec_len); + if (next_slash == spec_len) { + // No additional slash found, as in "file://foo", treat the text as the + // host with no path (this will end up being UNC to server "foo"). + int host_len = spec_len - after_slashes; + if (host_len) + parsed->host = Component(after_slashes, host_len); + else + parsed->host.reset(); + parsed->path.reset(); + return; + } + +#ifdef WIN32 + // See if we have something that looks like a path following the first + // component. As in "file://localhost/c:/", we get "c:/" out. We want to + // treat this as a having no host but the path given. Works on Windows only. + if (DoesBeginWindowsDriveSpec(spec, next_slash + 1, spec_len)) { + parsed->host.reset(); + ParsePathInternal(spec, MakeRange(next_slash, spec_len), + &parsed->path, &parsed->query, &parsed->ref); + return; + } +#endif + + // Otherwise, everything up until that first slash we found is the host name, + // which will end up being the UNC host. For example "file://foo/bar.txt" + // will get a server name of "foo" and a path of "/bar". Later, on Windows, + // this should be treated as the filename "\\foo\bar.txt" in proper UNC + // notation. + int host_len = next_slash - after_slashes; + if (host_len) + parsed->host = MakeRange(after_slashes, next_slash); + else + parsed->host.reset(); + if (next_slash < spec_len) { + ParsePathInternal(spec, MakeRange(next_slash, spec_len), + &parsed->path, &parsed->query, &parsed->ref); + } else { + parsed->path.reset(); + } +} + +// A subcomponent of DoParseFileURL, the input should be a local file, with the +// beginning of the path indicated by the index in |path_begin|. This will +// initialize the host, path, query, and ref, and leave the other output +// components untouched (DoInitFileURL handles these for us). +template<typename CHAR> +void DoParseLocalFile(const CHAR* spec, + int path_begin, + int spec_len, + Parsed* parsed) { + parsed->host.reset(); + ParsePathInternal(spec, MakeRange(path_begin, spec_len), + &parsed->path, &parsed->query, &parsed->ref); +} + +// Backend for the external functions that operates on either char type. +// We are handed the character after the "file:" at the beginning of the spec. +// Usually this is a slash, but needn't be; we allow paths like "file:c:\foo". +template<typename CHAR> +void DoParseFileURL(const CHAR* spec, int spec_len, Parsed* parsed) { + DCHECK(spec_len >= 0); + + // Get the parts we never use for file URLs out of the way. + parsed->username.reset(); + parsed->password.reset(); + parsed->port.reset(); + + // Many of the code paths don't set these, so it's convenient to just clear + // them. We'll write them in those cases we need them. + parsed->query.reset(); + parsed->ref.reset(); + + // Strip leading & trailing spaces and control characters. + int begin = 0; + TrimURL(spec, &begin, &spec_len); + + // Find the scheme. + int num_slashes; + int after_scheme; + int after_slashes; +#ifdef WIN32 + // See how many slashes there are. We want to handle cases like UNC but also + // "/c:/foo". This is when there is no scheme, so we can allow pages to do + // links like "c:/foo/bar" or "//foo/bar". This is also called by the + // relative URL resolver when it determines there is an absolute URL, which + // may give us input like "/c:/foo". + num_slashes = CountConsecutiveSlashes(spec, begin, spec_len); + after_slashes = begin + num_slashes; + if (DoesBeginWindowsDriveSpec(spec, after_slashes, spec_len)) { + // Windows path, don't try to extract the scheme (for example, "c:\foo"). + parsed->scheme.reset(); + after_scheme = after_slashes; + } else if (DoesBeginUNCPath(spec, begin, spec_len, false)) { + // Windows UNC path: don't try to extract the scheme, but keep the slashes. + parsed->scheme.reset(); + after_scheme = begin; + } else +#endif + { + if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) { + // Offset the results since we gave ExtractScheme a substring. + parsed->scheme.begin += begin; + after_scheme = parsed->scheme.end() + 1; + } else { + // No scheme found, remember that. + parsed->scheme.reset(); + after_scheme = begin; + } + } + + // Handle empty specs ones that contain only whitespace or control chars, + // or that are just the scheme (for example "file:"). + if (after_scheme == spec_len) { + parsed->host.reset(); + parsed->path.reset(); + return; + } + + num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len); + + after_slashes = after_scheme + num_slashes; +#ifdef WIN32 + // Check whether the input is a drive again. We checked above for windows + // drive specs, but that's only at the very beginning to see if we have a + // scheme at all. This test will be duplicated in that case, but will + // additionally handle all cases with a real scheme such as "file:///C:/". + if (!DoesBeginWindowsDriveSpec(spec, after_slashes, spec_len) && + num_slashes != 3) { + // Anything not beginning with a drive spec ("c:\") on Windows is treated + // as UNC, with the exception of three slashes which always means a file. + // Even IE7 treats file:///foo/bar as "/foo/bar", which then fails. + DoParseUNC(spec, after_slashes, spec_len, parsed); + return; + } +#else + // file: URL with exactly 2 slashes is considered to have a host component. + if (num_slashes == 2) { + DoParseUNC(spec, after_slashes, spec_len, parsed); + return; + } +#endif // WIN32 + + // Easy and common case, the full path immediately follows the scheme + // (modulo slashes), as in "file://c:/foo". Just treat everything from + // there to the end as the path. Empty hosts have 0 length instead of -1. + // We include the last slash as part of the path if there is one. + DoParseLocalFile(spec, + num_slashes > 0 ? after_scheme + num_slashes - 1 : after_scheme, + spec_len, parsed); +} + +} // namespace + +void ParseFileURL(const char* url, int url_len, Parsed* parsed) { + DoParseFileURL(url, url_len, parsed); +} + +void ParseFileURL(const char16* url, int url_len, Parsed* parsed) { + DoParseFileURL(url, url_len, parsed); +} + +} // namespace url_parse |