diff options
author | tfarina@chromium.org <tfarina@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-08-12 05:01:49 +0000 |
---|---|---|
committer | tfarina@chromium.org <tfarina@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-08-12 05:01:49 +0000 |
commit | f968211f7391a19f3383ab184d1461a2e3b367fb (patch) | |
tree | 6f2b1b36a68bc63186a6b90aabd09c83ee24dc53 | |
parent | db716ca721e9307905bef6db95acb0eb35c49526 (diff) | |
download | chromium_src-f968211f7391a19f3383ab184d1461a2e3b367fb.zip chromium_src-f968211f7391a19f3383ab184d1461a2e3b367fb.tar.gz chromium_src-f968211f7391a19f3383ab184d1461a2e3b367fb.tar.bz2 |
Stop pulling googleurl through DEPS.
This is the final patch in the series merging the external googleurl repo
into the Chromium source code base.
BUG=229660
R=brettw@chromium.org,joth@chromium.org,blundell@chromium.org,thakis@chromium.org,thestig@chromium.org
TBR=brettw
Review URL: https://chromiumcodereview.appspot.com/20349002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@216922 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | DEPS | 6 | ||||
-rwxr-xr-x | android_webview/buildbot/deps_whitelist.py | 1 | ||||
-rw-r--r-- | ios/public/DEPS | 1 | ||||
-rwxr-xr-x | tools/checklicenses/checklicenses.py | 4 | ||||
-rw-r--r-- | tools/clang/plugins/ChromeClassTester.cpp | 1 | ||||
-rwxr-xr-x | tools/licenses.py | 8 | ||||
-rw-r--r-- | url/third_party/mozilla/LICENSE.txt | 65 | ||||
-rw-r--r-- | url/third_party/mozilla/README.chromium | 8 | ||||
-rw-r--r-- | url/third_party/mozilla/url_parse.cc (renamed from url/url_parse.cc) | 2 | ||||
-rw-r--r-- | url/third_party/mozilla/url_parse.h | 361 | ||||
-rw-r--r-- | url/url.gyp | 4 | ||||
-rw-r--r-- | url/url_parse.h | 354 |
12 files changed, 440 insertions, 375 deletions
@@ -55,9 +55,6 @@ deps = { "src/breakpad/src": (Var("googlecode_url") % "google-breakpad") + "/trunk/src@1199", - "src/googleurl": - (Var("googlecode_url") % "google-url") + "/trunk@185", - "src/sdch/open-vcdiff": (Var("googlecode_url") % "open-vcdiff") + "/trunk@42", @@ -513,8 +510,6 @@ include_rules = [ "+third_party/icu/source/common/unicode", "+third_party/icu/source/i18n/unicode", "+url", - # TODO(tfarina): Temporary, until we finish the migration to url. Remove this! - "!googleurl", ] @@ -523,7 +518,6 @@ skip_child_includes = [ "breakpad", "chrome_frame", "delegate_execute", - "googleurl", "metro_driver", "native_client_sdk", "o3d", diff --git a/android_webview/buildbot/deps_whitelist.py b/android_webview/buildbot/deps_whitelist.py index 69c87be..105646f 100755 --- a/android_webview/buildbot/deps_whitelist.py +++ b/android_webview/buildbot/deps_whitelist.py @@ -32,7 +32,6 @@ class DepsWhitelist(object): # Dependencies that need to be merged into the Android tree. self._snapshot_into_android_dependencies = [ - 'googleurl', 'sdch/open-vcdiff', 'testing/gtest', 'third_party/WebKit', diff --git a/ios/public/DEPS b/ios/public/DEPS index 3f0cd59..c04c1d5 100644 --- a/ios/public/DEPS +++ b/ios/public/DEPS @@ -4,7 +4,6 @@ include_rules = [ # be kept in sync with src/DEPS. 
"-base", "-build", - "-googleurl", "-library_loaders", "-testing", "-third_party/icu/source/common/unicode", diff --git a/tools/checklicenses/checklicenses.py b/tools/checklicenses/checklicenses.py index 9c27256..25dedc2 100755 --- a/tools/checklicenses/checklicenses.py +++ b/tools/checklicenses/checklicenses.py @@ -128,10 +128,6 @@ PATH_SPECIFIC_WHITELISTED_LICENSES = { 'data/tab_switching': [ 'UNKNOWN', ], - 'googleurl': [ # http://code.google.com/p/google-url/issues/detail?id=15 - 'UNKNOWN', - ], - 'native_client': [ # http://crbug.com/98099 'UNKNOWN', ], diff --git a/tools/clang/plugins/ChromeClassTester.cpp b/tools/clang/plugins/ChromeClassTester.cpp index 5784334..ee8452d 100644 --- a/tools/clang/plugins/ChromeClassTester.cpp +++ b/tools/clang/plugins/ChromeClassTester.cpp @@ -151,7 +151,6 @@ void ChromeClassTester::BuildBannedLists() { banned_directories_.push_back("ppapi/"); banned_directories_.push_back("usr/"); banned_directories_.push_back("testing/"); - banned_directories_.push_back("googleurl/"); banned_directories_.push_back("v8/"); banned_directories_.push_back("dart/"); banned_directories_.push_back("sdch/"); diff --git a/tools/licenses.py b/tools/licenses.py index 4d1cb01..d2c535b 100755 --- a/tools/licenses.py +++ b/tools/licenses.py @@ -82,7 +82,6 @@ ADDITIONAL_PATHS = ( os.path.join('chrome', 'common', 'extensions', 'docs', 'examples'), os.path.join('chrome', 'test', 'chromeos', 'autotest'), os.path.join('chrome', 'test', 'data'), - os.path.join('googleurl'), os.path.join('native_client'), os.path.join('native_client_sdk'), os.path.join('net', 'tools', 'spdyshark'), @@ -97,6 +96,7 @@ ADDITIONAL_PATHS = ( os.path.join('tools', 'grit'), os.path.join('tools', 'gyp'), os.path.join('tools', 'page_cycler', 'acid3'), + os.path.join('url', 'third_party', 'mozilla'), os.path.join('v8'), # Fake directory so we can include the strongtalk license. os.path.join('v8', 'strongtalk'), @@ -107,12 +107,6 @@ ADDITIONAL_PATHS = ( # can't provide a README.chromium. 
Please prefer a README.chromium # wherever possible. SPECIAL_CASES = { - os.path.join('googleurl'): { - "Name": "google-url", - "URL": "http://code.google.com/p/google-url/", - "License": "BSD and MPL 1.1/GPL 2.0/LGPL 2.1", - "License File": "LICENSE.txt", - }, os.path.join('native_client'): { "Name": "native client", "URL": "http://code.google.com/p/nativeclient", diff --git a/url/third_party/mozilla/LICENSE.txt b/url/third_party/mozilla/LICENSE.txt new file mode 100644 index 0000000..ac40837 --- /dev/null +++ b/url/third_party/mozilla/LICENSE.txt @@ -0,0 +1,65 @@ +Copyright 2007, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +------------------------------------------------------------------------------- + +The file url_parse.cc is based on nsURLParsers.cc from Mozilla. This file is +licensed separately as follows: + +The contents of this file are subject to the Mozilla Public License Version +1.1 (the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at +http://www.mozilla.org/MPL/ + +Software distributed under the License is distributed on an "AS IS" basis, +WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License +for the specific language governing rights and limitations under the +License. + +The Original Code is mozilla.org code. + +The Initial Developer of the Original Code is +Netscape Communications Corporation. +Portions created by the Initial Developer are Copyright (C) 1998 +the Initial Developer. All Rights Reserved. + +Contributor(s): + Darin Fisher (original author) + +Alternatively, the contents of this file may be used under the terms of +either the GNU General Public License Version 2 or later (the "GPL"), or +the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), +in which case the provisions of the GPL or the LGPL are applicable instead +of those above. 
If you wish to allow use of your version of this file only +under the terms of either the GPL or the LGPL, and not to allow others to +use your version of this file under the terms of the MPL, indicate your +decision by deleting the provisions above and replace them with the notice +and other provisions required by the GPL or the LGPL. If you do not delete +the provisions above, a recipient may use your version of this file under +the terms of any one of the MPL, the GPL or the LGPL. diff --git a/url/third_party/mozilla/README.chromium b/url/third_party/mozilla/README.chromium new file mode 100644 index 0000000..ef396d3 --- /dev/null +++ b/url/third_party/mozilla/README.chromium @@ -0,0 +1,8 @@ +Name: url_parse +URL: http://mxr.mozilla.org/comm-central/source/mozilla/netwerk/base/src/nsURLParsers.cpp +License: BSD and MPL 1.1/GPL 2.0/LGPL 2.1 +License File: LICENSE.txt + +Description: + +The file url_parse.cc is based on nsURLParsers.cc from Mozilla. diff --git a/url/url_parse.cc b/url/third_party/mozilla/url_parse.cc index 0d9c6dd1c..52c6196 100644 --- a/url/url_parse.cc +++ b/url/third_party/mozilla/url_parse.cc @@ -34,7 +34,7 @@ * * ***** END LICENSE BLOCK ***** */ -#include "url/url_parse.h" +#include "url/third_party/mozilla/url_parse.h" #include <stdlib.h> diff --git a/url/third_party/mozilla/url_parse.h b/url/third_party/mozilla/url_parse.h new file mode 100644 index 0000000..fd974f8 --- /dev/null +++ b/url/third_party/mozilla/url_parse.h @@ -0,0 +1,361 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef URL_THIRD_PARTY_MOZILLA_URL_PARSE_H_ +#define URL_THIRD_PARTY_MOZILLA_URL_PARSE_H_ + +#include <string> + +#include "base/basictypes.h" +#include "base/strings/string16.h" +#include "url/url_export.h" + +namespace url_parse { + +// Deprecated, but WebKit/WebCore/platform/KURLGooglePrivate.h and +// KURLGoogle.cpp still rely on this type. +typedef base::char16 UTF16Char; + +// Component ------------------------------------------------------------------ + +// Represents a substring for URL parsing. +struct Component { + Component() : begin(0), len(-1) {} + + // Normal constructor: takes an offset and a length. + Component(int b, int l) : begin(b), len(l) {} + + int end() const { + return begin + len; + } + + // Returns true if this component is valid, meaning the length is given. Even + // valid components may be empty to record the fact that they exist. + bool is_valid() const { + return (len != -1); + } + + // Returns true if the given component is specified on false, the component + // is either empty or invalid. + bool is_nonempty() const { + return (len > 0); + } + + void reset() { + begin = 0; + len = -1; + } + + bool operator==(const Component& other) const { + return begin == other.begin && len == other.len; + } + + int begin; // Byte offset in the string of this component. + int len; // Will be -1 if the component is unspecified. +}; + +// Helper that returns a component created with the given begin and ending +// points. The ending point is non-inclusive. +inline Component MakeRange(int begin, int end) { + return Component(begin, end - begin); +} + +// Parsed --------------------------------------------------------------------- + +// A structure that holds the identified parts of an input URL. This structure +// does NOT store the URL itself. The caller will have to store the URL text +// and its corresponding Parsed structure separately. 
+// +// Typical usage would be: +// +// url_parse::Parsed parsed; +// url_parse::Component scheme; +// if (!url_parse::ExtractScheme(url, url_len, &scheme)) +// return I_CAN_NOT_FIND_THE_SCHEME_DUDE; +// +// if (IsStandardScheme(url, scheme)) // Not provided by this component +// url_parseParseStandardURL(url, url_len, &parsed); +// else if (IsFileURL(url, scheme)) // Not provided by this component +// url_parse::ParseFileURL(url, url_len, &parsed); +// else +// url_parse::ParsePathURL(url, url_len, &parsed); +// +struct URL_EXPORT Parsed { + // Identifies different components. + enum ComponentType { + SCHEME, + USERNAME, + PASSWORD, + HOST, + PORT, + PATH, + QUERY, + REF, + }; + + // The default constructor is sufficient for the components, but inner_parsed_ + // requires special handling. + Parsed(); + Parsed(const Parsed&); + Parsed& operator=(const Parsed&); + ~Parsed(); + + // Returns the length of the URL (the end of the last component). + // + // Note that for some invalid, non-canonical URLs, this may not be the length + // of the string. For example "http://": the parsed structure will only + // contain an entry for the four-character scheme, and it doesn't know about + // the "://". For all other last-components, it will return the real length. + int Length() const; + + // Returns the number of characters before the given component if it exists, + // or where the component would be if it did exist. This will return the + // string length if the component would be appended to the end. + // + // Note that this can get a little funny for the port, query, and ref + // components which have a delimiter that is not counted as part of the + // component. The |include_delimiter| flag controls if you want this counted + // as part of the component or not when the component exists. + // + // This example shows the difference between the two flags for two of these + // delimited components that is present (the port and query) and one that + // isn't (the reference). 
The components that this flag affects are marked + // with a *. + // 0 1 2 + // 012345678901234567890 + // Example input: http://foo:80/?query + // include_delim=true, ...=false ("<-" indicates different) + // SCHEME: 0 0 + // USERNAME: 5 5 + // PASSWORD: 5 5 + // HOST: 7 7 + // *PORT: 10 11 <- + // PATH: 13 13 + // *QUERY: 14 15 <- + // *REF: 20 20 + // + int CountCharactersBefore(ComponentType type, bool include_delimiter) const; + + // Scheme without the colon: "http://foo"/ would have a scheme of "http". + // The length will be -1 if no scheme is specified ("foo.com"), or 0 if there + // is a colon but no scheme (":foo"). Note that the scheme is not guaranteed + // to start at the beginning of the string if there are preceeding whitespace + // or control characters. + Component scheme; + + // Username. Specified in URLs with an @ sign before the host. See |password| + Component username; + + // Password. The length will be -1 if unspecified, 0 if specified but empty. + // Not all URLs with a username have a password, as in "http://me@host/". + // The password is separated form the username with a colon, as in + // "http://me:secret@host/" + Component password; + + // Host name. + Component host; + + // Port number. + Component port; + + // Path, this is everything following the host name. Length will be -1 if + // unspecified. This includes the preceeding slash, so the path on + // http://www.google.com/asdf" is "/asdf". As a result, it is impossible to + // have a 0 length path, it will be -1 in cases like "http://host?foo". + // Note that we treat backslashes the same as slashes. + Component path; + + // Stuff between the ? and the # after the path. This does not include the + // preceeding ? character. Length will be -1 if unspecified, 0 if there is + // a question mark but no query string. + Component query; + + // Indicated by a #, this is everything following the hash sign (not + // including it). If there are multiple hash signs, we'll use the last one. 
+ // Length will be -1 if there is no hash sign, or 0 if there is one but + // nothing follows it. + Component ref; + + // This is used for nested URL types, currently only filesystem. If you + // parse a filesystem URL, the resulting Parsed will have a nested + // inner_parsed_ to hold the parsed inner URL's component information. + // For all other url types [including the inner URL], it will be NULL. + Parsed* inner_parsed() const { + return inner_parsed_; + } + + void set_inner_parsed(const Parsed& inner_parsed) { + if (!inner_parsed_) + inner_parsed_ = new Parsed(inner_parsed); + else + *inner_parsed_ = inner_parsed; + } + + void clear_inner_parsed() { + if (inner_parsed_) { + delete inner_parsed_; + inner_parsed_ = NULL; + } + } + + private: + Parsed* inner_parsed_; // This object is owned and managed by this struct. +}; + +// Initialization functions --------------------------------------------------- +// +// These functions parse the given URL, filling in all of the structure's +// components. These functions can not fail, they will always do their best +// at interpreting the input given. +// +// The string length of the URL MUST be specified, we do not check for NULLs +// at any point in the process, and will actually handle embedded NULLs. +// +// IMPORTANT: These functions do NOT hang on to the given pointer or copy it +// in any way. See the comment above the struct. +// +// The 8-bit versions require UTF-8 encoding. + +// StandardURL is for when the scheme is known to be one that has an +// authority (host) like "http". This function will not handle weird ones +// like "about:" and "javascript:", or do the right thing for "file:" URLs. +URL_EXPORT void ParseStandardURL(const char* url, + int url_len, + Parsed* parsed); +URL_EXPORT void ParseStandardURL(const base::char16* url, + int url_len, + Parsed* parsed); + +// PathURL is for when the scheme is known not to have an authority (host) +// section but that aren't file URLs either. 
The scheme is parsed, and +// everything after the scheme is considered as the path. This is used for +// things like "about:" and "javascript:" +URL_EXPORT void ParsePathURL(const char* url, int url_len, Parsed* parsed); +URL_EXPORT void ParsePathURL(const base::char16* url, + int url_len, + Parsed* parsed); + +// FileURL is for file URLs. There are some special rules for interpreting +// these. +URL_EXPORT void ParseFileURL(const char* url, int url_len, Parsed* parsed); +URL_EXPORT void ParseFileURL(const base::char16* url, + int url_len, + Parsed* parsed); + +// Filesystem URLs are structured differently than other URLs. +URL_EXPORT void ParseFileSystemURL(const char* url, + int url_len, + Parsed* parsed); +URL_EXPORT void ParseFileSystemURL(const base::char16* url, + int url_len, + Parsed* parsed); + +// MailtoURL is for mailto: urls. They are made up scheme,path,query +URL_EXPORT void ParseMailtoURL(const char* url, int url_len, Parsed* parsed); +URL_EXPORT void ParseMailtoURL(const base::char16* url, + int url_len, + Parsed* parsed); + +// Helper functions ----------------------------------------------------------- + +// Locates the scheme according to the URL parser's rules. This function is +// designed so the caller can find the scheme and call the correct Init* +// function according to their known scheme types. +// +// It also does not perform any validation on the scheme. +// +// This function will return true if the scheme is found and will put the +// scheme's range into *scheme. False means no scheme could be found. Note +// that a URL beginning with a colon has a scheme, but it is empty, so this +// function will return true but *scheme will = (0,0). +// +// The scheme is found by skipping spaces and control characters at the +// beginning, and taking everything from there to the first colon to be the +// scheme. 
The character at scheme.end() will be the colon (we may enhance +// this to handle full width colons or something, so don't count on the +// actual character value). The character at scheme.end()+1 will be the +// beginning of the rest of the URL, be it the authority or the path (or the +// end of the string). +// +// The 8-bit version requires UTF-8 encoding. +URL_EXPORT bool ExtractScheme(const char* url, + int url_len, + Component* scheme); +URL_EXPORT bool ExtractScheme(const base::char16* url, + int url_len, + Component* scheme); + +// Returns true if ch is a character that terminates the authority segment +// of a URL. +URL_EXPORT bool IsAuthorityTerminator(base::char16 ch); + +// Does a best effort parse of input |spec|, in range |auth|. If a particular +// component is not found, it will be set to invalid. +URL_EXPORT void ParseAuthority(const char* spec, + const Component& auth, + Component* username, + Component* password, + Component* hostname, + Component* port_num); +URL_EXPORT void ParseAuthority(const base::char16* spec, + const Component& auth, + Component* username, + Component* password, + Component* hostname, + Component* port_num); + +// Computes the integer port value from the given port component. The port +// component should have been identified by one of the init functions on +// |Parsed| for the given input url. +// +// The return value will be a positive integer between 0 and 64K, or one of +// the two special values below. +enum SpecialPort { PORT_UNSPECIFIED = -1, PORT_INVALID = -2 }; +URL_EXPORT int ParsePort(const char* url, const Component& port); +URL_EXPORT int ParsePort(const base::char16* url, const Component& port); + +// Extracts the range of the file name in the given url. The path must +// already have been computed by the parse function, and the matching URL +// and extracted path are provided to this function. 
The filename is +// defined as being everything from the last slash/backslash of the path +// to the end of the path. +// +// The file name will be empty if the path is empty or there is nothing +// following the last slash. +// +// The 8-bit version requires UTF-8 encoding. +URL_EXPORT void ExtractFileName(const char* url, + const Component& path, + Component* file_name); +URL_EXPORT void ExtractFileName(const base::char16* url, + const Component& path, + Component* file_name); + +// Extract the first key/value from the range defined by |*query|. Updates +// |*query| to start at the end of the extracted key/value pair. This is +// designed for use in a loop: you can keep calling it with the same query +// object and it will iterate over all items in the query. +// +// Some key/value pairs may have the key, the value, or both be empty (for +// example, the query string "?&"). These will be returned. Note that an empty +// last parameter "foo.com?" or foo.com?a&" will not be returned, this case +// is the same as "done." +// +// The initial query component should not include the '?' (this is the default +// for parsed URLs). +// +// If no key/value are found |*key| and |*value| will be unchanged and it will +// return false. 
+URL_EXPORT bool ExtractQueryKeyValue(const char* url, + Component* query, + Component* key, + Component* value); +URL_EXPORT bool ExtractQueryKeyValue(const base::char16* url, + Component* query, + Component* key, + Component* value); + +} // namespace url_parse + +#endif // URL_THIRD_PARTY_MOZILLA_URL_PARSE_H_ diff --git a/url/url.gyp b/url/url.gyp index a9a1a19d..fe4d5fd 100644 --- a/url/url.gyp +++ b/url/url.gyp @@ -22,6 +22,8 @@ 'sources': [ 'gurl.cc', 'gurl.h', + 'third_party/mozilla/url_parse.cc', + 'third_party/mozilla/url_parse.h', 'url_canon.h', 'url_canon_etc.cc', 'url_canon_filesystemurl.cc', @@ -42,8 +44,6 @@ 'url_canon_stdstring.h', 'url_canon_stdurl.cc', 'url_file.h', - 'url_parse.cc', - 'url_parse.h', 'url_parse_file.cc', 'url_parse_internal.h', 'url_util.cc', diff --git a/url/url_parse.h b/url/url_parse.h index 21033dd..3b9c546 100644 --- a/url/url_parse.h +++ b/url/url_parse.h @@ -5,357 +5,7 @@ #ifndef URL_URL_PARSE_H_ #define URL_URL_PARSE_H_ -#include <string> - -#include "base/basictypes.h" -#include "base/strings/string16.h" -#include "url/url_export.h" - -namespace url_parse { - -// Deprecated, but WebKit/WebCore/platform/KURLGooglePrivate.h and -// KURLGoogle.cpp still rely on this type. -typedef base::char16 UTF16Char; - -// Component ------------------------------------------------------------------ - -// Represents a substring for URL parsing. -struct Component { - Component() : begin(0), len(-1) {} - - // Normal constructor: takes an offset and a length. - Component(int b, int l) : begin(b), len(l) {} - - int end() const { - return begin + len; - } - - // Returns true if this component is valid, meaning the length is given. Even - // valid components may be empty to record the fact that they exist. - bool is_valid() const { - return (len != -1); - } - - // Returns true if the given component is specified on false, the component - // is either empty or invalid. 
- bool is_nonempty() const { - return (len > 0); - } - - void reset() { - begin = 0; - len = -1; - } - - bool operator==(const Component& other) const { - return begin == other.begin && len == other.len; - } - - int begin; // Byte offset in the string of this component. - int len; // Will be -1 if the component is unspecified. -}; - -// Helper that returns a component created with the given begin and ending -// points. The ending point is non-inclusive. -inline Component MakeRange(int begin, int end) { - return Component(begin, end - begin); -} - -// Parsed --------------------------------------------------------------------- - -// A structure that holds the identified parts of an input URL. This structure -// does NOT store the URL itself. The caller will have to store the URL text -// and its corresponding Parsed structure separately. -// -// Typical usage would be: -// -// url_parse::Parsed parsed; -// url_parse::Component scheme; -// if (!url_parse::ExtractScheme(url, url_len, &scheme)) -// return I_CAN_NOT_FIND_THE_SCHEME_DUDE; -// -// if (IsStandardScheme(url, scheme)) // Not provided by this component -// url_parseParseStandardURL(url, url_len, &parsed); -// else if (IsFileURL(url, scheme)) // Not provided by this component -// url_parse::ParseFileURL(url, url_len, &parsed); -// else -// url_parse::ParsePathURL(url, url_len, &parsed); -// -struct URL_EXPORT Parsed { - // Identifies different components. - enum ComponentType { - SCHEME, - USERNAME, - PASSWORD, - HOST, - PORT, - PATH, - QUERY, - REF, - }; - - // The default constructor is sufficient for the components, but inner_parsed_ - // requires special handling. - Parsed(); - Parsed(const Parsed&); - Parsed& operator=(const Parsed&); - ~Parsed(); - - // Returns the length of the URL (the end of the last component). - // - // Note that for some invalid, non-canonical URLs, this may not be the length - // of the string. 
For example "http://": the parsed structure will only - // contain an entry for the four-character scheme, and it doesn't know about - // the "://". For all other last-components, it will return the real length. - int Length() const; - - // Returns the number of characters before the given component if it exists, - // or where the component would be if it did exist. This will return the - // string length if the component would be appended to the end. - // - // Note that this can get a little funny for the port, query, and ref - // components which have a delimiter that is not counted as part of the - // component. The |include_delimiter| flag controls if you want this counted - // as part of the component or not when the component exists. - // - // This example shows the difference between the two flags for two of these - // delimited components that is present (the port and query) and one that - // isn't (the reference). The components that this flag affects are marked - // with a *. - // 0 1 2 - // 012345678901234567890 - // Example input: http://foo:80/?query - // include_delim=true, ...=false ("<-" indicates different) - // SCHEME: 0 0 - // USERNAME: 5 5 - // PASSWORD: 5 5 - // HOST: 7 7 - // *PORT: 10 11 <- - // PATH: 13 13 - // *QUERY: 14 15 <- - // *REF: 20 20 - // - int CountCharactersBefore(ComponentType type, bool include_delimiter) const; - - // Scheme without the colon: "http://foo"/ would have a scheme of "http". - // The length will be -1 if no scheme is specified ("foo.com"), or 0 if there - // is a colon but no scheme (":foo"). Note that the scheme is not guaranteed - // to start at the beginning of the string if there are preceeding whitespace - // or control characters. - Component scheme; - - // Username. Specified in URLs with an @ sign before the host. See |password| - Component username; - - // Password. The length will be -1 if unspecified, 0 if specified but empty. - // Not all URLs with a username have a password, as in "http://me@host/". 
- // The password is separated form the username with a colon, as in - // "http://me:secret@host/" - Component password; - - // Host name. - Component host; - - // Port number. - Component port; - - // Path, this is everything following the host name. Length will be -1 if - // unspecified. This includes the preceeding slash, so the path on - // http://www.google.com/asdf" is "/asdf". As a result, it is impossible to - // have a 0 length path, it will be -1 in cases like "http://host?foo". - // Note that we treat backslashes the same as slashes. - Component path; - - // Stuff between the ? and the # after the path. This does not include the - // preceeding ? character. Length will be -1 if unspecified, 0 if there is - // a question mark but no query string. - Component query; - - // Indicated by a #, this is everything following the hash sign (not - // including it). If there are multiple hash signs, we'll use the last one. - // Length will be -1 if there is no hash sign, or 0 if there is one but - // nothing follows it. - Component ref; - - // This is used for nested URL types, currently only filesystem. If you - // parse a filesystem URL, the resulting Parsed will have a nested - // inner_parsed_ to hold the parsed inner URL's component information. - // For all other url types [including the inner URL], it will be NULL. - Parsed* inner_parsed() const { - return inner_parsed_; - } - - void set_inner_parsed(const Parsed& inner_parsed) { - if (!inner_parsed_) - inner_parsed_ = new Parsed(inner_parsed); - else - *inner_parsed_ = inner_parsed; - } - - void clear_inner_parsed() { - if (inner_parsed_) { - delete inner_parsed_; - inner_parsed_ = NULL; - } - } - - private: - Parsed* inner_parsed_; // This object is owned and managed by this struct. -}; - -// Initialization functions --------------------------------------------------- -// -// These functions parse the given URL, filling in all of the structure's -// components. 
These functions can not fail, they will always do their best -// at interpreting the input given. -// -// The string length of the URL MUST be specified, we do not check for NULLs -// at any point in the process, and will actually handle embedded NULLs. -// -// IMPORTANT: These functions do NOT hang on to the given pointer or copy it -// in any way. See the comment above the struct. -// -// The 8-bit versions require UTF-8 encoding. - -// StandardURL is for when the scheme is known to be one that has an -// authority (host) like "http". This function will not handle weird ones -// like "about:" and "javascript:", or do the right thing for "file:" URLs. -URL_EXPORT void ParseStandardURL(const char* url, - int url_len, - Parsed* parsed); -URL_EXPORT void ParseStandardURL(const base::char16* url, - int url_len, - Parsed* parsed); - -// PathURL is for when the scheme is known not to have an authority (host) -// section but that aren't file URLs either. The scheme is parsed, and -// everything after the scheme is considered as the path. This is used for -// things like "about:" and "javascript:" -URL_EXPORT void ParsePathURL(const char* url, int url_len, Parsed* parsed); -URL_EXPORT void ParsePathURL(const base::char16* url, - int url_len, - Parsed* parsed); - -// FileURL is for file URLs. There are some special rules for interpreting -// these. -URL_EXPORT void ParseFileURL(const char* url, int url_len, Parsed* parsed); -URL_EXPORT void ParseFileURL(const base::char16* url, - int url_len, - Parsed* parsed); - -// Filesystem URLs are structured differently than other URLs. -URL_EXPORT void ParseFileSystemURL(const char* url, - int url_len, - Parsed* parsed); -URL_EXPORT void ParseFileSystemURL(const base::char16* url, - int url_len, - Parsed* parsed); - -// MailtoURL is for mailto: urls. 
They are made up of scheme, path, and query. -URL_EXPORT void ParseMailtoURL(const char* url, int url_len, Parsed* parsed); -URL_EXPORT void ParseMailtoURL(const base::char16* url, - int url_len, - Parsed* parsed); - -// Helper functions ----------------------------------------------------------- - -// Locates the scheme according to the URL parser's rules. This function is -// designed so the caller can find the scheme and call the correct Init* -// function according to their known scheme types. -// -// It also does not perform any validation on the scheme. -// -// This function will return true if the scheme is found and will put the -// scheme's range into *scheme. False means no scheme could be found. Note -// that a URL beginning with a colon has a scheme, but it is empty, so this -// function will return true but *scheme will = (0,0). -// -// The scheme is found by skipping spaces and control characters at the -// beginning, and taking everything from there to the first colon to be the -// scheme. The character at scheme.end() will be the colon (we may enhance -// this to handle full width colons or something, so don't count on the -// actual character value). The character at scheme.end()+1 will be the -// beginning of the rest of the URL, be it the authority or the path (or the -// end of the string). -// -// The 8-bit version requires UTF-8 encoding. -URL_EXPORT bool ExtractScheme(const char* url, - int url_len, - Component* scheme); -URL_EXPORT bool ExtractScheme(const base::char16* url, - int url_len, - Component* scheme); - -// Returns true if ch is a character that terminates the authority segment -// of a URL. -URL_EXPORT bool IsAuthorityTerminator(base::char16 ch); - -// Does a best effort parse of input |spec|, in range |auth|. If a particular -// component is not found, it will be set to invalid.
-URL_EXPORT void ParseAuthority(const char* spec, - const Component& auth, - Component* username, - Component* password, - Component* hostname, - Component* port_num); -URL_EXPORT void ParseAuthority(const base::char16* spec, - const Component& auth, - Component* username, - Component* password, - Component* hostname, - Component* port_num); - -// Computes the integer port value from the given port component. The port -// component should have been identified by one of the init functions on -// |Parsed| for the given input url. -// -// The return value will be a positive integer between 0 and 64K, or one of -// the two special values below. -enum SpecialPort { PORT_UNSPECIFIED = -1, PORT_INVALID = -2 }; -URL_EXPORT int ParsePort(const char* url, const Component& port); -URL_EXPORT int ParsePort(const base::char16* url, const Component& port); - -// Extracts the range of the file name in the given url. The path must -// already have been computed by the parse function, and the matching URL -// and extracted path are provided to this function. The filename is -// defined as being everything from the last slash/backslash of the path -// to the end of the path. -// -// The file name will be empty if the path is empty or there is nothing -// following the last slash. -// -// The 8-bit version requires UTF-8 encoding. -URL_EXPORT void ExtractFileName(const char* url, - const Component& path, - Component* file_name); -URL_EXPORT void ExtractFileName(const base::char16* url, - const Component& path, - Component* file_name); - -// Extract the first key/value from the range defined by |*query|. Updates -// |*query| to start at the end of the extracted key/value pair. This is -// designed for use in a loop: you can keep calling it with the same query -// object and it will iterate over all items in the query. -// -// Some key/value pairs may have the key, the value, or both be empty (for -// example, the query string "?&"). These will be returned. 
Note that an empty -// last parameter "foo.com?" or "foo.com?a&" will not be returned, this case -// is the same as "done." -// -// The initial query component should not include the '?' (this is the default -// for parsed URLs). -// -// If no key/value are found |*key| and |*value| will be unchanged and it will -// return false. -URL_EXPORT bool ExtractQueryKeyValue(const char* url, - Component* query, - Component* key, - Component* value); -URL_EXPORT bool ExtractQueryKeyValue(const base::char16* url, - Component* query, - Component* key, - Component* value); - -} // namespace url_parse +// TODO(tfarina): Remove this file when the callers are updated. +#include "url/third_party/mozilla/url_parse.h" #endif // URL_URL_PARSE_H_ |