diff options
Diffstat (limited to 'googleurl/src/url_canon.h')
-rw-r--r-- | googleurl/src/url_canon.h | 435 |
1 files changed, 218 insertions, 217 deletions
diff --git a/googleurl/src/url_canon.h b/googleurl/src/url_canon.h index 143574d..e2cfb55 100644 --- a/googleurl/src/url_canon.h +++ b/googleurl/src/url_canon.h @@ -33,6 +33,7 @@ #include <stdlib.h> #include "base/string16.h" +#include "googleurl/src/url_common.h" #include "googleurl/src/url_parse.h" namespace url_canon { @@ -248,12 +249,12 @@ class CharsetConverter { // // Therefore, callers should not use the buffer, since it may actuall be empty, // use the computed pointer and |*output_len| instead. -const char* RemoveURLWhitespace(const char* input, int input_len, - CanonOutputT<char>* buffer, - int* output_len); -const char16* RemoveURLWhitespace(const char16* input, int input_len, - CanonOutputT<char16>* buffer, - int* output_len); +GURL_API const char* RemoveURLWhitespace(const char* input, int input_len, + CanonOutputT<char>* buffer, + int* output_len); +GURL_API const char16* RemoveURLWhitespace(const char16* input, int input_len, + CanonOutputT<char16>* buffer, + int* output_len); // IDN ------------------------------------------------------------------------ @@ -266,7 +267,7 @@ const char16* RemoveURLWhitespace(const char16* input, int input_len, // the length of the output will be set to the length of the new host name. // // On error, returns false. The output in this case is undefined. -bool IDNToASCII(const char16* src, int src_len, CanonOutputW* output); +GURL_API bool IDNToASCII(const char16* src, int src_len, CanonOutputW* output); // Piece-by-piece canonicalizers ---------------------------------------------- // @@ -292,14 +293,14 @@ bool IDNToASCII(const char16* src, int src_len, CanonOutputW* output); // URLs. // // The 8-bit version requires UTF-8 encoding. -bool CanonicalizeScheme(const char* spec, - const url_parse::Component& scheme, - CanonOutput* output, - url_parse::Component* out_scheme); -bool CanonicalizeScheme(const char16* spec, - const url_parse::Component& scheme, - CanonOutput* output, - url_parse::Component* out_scheme); +GURL_API bool CanonicalizeScheme(const char* spec, + const url_parse::Component& scheme, + CanonOutput* output, + url_parse::Component* out_scheme); +GURL_API bool CanonicalizeScheme(const char16* spec, + const url_parse::Component& scheme, + CanonOutput* output, + url_parse::Component* out_scheme); // User info: username/password. If present, this will add the delimiters so // the output will be "<username>:<password>@" or "<username>@". Empty @@ -311,20 +312,20 @@ bool CanonicalizeScheme(const char16* spec, // is legal as long as the two components don't overlap. // // The 8-bit version requires UTF-8 encoding. -bool CanonicalizeUserInfo(const char* username_source, - const url_parse::Component& username, - const char* password_source, - const url_parse::Component& password, - CanonOutput* output, - url_parse::Component* out_username, - url_parse::Component* out_password); -bool CanonicalizeUserInfo(const char16* username_source, - const url_parse::Component& username, - const char16* password_source, - const url_parse::Component& password, - CanonOutput* output, - url_parse::Component* out_username, - url_parse::Component* out_password); +GURL_API bool CanonicalizeUserInfo(const char* username_source, + const url_parse::Component& username, + const char* password_source, + const url_parse::Component& password, + CanonOutput* output, + url_parse::Component* out_username, + url_parse::Component* out_password); +GURL_API bool CanonicalizeUserInfo(const char16* username_source, + const url_parse::Component& username, + const char16* password_source, + const url_parse::Component& password, + CanonOutput* output, + url_parse::Component* out_username, + url_parse::Component* out_password); // This structure holds detailed state exported from the IP/Host canonicalizers. @@ -366,27 +367,27 @@ struct CanonHostInfo { // // The 8-bit version requires UTF-8 encoding. Use this version when you only // need to know whether canonicalization succeeded. -bool CanonicalizeHost(const char* spec, - const url_parse::Component& host, - CanonOutput* output, - url_parse::Component* out_host); -bool CanonicalizeHost(const char16* spec, - const url_parse::Component& host, - CanonOutput* output, - url_parse::Component* out_host); +GURL_API bool CanonicalizeHost(const char* spec, + const url_parse::Component& host, + CanonOutput* output, + url_parse::Component* out_host); +GURL_API bool CanonicalizeHost(const char16* spec, + const url_parse::Component& host, + CanonOutput* output, + url_parse::Component* out_host); // Extended version of CanonicalizeHost, which returns additional information. // Use this when you need to know whether the hostname was an IP address. // A successful return is indicated by host_info->family != BROKEN. See the // definition of CanonHostInfo above for details. -void CanonicalizeHostVerbose(const char* spec, - const url_parse::Component& host, - CanonOutput* output, - CanonHostInfo* host_info); -void CanonicalizeHostVerbose(const char16* spec, - const url_parse::Component& host, - CanonOutput* output, - CanonHostInfo* host_info); +GURL_API void CanonicalizeHostVerbose(const char* spec, + const url_parse::Component& host, + CanonOutput* output, + CanonHostInfo* host_info); +GURL_API void CanonicalizeHostVerbose(const char16* spec, + const url_parse::Component& host, + CanonOutput* output, + CanonHostInfo* host_info); // IP addresses. @@ -399,34 +400,34 @@ void CanonicalizeHostVerbose(const char16* spec, // This is called AUTOMATICALLY from the host canonicalizer, which ensures that // the input is unescaped and name-prepped, etc. It should not normally be // necessary or wise to call this directly. -void CanonicalizeIPAddress(const char* spec, - const url_parse::Component& host, - CanonOutput* output, - CanonHostInfo* host_info); -void CanonicalizeIPAddress(const char16* spec, - const url_parse::Component& host, - CanonOutput* output, - CanonHostInfo* host_info); +GURL_API void CanonicalizeIPAddress(const char* spec, + const url_parse::Component& host, + CanonOutput* output, + CanonHostInfo* host_info); +GURL_API void CanonicalizeIPAddress(const char16* spec, + const url_parse::Component& host, + CanonOutput* output, + CanonHostInfo* host_info); // Port: this function will add the colon for the port if a port is present. // The caller can pass url_parse::PORT_UNSPECIFIED as the // default_port_for_scheme argument if there is no default port. // // The 8-bit version requires UTF-8 encoding. -bool CanonicalizePort(const char* spec, - const url_parse::Component& port, - int default_port_for_scheme, - CanonOutput* output, - url_parse::Component* out_port); -bool CanonicalizePort(const char16* spec, - const url_parse::Component& port, - int default_port_for_scheme, - CanonOutput* output, - url_parse::Component* out_port); +GURL_API bool CanonicalizePort(const char* spec, + const url_parse::Component& port, + int default_port_for_scheme, + CanonOutput* output, + url_parse::Component* out_port); +GURL_API bool CanonicalizePort(const char16* spec, + const url_parse::Component& port, + int default_port_for_scheme, + CanonOutput* output, + url_parse::Component* out_port); // Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED // if the scheme is unknown. -int DefaultPortForScheme(const char* scheme, int scheme_len); +GURL_API int DefaultPortForScheme(const char* scheme, int scheme_len); // Path. If the input does not begin in a slash (including if the input is // empty), we'll prepend a slash to the path to make it canonical. @@ -437,14 +438,14 @@ int DefaultPortForScheme(const char* scheme, int scheme_len); // an issue. Somebody giving us an 8-bit path is responsible for generating // the path that the server expects (we'll escape high-bit characters), so // if something is invalid, it's their problem. -bool CanonicalizePath(const char* spec, - const url_parse::Component& path, - CanonOutput* output, - url_parse::Component* out_path); -bool CanonicalizePath(const char16* spec, - const url_parse::Component& path, - CanonOutput* output, - url_parse::Component* out_path); +GURL_API bool CanonicalizePath(const char* spec, + const url_parse::Component& path, + CanonOutput* output, + url_parse::Component* out_path); +GURL_API bool CanonicalizePath(const char16* spec, + const url_parse::Component& path, + CanonOutput* output, + url_parse::Component* out_path); // Canonicalizes the input as a file path. This is like CanonicalizePath except // that it also handles Windows drive specs. For example, the path can begin @@ -452,14 +453,14 @@ bool CanonicalizePath(const char16* spec, // The string will be appended to |*output| and |*out_path| will be updated. // // The 8-bit version requires UTF-8 encoding. -bool FileCanonicalizePath(const char* spec, - const url_parse::Component& path, - CanonOutput* output, - url_parse::Component* out_path); -bool FileCanonicalizePath(const char16* spec, - const url_parse::Component& path, - CanonOutput* output, - url_parse::Component* out_path); +GURL_API bool FileCanonicalizePath(const char* spec, + const url_parse::Component& path, + CanonOutput* output, + url_parse::Component* out_path); +GURL_API bool FileCanonicalizePath(const char16* spec, + const url_parse::Component& path, + CanonOutput* output, + url_parse::Component* out_path); // Query: Prepends the ? if needed. // @@ -473,16 +474,16 @@ bool FileCanonicalizePath(const char16* spec, // if necessary, for ASCII input, no conversions are necessary. // // The converter can be NULL. In this case, the output encoding will be UTF-8. -void CanonicalizeQuery(const char* spec, - const url_parse::Component& query, - CharsetConverter* converter, - CanonOutput* output, - url_parse::Component* out_query); -void CanonicalizeQuery(const char16* spec, - const url_parse::Component& query, - CharsetConverter* converter, - CanonOutput* output, - url_parse::Component* out_query); +GURL_API void CanonicalizeQuery(const char* spec, + const url_parse::Component& query, + CharsetConverter* converter, + CanonOutput* output, + url_parse::Component* out_query); +GURL_API void CanonicalizeQuery(const char16* spec, + const url_parse::Component& query, + CharsetConverter* converter, + CanonOutput* output, + url_parse::Component* out_query); // Ref: Prepends the # if needed. The output will be UTF-8 (this is the only // canonicalizer that does not produce ASCII output). The output is @@ -490,14 +491,14 @@ void CanonicalizeQuery(const char16* spec, // // This function will not fail. If the input is invalid UTF-8/UTF-16, we'll use // the "Unicode replacement character" for the confusing bits and copy the rest. -void CanonicalizeRef(const char* spec, - const url_parse::Component& path, - CanonOutput* output, - url_parse::Component* out_path); -void CanonicalizeRef(const char16* spec, - const url_parse::Component& path, - CanonOutput* output, - url_parse::Component* out_path); +GURL_API void CanonicalizeRef(const char* spec, + const url_parse::Component& path, + CanonOutput* output, + url_parse::Component* out_path); +GURL_API void CanonicalizeRef(const char16* spec, + const url_parse::Component& path, + CanonOutput* output, + url_parse::Component* out_path); // Full canonicalizer --------------------------------------------------------- // @@ -510,61 +511,61 @@ void CanonicalizeRef(const char16* spec, // The 8-bit versions require UTF-8 encoding. // Use for standard URLs with authorities and paths. -bool CanonicalizeStandardURL(const char* spec, - int spec_len, - const url_parse::Parsed& parsed, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed); -bool CanonicalizeStandardURL(const char16* spec, - int spec_len, - const url_parse::Parsed& parsed, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed); +GURL_API bool CanonicalizeStandardURL(const char* spec, + int spec_len, + const url_parse::Parsed& parsed, + CharsetConverter* query_converter, + CanonOutput* output, + url_parse::Parsed* new_parsed); +GURL_API bool CanonicalizeStandardURL(const char16* spec, + int spec_len, + const url_parse::Parsed& parsed, + CharsetConverter* query_converter, + CanonOutput* output, + url_parse::Parsed* new_parsed); // Use for file URLs. -bool CanonicalizeFileURL(const char* spec, - int spec_len, - const url_parse::Parsed& parsed, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed); -bool CanonicalizeFileURL(const char16* spec, - int spec_len, - const url_parse::Parsed& parsed, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed); +GURL_API bool CanonicalizeFileURL(const char* spec, + int spec_len, + const url_parse::Parsed& parsed, + CharsetConverter* query_converter, + CanonOutput* output, + url_parse::Parsed* new_parsed); +GURL_API bool CanonicalizeFileURL(const char16* spec, + int spec_len, + const url_parse::Parsed& parsed, + CharsetConverter* query_converter, + CanonOutput* output, + url_parse::Parsed* new_parsed); // Use for path URLs such as javascript. This does not modify the path in any // way, for example, by escaping it. -bool CanonicalizePathURL(const char* spec, - int spec_len, - const url_parse::Parsed& parsed, - CanonOutput* output, - url_parse::Parsed* new_parsed); -bool CanonicalizePathURL(const char16* spec, - int spec_len, - const url_parse::Parsed& parsed, - CanonOutput* output, - url_parse::Parsed* new_parsed); +GURL_API bool CanonicalizePathURL(const char* spec, + int spec_len, + const url_parse::Parsed& parsed, + CanonOutput* output, + url_parse::Parsed* new_parsed); +GURL_API bool CanonicalizePathURL(const char16* spec, + int spec_len, + const url_parse::Parsed& parsed, + CanonOutput* output, + url_parse::Parsed* new_parsed); // Use for mailto URLs. This "canonicalizes" the url into a path and query // component. It does not attempt to merge "to" fields. It uses UTF-8 for // the query encoding if there is a query. This is because a mailto URL is // really intended for an external mail program, and the encoding of a page, // etc. which would influence a query encoding normally are irrelevant. -bool CanonicalizeMailtoURL(const char* spec, - int spec_len, - const url_parse::Parsed& parsed, - CanonOutput* output, - url_parse::Parsed* new_parsed); -bool CanonicalizeMailtoURL(const char16* spec, - int spec_len, - const url_parse::Parsed& parsed, - CanonOutput* output, - url_parse::Parsed* new_parsed); +GURL_API bool CanonicalizeMailtoURL(const char* spec, + int spec_len, + const url_parse::Parsed& parsed, + CanonOutput* output, + url_parse::Parsed* new_parsed); +GURL_API bool CanonicalizeMailtoURL(const char16* spec, + int spec_len, + const url_parse::Parsed& parsed, + CanonOutput* output, + url_parse::Parsed* new_parsed); // Part replacer -------------------------------------------------------------- @@ -585,7 +586,7 @@ template<typename CHAR> struct URLComponentSource { // Constructor normally used by callers wishing to replace components. This // will make them all NULL, which is no replacement. The caller would then - // override the compoents they want to replace. + // override the components they want to replace. URLComponentSource() : scheme(NULL), username(NULL), @@ -749,59 +750,59 @@ class Replacements { }; // The base must be an 8-bit canonical URL. -bool ReplaceStandardURL(const char* base, - const url_parse::Parsed& base_parsed, - const Replacements<char>& replacements, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed); -bool ReplaceStandardURL(const char* base, - const url_parse::Parsed& base_parsed, - const Replacements<char16>& replacements, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed); +GURL_API bool ReplaceStandardURL(const char* base, + const url_parse::Parsed& base_parsed, + const Replacements<char>& replacements, + CharsetConverter* query_converter, + CanonOutput* output, + url_parse::Parsed* new_parsed); +GURL_API bool ReplaceStandardURL(const char* base, + const url_parse::Parsed& base_parsed, + const Replacements<char16>& replacements, + CharsetConverter* query_converter, + CanonOutput* output, + url_parse::Parsed* new_parsed); // Replacing some parts of a file URL is not permitted. Everything except // the host, path, query, and ref will be ignored. -bool ReplaceFileURL(const char* base, - const url_parse::Parsed& base_parsed, - const Replacements<char>& replacements, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed); -bool ReplaceFileURL(const char* base, - const url_parse::Parsed& base_parsed, - const Replacements<char16>& replacements, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* new_parsed); +GURL_API bool ReplaceFileURL(const char* base, + const url_parse::Parsed& base_parsed, + const Replacements<char>& replacements, + CharsetConverter* query_converter, + CanonOutput* output, + url_parse::Parsed* new_parsed); +GURL_API bool ReplaceFileURL(const char* base, + const url_parse::Parsed& base_parsed, + const Replacements<char16>& replacements, + CharsetConverter* query_converter, + CanonOutput* output, + url_parse::Parsed* new_parsed); // Path URLs can only have the scheme and path replaced. All other components // will be ignored. -bool ReplacePathURL(const char* base, - const url_parse::Parsed& base_parsed, - const Replacements<char>& replacements, - CanonOutput* output, - url_parse::Parsed* new_parsed); -bool ReplacePathURL(const char* base, - const url_parse::Parsed& base_parsed, - const Replacements<char16>& replacements, - CanonOutput* output, - url_parse::Parsed* new_parsed); +GURL_API bool ReplacePathURL(const char* base, + const url_parse::Parsed& base_parsed, + const Replacements<char>& replacements, + CanonOutput* output, + url_parse::Parsed* new_parsed); +GURL_API bool ReplacePathURL(const char* base, + const url_parse::Parsed& base_parsed, + const Replacements<char16>& replacements, + CanonOutput* output, + url_parse::Parsed* new_parsed); // Mailto URLs can only have the scheme, path, and query replaced. // All other components will be ignored. -bool ReplaceMailtoURL(const char* base, - const url_parse::Parsed& base_parsed, - const Replacements<char>& replacements, - CanonOutput* output, - url_parse::Parsed* new_parsed); -bool ReplaceMailtoURL(const char* base, - const url_parse::Parsed& base_parsed, - const Replacements<char16>& replacements, - CanonOutput* output, - url_parse::Parsed* new_parsed); +GURL_API bool ReplaceMailtoURL(const char* base, + const url_parse::Parsed& base_parsed, + const Replacements<char>& replacements, + CanonOutput* output, + url_parse::Parsed* new_parsed); +GURL_API bool ReplaceMailtoURL(const char* base, + const url_parse::Parsed& base_parsed, + const Replacements<char16>& replacements, + CanonOutput* output, + url_parse::Parsed* new_parsed); // Relative URL --------------------------------------------------------------- @@ -816,20 +817,20 @@ bool ReplaceMailtoURL(const char* base, // not). Failure means that the combination of URLs doesn't make any sense. // // The base URL should always be canonical, therefore is ASCII. -bool IsRelativeURL(const char* base, - const url_parse::Parsed& base_parsed, - const char* fragment, - int fragment_len, - bool is_base_hierarchical, - bool* is_relative, - url_parse::Component* relative_component); -bool IsRelativeURL(const char* base, - const url_parse::Parsed& base_parsed, - const char16* fragment, - int fragment_len, - bool is_base_hierarchical, - bool* is_relative, - url_parse::Component* relative_component); +GURL_API bool IsRelativeURL(const char* base, + const url_parse::Parsed& base_parsed, + const char* fragment, + int fragment_len, + bool is_base_hierarchical, + bool* is_relative, + url_parse::Component* relative_component); +GURL_API bool IsRelativeURL(const char* base, + const url_parse::Parsed& base_parsed, + const char16* fragment, + int fragment_len, + bool is_base_hierarchical, + bool* is_relative, + url_parse::Component* relative_component); // Given a canonical parsed source URL, a URL fragment known to be relative, // and the identified relevant portion of the relative URL (computed by @@ -849,22 +850,22 @@ bool IsRelativeURL(const char* base, // Returns true on success. On failure, the output will be "something // reasonable" that will be consistent and valid, just probably not what // was intended by the web page author or caller. -bool ResolveRelativeURL(const char* base_url, - const url_parse::Parsed& base_parsed, - bool base_is_file, - const char* relative_url, - const url_parse::Component& relative_component, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* out_parsed); -bool ResolveRelativeURL(const char* base_url, - const url_parse::Parsed& base_parsed, - bool base_is_file, - const char16* relative_url, - const url_parse::Component& relative_component, - CharsetConverter* query_converter, - CanonOutput* output, - url_parse::Parsed* out_parsed); +GURL_API bool ResolveRelativeURL(const char* base_url, + const url_parse::Parsed& base_parsed, + bool base_is_file, + const char* relative_url, + const url_parse::Component& relative_component, + CharsetConverter* query_converter, + CanonOutput* output, + url_parse::Parsed* out_parsed); +GURL_API bool ResolveRelativeURL(const char* base_url, + const url_parse::Parsed& base_parsed, + bool base_is_file, + const char16* relative_url, + const url_parse::Component& relative_component, + CharsetConverter* query_converter, + CanonOutput* output, + url_parse::Parsed* out_parsed); } // namespace url_canon |