diff options
author | brettw@google.com <brettw@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-05-22 18:15:24 +0000 |
---|---|---|
committer | brettw@google.com <brettw@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-05-22 18:15:24 +0000 |
commit | f9fe8630a0ceba09f1bfcc4af7a52048be0d133c (patch) | |
tree | c3a75daafa2d78e70e17bf24fb91502e8f94171c | |
parent | 03ce2f5bf335b39ad24306a3a962823e46305cc4 (diff) | |
download | chromium_src-f9fe8630a0ceba09f1bfcc4af7a52048be0d133c.zip chromium_src-f9fe8630a0ceba09f1bfcc4af7a52048be0d133c.tar.gz chromium_src-f9fe8630a0ceba09f1bfcc4af7a52048be0d133c.tar.bz2 |
Shows Unicode IDN instead of Punycode in the following places:
- Bookmark Manager
- Edit Bookmark dialog opened by Bookmark Manager
- Edit Bookmark dialog opened by the star on the left of the address bar
Introduces a new function, net::FormatUrl(), which takes the following
parameters in addition to those of gfx::GetCleanStringFromUrl():
- bool omit_username_password
- bool unescape
and moves gfx::GetCleanStringFromUrl() to the net:: namespace, removing
its last two parameters.
BUG=3991
Checked in for tkent
Original review = http://codereview.chromium.org/115346
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@16761 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | app/gfx/text_elider.cc | 136 | ||||
-rw-r--r-- | app/gfx/text_elider.h | 15 | ||||
-rw-r--r-- | chrome/browser/autocomplete/autocomplete.cc | 7 | ||||
-rw-r--r-- | chrome/browser/autocomplete/history_url_provider.cc | 6 | ||||
-rw-r--r-- | chrome/browser/bookmarks/bookmark_table_model.cc | 10 | ||||
-rw-r--r-- | chrome/browser/net/url_fixer_upper.cc | 7 | ||||
-rw-r--r-- | chrome/browser/tab_contents/navigation_entry.cc | 9 | ||||
-rw-r--r-- | chrome/browser/toolbar_model.cc | 3 | ||||
-rw-r--r-- | chrome/browser/views/bookmark_editor_view.cc | 15 | ||||
-rw-r--r-- | net/base/net_util.cc | 144 | ||||
-rw-r--r-- | net/base/net_util.h | 33 | ||||
-rw-r--r-- | net/base/net_util_unittest.cc | 164 |
12 files changed, 381 insertions, 168 deletions
diff --git a/app/gfx/text_elider.cc b/app/gfx/text_elider.cc index 7568aef..e7b279f 100644 --- a/app/gfx/text_elider.cc +++ b/app/gfx/text_elider.cc @@ -16,23 +16,6 @@ const wchar_t kEllipsis[] = L"\x2026"; namespace gfx { -// Appends the given part of the original URL to the output string formatted for -// the user. The given parsed structure will be updated. The host name formatter -// also takes the same accept languages component as ElideURL. |new_parsed| may -// be null. -static void AppendFormattedHost(const GURL& url, - const std::wstring& languages, - std::wstring* output, - url_parse::Parsed* new_parsed); - -// Calls the unescaper for the substring |in_component| inside of the URL -// |spec|. The decoded string will be appended to |output| and the resulting -// range will be filled into |out_component|. -static void AppendFormattedComponent(const std::string& spec, - const url_parse::Component& in_component, - std::wstring* output, - url_parse::Component* out_component); - // This function takes a GURL object and elides it. It returns a string // which composed of parts from subdomain, domain, path, filename and query. // A "..." is added automatically at the end if the elided string is bigger @@ -49,8 +32,8 @@ std::wstring ElideUrl(const GURL& url, const std::wstring& languages) { // Get a formatted string and corresponding parsing of the url. 
url_parse::Parsed parsed; - std::wstring url_string = GetCleanStringFromUrl(url, languages, &parsed, - NULL); + std::wstring url_string = + net::FormatUrl(url, languages, true, true, &parsed, NULL); if (available_pixel_width <= 0) return url_string; @@ -338,124 +321,15 @@ std::wstring ElideText(const std::wstring& text, return text.substr(0, lo) + kEllipsis; } -void AppendFormattedHost(const GURL& url, - const std::wstring& languages, - std::wstring* output, - url_parse::Parsed* new_parsed) { - const url_parse::Component& host = - url.parsed_for_possibly_invalid_spec().host; - - if (host.is_nonempty()) { - // Handle possible IDN in the host name. - if (new_parsed) - new_parsed->host.begin = static_cast<int>(output->length()); - - const std::string& spec = url.possibly_invalid_spec(); - DCHECK(host.begin >= 0 && - ((spec.length() == 0 && host.begin == 0) || - host.begin < static_cast<int>(spec.length()))); - net::IDNToUnicode(&spec[host.begin], host.len, languages, output); - - if (new_parsed) { - new_parsed->host.len = - static_cast<int>(output->length()) - new_parsed->host.begin; - } - } else if (new_parsed) { - new_parsed->host.reset(); - } -} - -void AppendFormattedComponent(const std::string& spec, - const url_parse::Component& in_component, - std::wstring* output, - url_parse::Component* out_component) { - if (in_component.is_nonempty()) { - out_component->begin = static_cast<int>(output->length()); - - output->append(UnescapeAndDecodeUTF8URLComponent( - spec.substr(in_component.begin, in_component.len), - UnescapeRule::NORMAL)); - - out_component->len = - static_cast<int>(output->length()) - out_component->begin; - } else { - out_component->reset(); - } -} - -std::wstring GetCleanStringFromUrl(const GURL& url, - const std::wstring& languages, - url_parse::Parsed* new_parsed, - size_t* prefix_end) { - url_parse::Parsed parsed_temp; - if (!new_parsed) - new_parsed = &parsed_temp; - - std::wstring url_string; - - // Check for empty URLs or 0 available text 
width. - if (url.is_empty()) { - if (prefix_end) - *prefix_end = 0; - return url_string; - } - - // We handle both valid and invalid URLs (this will give us the spec - // regardless of validity). - const std::string& spec = url.possibly_invalid_spec(); - const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); - - // Construct a new URL with the username and password fields removed. We - // don't want to display those to the user since they can be used for - // attacks, e.g. "http://google.com:search@evil.ru/" - // - // Copy everything before the host name we want (the scheme and the - // separators), minus the username start we computed above. These are ASCII. - int pre_end = parsed.CountCharactersBefore( - url_parse::Parsed::USERNAME, true); - for (int i = 0; i < pre_end; ++i) - url_string.push_back(spec[i]); - if (prefix_end) - *prefix_end = static_cast<size_t>(pre_end); - new_parsed->scheme = parsed.scheme; - new_parsed->username.reset(); - new_parsed->password.reset(); - - AppendFormattedHost(url, languages, &url_string, new_parsed); - - // Port. - if (parsed.port.is_nonempty()) { - url_string.push_back(':'); - for (int i = parsed.port.begin; i < parsed.port.end(); ++i) - url_string.push_back(spec[i]); - } - - // Path and query both get the same general unescape & convert treatment. - AppendFormattedComponent(spec, parsed.path, &url_string, &new_parsed->path); - if (parsed.query.is_valid()) - url_string.push_back('?'); - AppendFormattedComponent(spec, parsed.query, &url_string, &new_parsed->query); - - // Reference is stored in valid, unescaped UTF-8, so we can just convert. 
- if (parsed.ref.is_valid()) { - url_string.push_back('#'); - if (parsed.ref.len > 0) - url_string.append(UTF8ToWide(std::string(&spec[parsed.ref.begin], - parsed.ref.len))); - } - - return url_string; -} - SortedDisplayURL::SortedDisplayURL(const GURL& url, const std::wstring& languages) { std::wstring host; - AppendFormattedHost(url, languages, &host, NULL); + net::AppendFormattedHost(url, languages, &host, NULL); sort_host_ = WideToUTF16Hack(host); string16 host_minus_www = WideToUTF16Hack(net::StripWWW(host)); url_parse::Parsed parsed; - display_url_ = WideToUTF16Hack(GetCleanStringFromUrl(url, languages, - &parsed, &prefix_end_)); + display_url_ = WideToUTF16Hack(net::FormatUrl(url, languages, + true, true, &parsed, &prefix_end_)); if (sort_host_.length() > host_minus_www.length()) { prefix_end_ += sort_host_.length() - host_minus_www.length(); sort_host_.swap(host_minus_www); diff --git a/app/gfx/text_elider.h b/app/gfx/text_elider.h index c0d0be6..96afdfb 100644 --- a/app/gfx/text_elider.h +++ b/app/gfx/text_elider.h @@ -15,24 +15,9 @@ class FilePath; class GURL; -namespace url_parse { -struct Parsed; -} - // TODO(port): this file should deal in string16s rather than wstrings. namespace gfx { -// A function to get URL string from a GURL that will be suitable for display -// to the user. The parsing of the URL may change because various parts of the -// string will change lengths. The new parsing will be placed in the given out -// parameter. |prefix_end| is set to the end of the prefix (spec and separator -// characters before host). -// |languages|, |new_parsed|, and |prefix_end| may all be empty or NULL. -std::wstring GetCleanStringFromUrl(const GURL& url, - const std::wstring& languages, - url_parse::Parsed* new_parsed, - size_t* prefix_end); - // This function takes a GURL object and elides it. It returns a string // which composed of parts from subdomain, domain, path, filename and query. // A "..." 
is added automatically at the end if the elided string is bigger diff --git a/chrome/browser/autocomplete/autocomplete.cc b/chrome/browser/autocomplete/autocomplete.cc index 65f71c4..9720e73 100644 --- a/chrome/browser/autocomplete/autocomplete.cc +++ b/chrome/browser/autocomplete/autocomplete.cc @@ -6,7 +6,6 @@ #include <algorithm> -#include "app/gfx/text_elider.h" #include "app/l10n_util.h" #include "base/basictypes.h" #include "base/string_util.h" @@ -492,9 +491,9 @@ void AutocompleteProvider::UpdateStarredStateOfMatches() { std::wstring AutocompleteProvider::StringForURLDisplay( const GURL& url, bool check_accept_lang) const { - return gfx::GetCleanStringFromUrl(url, (check_accept_lang && profile_) ? - profile_->GetPrefs()->GetString(prefs::kAcceptLanguages) : std::wstring(), - NULL, NULL); + std::wstring languages = (check_accept_lang && profile_) ? + profile_->GetPrefs()->GetString(prefs::kAcceptLanguages) : std::wstring(); + return net::FormatUrl(url, languages); } // AutocompleteResult --------------------------------------------------------- diff --git a/chrome/browser/autocomplete/history_url_provider.cc b/chrome/browser/autocomplete/history_url_provider.cc index 0083bfe..faae195 100644 --- a/chrome/browser/autocomplete/history_url_provider.cc +++ b/chrome/browser/autocomplete/history_url_provider.cc @@ -6,7 +6,6 @@ #include <algorithm> -#include "app/gfx/text_elider.h" #include "base/basictypes.h" #include "base/histogram.h" #include "base/message_loop.h" @@ -832,9 +831,8 @@ AutocompleteMatch HistoryURLProvider::HistoryMatchToACMatch( !!info.visit_count(), AutocompleteMatch::HISTORY_URL); match.destination_url = info.url(); DCHECK(match.destination_url.is_valid()); - match.fill_into_edit = gfx::GetCleanStringFromUrl(info.url(), - match_type == WHAT_YOU_TYPED ? std::wstring() : params->languages, - NULL, NULL); + match.fill_into_edit = net::FormatUrl(info.url(), + match_type == WHAT_YOU_TYPED ? 
std::wstring() : params->languages); if (!params->input.prevent_inline_autocomplete()) { match.inline_autocomplete_offset = history_match.input_location + params->input.text().length(); diff --git a/chrome/browser/bookmarks/bookmark_table_model.cc b/chrome/browser/bookmarks/bookmark_table_model.cc index 458ad0c..85d4032 100644 --- a/chrome/browser/bookmarks/bookmark_table_model.cc +++ b/chrome/browser/bookmarks/bookmark_table_model.cc @@ -11,10 +11,14 @@ #include "base/string_util.h" #include "base/time_format.h" #include "chrome/browser/bookmarks/bookmark_utils.h" +#include "chrome/browser/profile.h" +#include "chrome/common/pref_names.h" +#include "chrome/common/pref_service.h" #include "googleurl/src/gurl.h" #include "grit/app_resources.h" #include "grit/generated_resources.h" #include "grit/theme_resources.h" +#include "net/base/net_util.h" namespace { @@ -308,7 +312,11 @@ std::wstring BookmarkTableModel::GetText(int row, int column_id) { case IDS_BOOKMARK_TABLE_URL: { if (!node->is_url()) return std::wstring(); - std::wstring url_text = UTF8ToWide(node->GetURL().spec()); + std::wstring languages = model_ && model_->profile() + ? model_->profile()->GetPrefs()->GetString(prefs::kAcceptLanguages) + : std::wstring(); + std::wstring url_text = + net::FormatUrl(node->GetURL(), languages, false, true, NULL, NULL); if (l10n_util::GetTextDirection() == l10n_util::RIGHT_TO_LEFT) l10n_util::WrapStringWithLTRFormatting(&url_text); return url_text; diff --git a/chrome/browser/net/url_fixer_upper.cc b/chrome/browser/net/url_fixer_upper.cc index 6572183..f2c29ca 100644 --- a/chrome/browser/net/url_fixer_upper.cc +++ b/chrome/browser/net/url_fixer_upper.cc @@ -6,7 +6,6 @@ #include <algorithm> -#include "app/gfx/text_elider.h" #include "base/file_util.h" #include "base/logging.h" #include "base/string_util.h" @@ -122,8 +121,7 @@ static string FixupPath(const string& text) { // Here, we know the input looks like a file. 
GURL file_url = net::FilePathToFileURL(FilePath(filename)); if (file_url.is_valid()) { - return WideToUTF8(gfx::GetCleanStringFromUrl(file_url, std::wstring(), - NULL, NULL)); + return WideToUTF8(net::FormatUrl(file_url, std::wstring())); } // Invalid file URL, just return the input. @@ -522,8 +520,7 @@ string URLFixerUpper::FixupRelativeFile(const FilePath& base_dir, if (is_file) { GURL file_url = net::FilePathToFileURL(full_path); if (file_url.is_valid()) - return WideToUTF8(gfx::GetCleanStringFromUrl(file_url, std::wstring(), - NULL, NULL)); + return WideToUTF8(net::FormatUrl(file_url, std::wstring())); // Invalid files fall through to regular processing. } diff --git a/chrome/browser/tab_contents/navigation_entry.cc b/chrome/browser/tab_contents/navigation_entry.cc index 655a998..b2235bc 100644 --- a/chrome/browser/tab_contents/navigation_entry.cc +++ b/chrome/browser/tab_contents/navigation_entry.cc @@ -4,13 +4,13 @@ #include "chrome/browser/tab_contents/navigation_entry.h" -#include "app/gfx/text_elider.h" #include "app/resource_bundle.h" #include "chrome/browser/tab_contents/navigation_controller.h" #include "chrome/common/pref_names.h" #include "chrome/common/pref_service.h" #include "chrome/common/url_constants.h" #include "grit/app_resources.h" +#include "net/base/net_util.h" // Use this to get a new unique ID for a NavigationEntry during construction. // The returned ID is guaranteed to be nonzero (which is the "no ID" indicator). 
@@ -80,11 +80,10 @@ const string16& NavigationEntry::GetTitleForDisplay( prefs::kAcceptLanguages); } if (!display_url_.is_empty()) { - cached_display_title_ = WideToUTF16Hack(gfx::GetCleanStringFromUrl( - display_url_, languages, NULL, NULL)); + cached_display_title_ = WideToUTF16Hack(net::FormatUrl( + display_url_, languages)); } else if (!url_.is_empty()) { - cached_display_title_ = WideToUTF16Hack(gfx::GetCleanStringFromUrl( - url_, languages, NULL, NULL)); + cached_display_title_ = WideToUTF16Hack(net::FormatUrl(url_, languages)); } return cached_display_title_; } diff --git a/chrome/browser/toolbar_model.cc b/chrome/browser/toolbar_model.cc index b091fc8..e2ee68f 100644 --- a/chrome/browser/toolbar_model.cc +++ b/chrome/browser/toolbar_model.cc @@ -4,7 +4,6 @@ #include "chrome/browser/toolbar_model.h" -#include "app/gfx/text_elider.h" #include "app/l10n_util.h" #include "chrome/browser/cert_store.h" #include "chrome/browser/ssl/ssl_error_info.h" @@ -41,7 +40,7 @@ std::wstring ToolbarModel::GetText() { url = entry->display_url(); } } - return gfx::GetCleanStringFromUrl(url, languages, NULL, NULL); + return net::FormatUrl(url, languages); } ToolbarModel::SecurityLevel ToolbarModel::GetSecurityLevel() { diff --git a/chrome/browser/views/bookmark_editor_view.cc b/chrome/browser/views/bookmark_editor_view.cc index 835ba59..b79ef28 100644 --- a/chrome/browser/views/bookmark_editor_view.cc +++ b/chrome/browser/views/bookmark_editor_view.cc @@ -12,10 +12,13 @@ #include "chrome/browser/history/history.h" #include "chrome/browser/profile.h" #include "chrome/browser/net/url_fixer_upper.h" +#include "chrome/common/pref_names.h" +#include "chrome/common/pref_service.h" #include "googleurl/src/gurl.h" #include "grit/chromium_strings.h" #include "grit/generated_resources.h" #include "grit/locale_settings.h" +#include "net/base/net_util.h" #include "views/background.h" #include "views/focus/focus_manager.h" #include "views/grid_layout.h" @@ -246,7 +249,17 @@ void 
BookmarkEditorView::Init() { title_tf_.SetText(node_ ? node_->GetTitle() : std::wstring()); title_tf_.SetController(this); - url_tf_.SetText(node_ ? UTF8ToWide(node_->GetURL().spec()) : std::wstring()); + std::wstring url_text; + if (node_) { + std::wstring languages = profile_ + ? profile_->GetPrefs()->GetString(prefs::kAcceptLanguages) + : std::wstring(); + // The following URL is user-editable. We specify omit_username_password= + // false and unescape=false to show the original URL except IDN. + url_text = + net::FormatUrl(node_->GetURL(), languages, false, false, NULL, NULL); + } + url_tf_.SetText(url_text); url_tf_.SetController(this); if (show_tree_) { diff --git a/net/base/net_util.cc b/net/base/net_util.cc index 06b43be..bfc2cea 100644 --- a/net/base/net_util.cc +++ b/net/base/net_util.cc @@ -655,6 +655,15 @@ void IDNToUnicodeOneComponent(const char16* comp, namespace net { +// Appends the substring |in_component| inside of the URL |spec| to |output|, +// and the resulting range will be filled into |out_component|. Calls the +// unescaper for the substring if |unescape| is true. +static void AppendFormattedComponent(const std::string& spec, + const url_parse::Component& in_component, + bool unescape, + std::wstring* output, + url_parse::Component* out_component); + GURL FilePathToFileURL(const FilePath& path) { // Produce a URL like "file:///C:/foo" for a regular file, or // "file://///server/path" for UNC. The URL canonicalizer will fix up the @@ -1045,4 +1054,139 @@ std::string GetHostName() { return std::string(buffer); } +void AppendFormattedHost(const GURL& url, + const std::wstring& languages, + std::wstring* output, + url_parse::Parsed* new_parsed) { + const url_parse::Component& host = + url.parsed_for_possibly_invalid_spec().host; + + if (host.is_nonempty()) { + // Handle possible IDN in the host name. 
+ if (new_parsed) + new_parsed->host.begin = static_cast<int>(output->length()); + + const std::string& spec = url.possibly_invalid_spec(); + DCHECK(host.begin >= 0 && + ((spec.length() == 0 && host.begin == 0) || + host.begin < static_cast<int>(spec.length()))); + net::IDNToUnicode(&spec[host.begin], host.len, languages, output); + + if (new_parsed) { + new_parsed->host.len = + static_cast<int>(output->length()) - new_parsed->host.begin; + } + } else if (new_parsed) { + new_parsed->host.reset(); + } +} + +/* static */ +void AppendFormattedComponent(const std::string& spec, + const url_parse::Component& in_component, + bool unescape, + std::wstring* output, + url_parse::Component* out_component) { + if (in_component.is_nonempty()) { + out_component->begin = static_cast<int>(output->length()); + if (unescape) { + output->append(UnescapeAndDecodeUTF8URLComponent( + spec.substr(in_component.begin, in_component.len), + UnescapeRule::NORMAL)); + } else { + output->append(UTF8ToWide(spec.substr( + in_component.begin, in_component.len))); + } + out_component->len = + static_cast<int>(output->length()) - out_component->begin; + } else { + out_component->reset(); + } +} + +std::wstring FormatUrl( + const GURL& url, const std::wstring& languages, bool omit_username_password, + bool unescape, url_parse::Parsed* new_parsed, size_t* prefix_end) { + url_parse::Parsed parsed_temp; + if (!new_parsed) + new_parsed = &parsed_temp; + + std::wstring url_string; + + // Check for empty URLs or 0 available text width. + if (url.is_empty()) { + if (prefix_end) + *prefix_end = 0; + return url_string; + } + + // We handle both valid and invalid URLs (this will give us the spec + // regardless of validity). + const std::string& spec = url.possibly_invalid_spec(); + const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); + + // Copy everything before the username (the scheme and the separators.) + // These are ASCII. 
+ int pre_end = parsed.CountCharactersBefore(url_parse::Parsed::USERNAME, true); + for (int i = 0; i < pre_end; ++i) + url_string.push_back(spec[i]); + new_parsed->scheme = parsed.scheme; + + if (omit_username_password) { + // Remove the username and password fields. We don't want to display those + // to the user since they can be used for attacks, + // e.g. "http://google.com:search@evil.ru/" + new_parsed->username.reset(); + new_parsed->password.reset(); + } else { + AppendFormattedComponent( + spec, parsed.username, unescape, &url_string, &new_parsed->username); + if (parsed.password.is_valid()) { + url_string.push_back(':'); + } + AppendFormattedComponent( + spec, parsed.password, unescape, &url_string, &new_parsed->password); + if (parsed.username.is_valid() || parsed.password.is_valid()) { + url_string.push_back('@'); + } + } + if (prefix_end) + *prefix_end = static_cast<size_t>(url_string.length()); + + AppendFormattedHost(url, languages, &url_string, new_parsed); + + // Port. + if (parsed.port.is_nonempty()) { + url_string.push_back(':'); + int begin = url_string.length(); + for (int i = parsed.port.begin; i < parsed.port.end(); ++i) + url_string.push_back(spec[i]); + new_parsed->port.begin = begin; + new_parsed->port.len = url_string.length() - begin; + } else { + new_parsed->port.reset(); + } + + // Path and query both get the same general unescape & convert treatment. + AppendFormattedComponent( + spec, parsed.path, unescape, &url_string, &new_parsed->path); + if (parsed.query.is_valid()) + url_string.push_back('?'); + AppendFormattedComponent( + spec, parsed.query, unescape, &url_string, &new_parsed->query); + + // Reference is stored in valid, unescaped UTF-8, so we can just convert. 
+ if (parsed.ref.is_valid()) { + url_string.push_back('#'); + int begin = url_string.length(); + if (parsed.ref.len > 0) + url_string.append(UTF8ToWide(std::string(&spec[parsed.ref.begin], + parsed.ref.len))); + new_parsed->ref.begin = begin; + new_parsed->ref.len = url_string.length() - begin; + } + + return url_string; +} + } // namespace net diff --git a/net/base/net_util.h b/net/base/net_util.h index e64cb88..2ab6101 100644 --- a/net/base/net_util.h +++ b/net/base/net_util.h @@ -23,6 +23,10 @@ namespace base { class Time; } +namespace url_parse { +struct Parsed; +} + namespace net { // Given the full path to a file name, creates a file: URL. The returned URL @@ -170,6 +174,35 @@ bool IsPortAllowedByFtp(int port); // Set socket to non-blocking mode int SetNonBlocking(int fd); +// Appends the given part of the original URL to the output string formatted for +// the user. The given parsed structure will be updated. The host name formatter +// also takes the same accept languages component as ElideURL. |new_parsed| may +// be null. +void AppendFormattedHost(const GURL& url, const std::wstring& languages, + std::wstring* output, url_parse::Parsed* new_parsed); + +// Creates a string representation of |url|. The IDN host name may +// be in Unicode if |languages| accepts the Unicode representation. +// If |omit_username_password| is true, the username and the password are +// omitted. If |unescape| is true and the path part and the query part seem to +// be encoded in %-encoded UTF-8, decodes %-encoding and UTF-8. +// |new_parsed| will have parsing parameters of the resultant URL. |prefix_end| +// will be the length before the hostname of the resultant URL. |new_parsed| +// and |prefix_end| may be NULL. +std::wstring FormatUrl(const GURL& url, + const std::wstring& languages, + bool omit_username_password, + bool unescape, + url_parse::Parsed* new_parsed, + size_t* prefix_end); + +// Creates a string representation of |url| for display to the user. 
+// This is a shorthand of the above function with omit_username_password=true, +// unescape=true, new_parsed=NULL, and prefix_end=NULL. +inline std::wstring FormatUrl(const GURL& url, const std::wstring& languages) { + return FormatUrl(url, languages, true, true, NULL, NULL); +} + } // namespace net #endif // NET_BASE_NET_UTIL_H__ diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc index 79c1138..5f9ed4e 100644 --- a/net/base/net_util_unittest.cc +++ b/net/base/net_util_unittest.cc @@ -337,6 +337,16 @@ struct SuggestedFilenameCase { const wchar_t* expected_filename; }; +struct UrlTestData { + const char* description; + const char* input; + const std::wstring languages; + bool omit; + bool unescape; + const std::wstring output; + size_t prefix_len; +}; + // Returns an addrinfo for the given 32-bit address (IPv4.) // The result lives in static storage, so don't delete it. const struct addrinfo* GetIPv4Address(const uint8 bytes[4]) { @@ -1008,3 +1018,157 @@ TEST(NetUtilTest, GetHostName) { std::string hostname = net::GetHostName(); EXPECT_FALSE(hostname.empty()); } + +TEST(NetUtilTest, FormatUrl) { + const UrlTestData tests[] = { + {"Empty URL", "", L"", true, true, L"", 0}, + + {"Simple URL", + "http://www.google.com/", L"", true, true, + L"http://www.google.com/", 7}, + + {"With a port number and a reference", + "http://www.google.com:8080/#\xE3\x82\xB0", L"", true, true, + L"http://www.google.com:8080/#\x30B0", 7}, + + // -------- IDN tests -------- + {"Japanese IDN with ja", + "http://xn--l8jvb1ey91xtjb.jp", L"ja", true, true, + L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, + + {"Japanese IDN with en", + "http://xn--l8jvb1ey91xtjb.jp", L"en", true, true, + L"http://xn--l8jvb1ey91xtjb.jp/", 7}, + + {"Japanese IDN without any languages", + "http://xn--l8jvb1ey91xtjb.jp", L"", true, true, + // Single script is safe for empty languages. 
+ L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, + + {"mailto: with Japanese IDN", + "mailto:foo@xn--l8jvb1ey91xtjb.jp", L"ja", true, true, + // GURL doesn't assume an email address's domain part as a host name. + L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7}, + + {"file: with Japanese IDN", + "file://xn--l8jvb1ey91xtjb.jp/config.sys", L"ja", true, true, + L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7}, + + {"ftp: with Japanese IDN", + "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", L"ja", true, true, + L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6}, + + // -------- omit_username_password flag tests -------- + {"With username and password, omit_username_password=false", + "http://user:passwd@example.com/foo", L"", false, true, + L"http://user:passwd@example.com/foo", 19}, + + {"With username and password, omit_username_password=true", + "http://user:passwd@example.com/foo", L"", true, true, + L"http://example.com/foo", 7}, + + {"With username and no password", + "http://user@example.com/foo", L"", true, true, + L"http://example.com/foo", 7}, + + {"Just '@' without username and password", + "http://@example.com/foo", L"", true, true, + L"http://example.com/foo", 7}, + + // GURL doesn't think local-part of an email address is username for URL. + {"mailto:, omit_username_password=true", + "mailto:foo@example.com", L"", true, true, + L"mailto:foo@example.com", 7}, + + // -------- unescape flag tests -------- + {"unescape=false", + "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" + "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" + "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", L"en", true, false, + // GURL parses %-encoded hostnames into Punycode. 
+ L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" + L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 7}, + + {"unescape=true", + "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" + "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" + "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", L"en", true, true, + L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB" + L"?q=\x30B0\x30FC\x30B0\x30EB", 7}, + + /* + {"unescape=true with some special characters", + "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", L"", false, true, + L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25}, + */ + // Disabled: the resultant URL becomes "...user%253A:%2540passwd...". + }; + + for (size_t i = 0; i < arraysize(tests); ++i) { + size_t prefix_len; + std::wstring formatted = net::FormatUrl( + GURL(tests[i].input), tests[i].languages, tests[i].omit, + tests[i].unescape, NULL, &prefix_len); + EXPECT_EQ(tests[i].output, formatted) << tests[i].description; + EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description; + } +} + +TEST(NetUtilTest, FormatUrlParsed) { + // No unescape case. 
+ url_parse::Parsed parsed; + std::wstring formatted = net::FormatUrl( + GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" + "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), + L"ja", false, false, &parsed, NULL); + EXPECT_EQ(L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080" + L"/%E3%82%B0/?q=%E3%82%B0#\x30B0", formatted); + EXPECT_EQ(L"%E3%82%B0", + formatted.substr(parsed.username.begin, parsed.username.len)); + EXPECT_EQ(L"%E3%83%BC", + formatted.substr(parsed.password.begin, parsed.password.len)); + EXPECT_EQ(L"\x30B0\x30FC\x30B0\x30EB.jp", + formatted.substr(parsed.host.begin, parsed.host.len)); + EXPECT_EQ(L"8080", formatted.substr(parsed.port.begin, parsed.port.len)); + EXPECT_EQ(L"/%E3%82%B0/", + formatted.substr(parsed.path.begin, parsed.path.len)); + EXPECT_EQ(L"q=%E3%82%B0", + formatted.substr(parsed.query.begin, parsed.query.len)); + EXPECT_EQ(L"\x30B0", formatted.substr(parsed.ref.begin, parsed.ref.len)); + + // Unescape case. + formatted = net::FormatUrl( + GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" + "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), + L"ja", false, true, &parsed, NULL); + EXPECT_EQ(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080" + L"/\x30B0/?q=\x30B0#\x30B0", formatted); + EXPECT_EQ(L"\x30B0", + formatted.substr(parsed.username.begin, parsed.username.len)); + EXPECT_EQ(L"\x30FC", + formatted.substr(parsed.password.begin, parsed.password.len)); + EXPECT_EQ(L"\x30B0\x30FC\x30B0\x30EB.jp", + formatted.substr(parsed.host.begin, parsed.host.len)); + EXPECT_EQ(L"8080", formatted.substr(parsed.port.begin, parsed.port.len)); + EXPECT_EQ(L"/\x30B0/", formatted.substr(parsed.path.begin, parsed.path.len)); + EXPECT_EQ(L"q=\x30B0", + formatted.substr(parsed.query.begin, parsed.query.len)); + EXPECT_EQ(L"\x30B0", formatted.substr(parsed.ref.begin, parsed.ref.len)); + + // Omit_username_password + unescape case. 
+ formatted = net::FormatUrl( + GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" + "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), + L"ja", true, true, &parsed, NULL); + EXPECT_EQ(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080" + L"/\x30B0/?q=\x30B0#\x30B0", formatted); + EXPECT_FALSE(parsed.username.is_valid()); + EXPECT_FALSE(parsed.password.is_valid()); + EXPECT_EQ(L"\x30B0\x30FC\x30B0\x30EB.jp", + formatted.substr(parsed.host.begin, parsed.host.len)); + EXPECT_EQ(L"8080", formatted.substr(parsed.port.begin, parsed.port.len)); + EXPECT_EQ(L"/\x30B0/", formatted.substr(parsed.path.begin, parsed.path.len)); + EXPECT_EQ(L"q=\x30B0", + formatted.substr(parsed.query.begin, parsed.query.len)); + EXPECT_EQ(L"\x30B0", formatted.substr(parsed.ref.begin, parsed.ref.len)); +} |