summaryrefslogtreecommitdiffstats
path: root/net/base/net_util.cc
diff options
context:
space:
mode:
authorpkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2013-09-11 00:42:28 +0000
committerpkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2013-09-11 00:42:28 +0000
commitcf7ca8aba2960aa5ba1b7accda08ba045a60c98d (patch)
tree51e5f8dfd75a5efa9eb46c03fa2b59f073c2a43a /net/base/net_util.cc
parentb8982bf7ec898420ced6999746dbc20a06fa0aff (diff)
downloadchromium_src-cf7ca8aba2960aa5ba1b7accda08ba045a60c98d.zip
chromium_src-cf7ca8aba2960aa5ba1b7accda08ba045a60c98d.tar.gz
chromium_src-cf7ca8aba2960aa5ba1b7accda08ba045a60c98d.tar.bz2
Switch the offset conversion routines from an "offsets point at characters"
worldview to an "offsets point between characters" worldview.

This more closely aligns with how the omnibox autocomplete code (which is what
this was originally written for) expects things to behave.

Direct fallout from this change:
* An input offset of 0 will always map to an output offset of 0.
* An input offset of (length of string) will always map to the length of the
  output string, instead of npos.
* It's possible for multiple unique input offsets to map to a single non-npos
  output offset, if they e.g. point to the start and end of a collapsed
  sequence.
* Input offsets pointing into the middle of a completely-removed sequence may
  not be set to npos if they fall on the boundaries of a subsequence processed
  by the transformer. For example, when running FormatUrlWithOffsets() on
  "http://user:pass@domain.com/" and directing it to omit both the scheme and
  username/password, an input offset of "7" that points in between the scheme
  and the username/password will be transformed to an output offset of 0
  instead of npos.

Indirect fallout:
* A caller like SearchProvider::NavigationToMatch() will now mark certain
  matches as "allowed to be default" that it didn't before. Specifically, if
  the user's input string ends at the same point as the desired
  |fill_into_edit|, the autocomplete offset will be calculated as (length of
  string) instead of npos, and thus the match will be thought of as "inlinable"
  and thus "allowed to be default".

BUG=284781
TEST=none
R=msw@chromium.org, willchan@chromium.org

Review URL: https://codereview.chromium.org/23619016

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@222426 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net/base/net_util.cc')
-rw-r--r--net/base/net_util.cc243
1 files changed, 114 insertions, 129 deletions
diff --git a/net/base/net_util.cc b/net/base/net_util.cc
index 153942e..dd0826c 100644
--- a/net/base/net_util.cc
+++ b/net/base/net_util.cc
@@ -83,6 +83,8 @@ namespace net {
namespace {
+typedef std::vector<size_t> Offsets;
+
// what we prepend to get a file URL
static const base::FilePath::CharType kFileURLPrefix[] =
FILE_PATH_LITERAL("file:///");
@@ -445,8 +447,7 @@ bool IDNToUnicodeOneComponent(const base::char16* comp,
}
// Clamps the offsets in |offsets_for_adjustment| to the length of |str|.
-void LimitOffsets(const base::string16& str,
- std::vector<size_t>* offsets_for_adjustment) {
+void LimitOffsets(const base::string16& str, Offsets* offsets_for_adjustment) {
if (offsets_for_adjustment) {
std::for_each(offsets_for_adjustment->begin(),
offsets_for_adjustment->end(),
@@ -461,10 +462,9 @@ void LimitOffsets(const base::string16& str,
//
// We may want to skip this step in the case of file URLs to allow unicode
// UNC hostnames regardless of encodings.
-base::string16 IDNToUnicodeWithOffsets(
- const std::string& host,
- const std::string& languages,
- std::vector<size_t>* offsets_for_adjustment) {
+base::string16 IDNToUnicodeWithOffsets(const std::string& host,
+ const std::string& languages,
+ Offsets* offsets_for_adjustment) {
// Convert the ASCII input to a base::string16 for ICU.
base::string16 input16;
input16.reserve(host.length());
@@ -508,52 +508,48 @@ base::string16 IDNToUnicodeWithOffsets(
return out16;
}
-// Transforms |original_offsets| by subtracting |component_begin| from all
-// offsets. Any offset which was not at least this large to begin with is set
-// to std::string::npos.
-std::vector<size_t> OffsetsIntoComponent(
- const std::vector<size_t>& original_offsets,
- size_t component_begin) {
- DCHECK_NE(std::string::npos, component_begin);
- std::vector<size_t> offsets_into_component(original_offsets);
- for (std::vector<size_t>::iterator i(offsets_into_component.begin());
- i != offsets_into_component.end(); ++i) {
- if (*i != std::string::npos)
- *i = (*i < component_begin) ? std::string::npos : (*i - component_begin);
- }
- return offsets_into_component;
-}
-
-// Called after we transform a component and append it to an output string.
-// Maps |transformed_offsets|, which represent offsets into the transformed
-// component itself, into appropriate offsets for the output string, by adding
-// |output_component_begin| to each. Determines which offsets need mapping by
-// checking to see which of the |original_offsets| were within the designated
-// original component, using its provided endpoints.
-void AdjustForComponentTransform(
- const std::vector<size_t>& original_offsets,
- size_t original_component_begin,
- size_t original_component_end,
- const std::vector<size_t>& transformed_offsets,
- size_t output_component_begin,
- std::vector<size_t>* offsets_for_adjustment) {
+// Called after transforming a component to set all affected elements in
+// |offsets_for_adjustment| to the correct new values. |original_offsets|
+// represents the offsets before the transform; |original_component_begin| and
+// |original_component_end| represent the pre-transform boundaries of the
+// affected component. |transformed_offsets| should be a vector created by
+// adjusting |original_offsets| to be relative to the beginning of the component
+// in question (via an OffsetAdjuster) and then transformed along with the
+// component. Note that any elements in this vector which didn't originally
+// point into the component may contain arbitrary values and should be ignored.
+// |transformed_component_begin| and |transformed_component_end| are the
+// endpoints of the transformed component and are used in combination with the
+// two offset vectors to calculate the resulting absolute offsets, which are
+// stored in |offsets_for_adjustment|.
+void AdjustForComponentTransform(const Offsets& original_offsets,
+ size_t original_component_begin,
+ size_t original_component_end,
+ const Offsets& transformed_offsets,
+ size_t transformed_component_begin,
+ size_t transformed_component_end,
+ Offsets* offsets_for_adjustment) {
if (!offsets_for_adjustment)
- return;
+ return; // Nothing to do.
- DCHECK_NE(std::string::npos, original_component_begin);
- DCHECK_NE(std::string::npos, original_component_end);
- DCHECK_NE(base::string16::npos, output_component_begin);
- size_t offsets_size = offsets_for_adjustment->size();
- DCHECK_EQ(offsets_size, original_offsets.size());
- DCHECK_EQ(offsets_size, transformed_offsets.size());
- for (size_t i = 0; i < offsets_size; ++i) {
+ for (size_t i = 0; i < original_offsets.size(); ++i) {
size_t original_offset = original_offsets[i];
if ((original_offset >= original_component_begin) &&
(original_offset < original_component_end)) {
+ // This offset originally pointed into the transformed component.
+ // Adjust the transformed relative offset by the new beginning point of
+ // the transformed component.
size_t transformed_offset = transformed_offsets[i];
(*offsets_for_adjustment)[i] =
(transformed_offset == base::string16::npos) ?
- base::string16::npos : (output_component_begin + transformed_offset);
+ base::string16::npos :
+ (transformed_offset + transformed_component_begin);
+ } else if ((original_offset >= original_component_end) &&
+ (original_offset != std::string::npos)) {
+ // This offset pointed after the transformed component. Adjust the
+ // original absolute offset by the difference between the new and old
+ // component lengths.
+ (*offsets_for_adjustment)[i] =
+ original_offset - original_component_end + transformed_component_end;
}
}
}
@@ -568,7 +564,7 @@ void AdjustComponent(int delta, url_parse::Component* component) {
}
// Adjusts all the components of |parsed| by |delta|, except for the scheme.
-void AdjustComponents(int delta, url_parse::Parsed* parsed) {
+void AdjustAllComponentsButScheme(int delta, url_parse::Parsed* parsed) {
AdjustComponent(delta, &(parsed->username));
AdjustComponent(delta, &(parsed->password));
AdjustComponent(delta, &(parsed->host));
@@ -579,27 +575,36 @@ void AdjustComponents(int delta, url_parse::Parsed* parsed) {
}
// Helper for FormatUrlWithOffsets().
-base::string16 FormatViewSourceUrl(
- const GURL& url,
- const std::vector<size_t>& original_offsets,
- const std::string& languages,
- FormatUrlTypes format_types,
- UnescapeRule::Type unescape_rules,
- url_parse::Parsed* new_parsed,
- size_t* prefix_end,
- std::vector<size_t>* offsets_for_adjustment) {
+base::string16 FormatViewSourceUrl(const GURL& url,
+ const Offsets& original_offsets,
+ const std::string& languages,
+ FormatUrlTypes format_types,
+ UnescapeRule::Type unescape_rules,
+ url_parse::Parsed* new_parsed,
+ size_t* prefix_end,
+ Offsets* offsets_for_adjustment) {
DCHECK(new_parsed);
const char kViewSource[] = "view-source:";
const size_t kViewSourceLength = arraysize(kViewSource) - 1;
- std::vector<size_t> offsets_into_url(
- OffsetsIntoComponent(original_offsets, kViewSourceLength));
- GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLength));
+ // Format the underlying URL and adjust offsets.
+ const std::string& url_str(url.possibly_invalid_spec());
+ Offsets offsets_into_underlying_url(original_offsets);
+ {
+ base::OffsetAdjuster adjuster(&offsets_into_underlying_url);
+ adjuster.Add(base::OffsetAdjuster::Adjustment(0, kViewSourceLength, 0));
+ }
base::string16 result(ASCIIToUTF16(kViewSource) +
- FormatUrlWithOffsets(real_url, languages, format_types, unescape_rules,
- new_parsed, prefix_end, &offsets_into_url));
+ FormatUrlWithOffsets(GURL(url_str.substr(kViewSourceLength)), languages,
+ format_types, unescape_rules, new_parsed, prefix_end,
+ &offsets_into_underlying_url));
+ AdjustForComponentTransform(original_offsets, kViewSourceLength,
+ url_str.length(), offsets_into_underlying_url,
+ kViewSourceLength, result.length(),
+ offsets_for_adjustment);
+ LimitOffsets(result, offsets_for_adjustment);
- // Adjust position values.
+ // Adjust positions of the parsed components.
if (new_parsed->scheme.is_nonempty()) {
// Assume "view-source:real-scheme" as a scheme.
new_parsed->scheme.len += kViewSourceLength;
@@ -607,13 +612,11 @@ base::string16 FormatViewSourceUrl(
new_parsed->scheme.begin = 0;
new_parsed->scheme.len = kViewSourceLength - 1;
}
- AdjustComponents(kViewSourceLength, new_parsed);
+ AdjustAllComponentsButScheme(kViewSourceLength, new_parsed);
+
if (prefix_end)
*prefix_end += kViewSourceLength;
- AdjustForComponentTransform(original_offsets, kViewSourceLength,
- url.possibly_invalid_spec().length(), offsets_into_url, kViewSourceLength,
- offsets_for_adjustment);
- LimitOffsets(result, offsets_for_adjustment);
+
return result;
}
@@ -622,9 +625,8 @@ class AppendComponentTransform {
AppendComponentTransform() {}
virtual ~AppendComponentTransform() {}
- virtual base::string16 Execute(
- const std::string& component_text,
- std::vector<size_t>* offsets_into_component) const = 0;
+ virtual base::string16 Execute(const std::string& component_text,
+ Offsets* offsets_into_component) const = 0;
// NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an
// accessible copy constructor in order to call AppendFormattedComponent()
@@ -640,7 +642,7 @@ class HostComponentTransform : public AppendComponentTransform {
private:
virtual base::string16 Execute(
const std::string& component_text,
- std::vector<size_t>* offsets_into_component) const OVERRIDE {
+ Offsets* offsets_into_component) const OVERRIDE {
return IDNToUnicodeWithOffsets(component_text, languages_,
offsets_into_component);
}
@@ -657,7 +659,7 @@ class NonHostComponentTransform : public AppendComponentTransform {
private:
virtual base::string16 Execute(
const std::string& component_text,
- std::vector<size_t>* offsets_into_component) const OVERRIDE {
+ Offsets* offsets_into_component) const OVERRIDE {
return (unescape_rules_ == UnescapeRule::NONE) ?
base::UTF8ToUTF16AndAdjustOffsets(component_text,
offsets_into_component) :
@@ -668,34 +670,46 @@ class NonHostComponentTransform : public AppendComponentTransform {
const UnescapeRule::Type unescape_rules_;
};
+// Transforms the portion of |spec| covered by |original_component| according to
+// |transform|. Appends the result to |output|. If |output_component| is
+// non-NULL, its start and length are set to the transformed component's new
+// start and length. For each element in |original_offsets| which is at least
+// as large as original_component.begin, the corresponding element of
+// |offsets_for_adjustment| is transformed appropriately.
void AppendFormattedComponent(const std::string& spec,
const url_parse::Component& original_component,
- const std::vector<size_t>& original_offsets,
+ const Offsets& original_offsets,
const AppendComponentTransform& transform,
base::string16* output,
url_parse::Component* output_component,
- std::vector<size_t>* offsets_for_adjustment) {
+ Offsets* offsets_for_adjustment) {
DCHECK(output);
if (original_component.is_nonempty()) {
size_t original_component_begin =
static_cast<size_t>(original_component.begin);
size_t output_component_begin = output->length();
- if (output_component)
- output_component->begin = static_cast<int>(output_component_begin);
-
- std::vector<size_t> offsets_into_component =
- OffsetsIntoComponent(original_offsets, original_component_begin);
- output->append(transform.Execute(std::string(spec, original_component_begin,
- static_cast<size_t>(original_component.len)), &offsets_into_component));
+ std::string component_str(spec, original_component_begin,
+ static_cast<size_t>(original_component.len));
+
+ // Transform |component_str| and adjust the offsets accordingly.
+ Offsets offsets_into_component(original_offsets);
+ {
+ base::OffsetAdjuster adjuster(&offsets_into_component);
+ adjuster.Add(base::OffsetAdjuster::Adjustment(0, original_component_begin,
+ 0));
+ }
+ output->append(transform.Execute(component_str, &offsets_into_component));
+ AdjustForComponentTransform(original_offsets, original_component_begin,
+ static_cast<size_t>(original_component.end()),
+ offsets_into_component, output_component_begin,
+ output->length(), offsets_for_adjustment);
+ // Set positions of the parsed component.
if (output_component) {
+ output_component->begin = static_cast<int>(output_component_begin);
output_component->len =
static_cast<int>(output->length() - output_component_begin);
}
- AdjustForComponentTransform(original_offsets, original_component_begin,
- static_cast<size_t>(original_component.end()),
- offsets_into_component, output_component_begin,
- offsets_for_adjustment);
} else if (output_component) {
output_component->reset();
}
@@ -1638,7 +1652,7 @@ std::string GetHostOrSpecFromURL(const GURL& url) {
void AppendFormattedHost(const GURL& url,
const std::string& languages,
base::string16* output) {
- std::vector<size_t> offsets;
+ Offsets offsets;
AppendFormattedComponent(url.possibly_invalid_spec(),
url.parsed_for_possibly_invalid_spec().host, offsets,
HostComponentTransform(languages), output, NULL, NULL);
@@ -1651,13 +1665,13 @@ base::string16 FormatUrlWithOffsets(
UnescapeRule::Type unescape_rules,
url_parse::Parsed* new_parsed,
size_t* prefix_end,
- std::vector<size_t>* offsets_for_adjustment) {
+ Offsets* offsets_for_adjustment) {
url_parse::Parsed parsed_temp;
if (!new_parsed)
new_parsed = &parsed_temp;
else
*new_parsed = url_parse::Parsed();
- std::vector<size_t> original_offsets;
+ Offsets original_offsets;
if (offsets_for_adjustment)
original_offsets = *offsets_for_adjustment;
@@ -1669,7 +1683,8 @@ base::string16 FormatUrlWithOffsets(
if (url.SchemeIs(kViewSource) &&
!StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) {
return FormatViewSourceUrl(url, original_offsets, languages, format_types,
- unescape_rules, new_parsed, prefix_end, offsets_for_adjustment);
+ unescape_rules, new_parsed, prefix_end,
+ offsets_for_adjustment);
}
// We handle both valid and invalid URLs (this will give us the spec
@@ -1727,32 +1742,13 @@ base::string16 FormatUrlWithOffsets(
AppendFormattedComponent(spec, parsed.username, original_offsets,
NonHostComponentTransform(unescape_rules), &url_string,
&new_parsed->username, offsets_for_adjustment);
- if (parsed.password.is_valid()) {
- size_t colon = parsed.username.end();
- DCHECK_EQ(static_cast<size_t>(parsed.password.begin - 1), colon);
- std::vector<size_t>::const_iterator colon_iter =
- std::find(original_offsets.begin(), original_offsets.end(), colon);
- if (colon_iter != original_offsets.end()) {
- (*offsets_for_adjustment)[colon_iter - original_offsets.begin()] =
- url_string.length();
- }
+ if (parsed.password.is_valid())
url_string.push_back(':');
- }
AppendFormattedComponent(spec, parsed.password, original_offsets,
NonHostComponentTransform(unescape_rules), &url_string,
&new_parsed->password, offsets_for_adjustment);
- if (parsed.username.is_valid() || parsed.password.is_valid()) {
- size_t at_sign = (parsed.password.is_valid() ?
- parsed.password : parsed.username).end();
- DCHECK_EQ(static_cast<size_t>(parsed.host.begin - 1), at_sign);
- std::vector<size_t>::const_iterator at_sign_iter =
- std::find(original_offsets.begin(), original_offsets.end(), at_sign);
- if (at_sign_iter != original_offsets.end()) {
- (*offsets_for_adjustment)[at_sign_iter - original_offsets.begin()] =
- url_string.length();
- }
+ if (parsed.username.is_valid() || parsed.password.is_valid())
url_string.push_back('@');
- }
}
if (prefix_end)
*prefix_end = static_cast<size_t>(url_string.length());
@@ -1780,6 +1776,10 @@ base::string16 FormatUrlWithOffsets(
AppendFormattedComponent(spec, parsed.path, original_offsets,
NonHostComponentTransform(unescape_rules), &url_string,
&new_parsed->path, offsets_for_adjustment);
+ } else {
+ base::OffsetAdjuster offset_adjuster(offsets_for_adjustment);
+ offset_adjuster.Add(base::OffsetAdjuster::Adjustment(
+ url_string.length(), parsed.path.len, 0));
}
if (parsed.query.is_valid())
url_string.push_back('?');
@@ -1788,26 +1788,11 @@ base::string16 FormatUrlWithOffsets(
&new_parsed->query, offsets_for_adjustment);
// Ref. This is valid, unescaped UTF-8, so we can just convert.
- if (parsed.ref.is_valid()) {
+ if (parsed.ref.is_valid())
url_string.push_back('#');
- size_t original_ref_begin = static_cast<size_t>(parsed.ref.begin);
- size_t output_ref_begin = url_string.length();
- new_parsed->ref.begin = static_cast<int>(output_ref_begin);
-
- std::vector<size_t> offsets_into_ref(
- OffsetsIntoComponent(original_offsets, original_ref_begin));
- if (parsed.ref.len > 0) {
- url_string.append(base::UTF8ToUTF16AndAdjustOffsets(
- spec.substr(original_ref_begin, static_cast<size_t>(parsed.ref.len)),
- &offsets_into_ref));
- }
-
- new_parsed->ref.len =
- static_cast<int>(url_string.length() - new_parsed->ref.begin);
- AdjustForComponentTransform(original_offsets, original_ref_begin,
- static_cast<size_t>(parsed.ref.end()), offsets_into_ref,
- output_ref_begin, offsets_for_adjustment);
- }
+ AppendFormattedComponent(spec, parsed.ref, original_offsets,
+ NonHostComponentTransform(UnescapeRule::NONE), &url_string,
+ &new_parsed->ref, offsets_for_adjustment);
// If we need to strip out http do it after the fact. This way we don't need
// to worry about how offset_for_adjustment is interpreted.
@@ -1825,7 +1810,7 @@ base::string16 FormatUrlWithOffsets(
DCHECK(new_parsed->scheme.is_valid());
int delta = -(new_parsed->scheme.len + 3); // +3 for ://.
new_parsed->scheme.reset();
- AdjustComponents(delta, new_parsed);
+ AdjustAllComponentsButScheme(delta, new_parsed);
}
LimitOffsets(url_string, offsets_for_adjustment);
@@ -1839,7 +1824,7 @@ base::string16 FormatUrl(const GURL& url,
url_parse::Parsed* new_parsed,
size_t* prefix_end,
size_t* offset_for_adjustment) {
- std::vector<size_t> offsets;
+ Offsets offsets;
if (offset_for_adjustment)
offsets.push_back(*offset_for_adjustment);
base::string16 result = FormatUrlWithOffsets(url, languages, format_types,