summaryrefslogtreecommitdiffstats
path: root/net/base/escape.cc
diff options
context:
space:
mode:
authormrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-04-13 00:45:39 +0000
committermrossetti@chromium.org <mrossetti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2011-04-13 00:45:39 +0000
commita47f8eadd67f75d3b663fdcc898caabb335bad0b (patch)
treefdf872770d4cd58ee753f219475850490a008f6d /net/base/escape.cc
parent2e0e8253a232fa499d22e47753c5bbadaebd69e7 (diff)
downloadchromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.zip
chromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.tar.gz
chromium_src-a47f8eadd67f75d3b663fdcc898caabb335bad0b.tar.bz2
Add multiple-offset versions of the various URL reformatting functions. Fixed a couple of erroneous unit tests of offsets into username/password.
Note: This does not complete the work required for bug 78153; it covers only the first two-thirds. BUG=78153 TEST=Many unit tests updated and added. Review URL: http://codereview.chromium.org/6822038 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@81343 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net/base/escape.cc')
-rw-r--r--net/base/escape.cc117
1 files changed, 85 insertions, 32 deletions
diff --git a/net/base/escape.cc b/net/base/escape.cc
index 64bd107..61c3e81 100644
--- a/net/base/escape.cc
+++ b/net/base/escape.cc
@@ -2,11 +2,12 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include <algorithm>
-
#include "net/base/escape.h"
+#include <algorithm>
+
#include "base/logging.h"
+#include "base/scoped_ptr.h"
#include "base/string_piece.h"
#include "base/string_util.h"
#include "base/utf_string_conversions.h"
@@ -98,15 +99,14 @@ const char kUrlUnescape[128] = {
};
template<typename STR>
-STR UnescapeURLImpl(const STR& escaped_text,
- UnescapeRule::Type rules,
- size_t* offset_for_adjustment) {
- size_t offset_temp = string16::npos;
- if (!offset_for_adjustment)
- offset_for_adjustment = &offset_temp;
- else if (*offset_for_adjustment >= escaped_text.length())
- *offset_for_adjustment = string16::npos;
-
+STR UnescapeURLWithOffsetsImpl(const STR& escaped_text,
+ UnescapeRule::Type rules,
+ std::vector<size_t>* offsets_for_adjustment) {
+ if (offsets_for_adjustment) {
+ std::for_each(offsets_for_adjustment->begin(),
+ offsets_for_adjustment->end(),
+ LimitOffset<std::wstring>(escaped_text.length()));
+ }
// Do not unescape anything, return the |escaped_text| text.
if (rules == UnescapeRule::NONE)
return escaped_text;
@@ -117,6 +117,7 @@ STR UnescapeURLImpl(const STR& escaped_text,
STR result;
result.reserve(escaped_text.length());
+ AdjustEncodingOffset::Adjustments adjustments; // Locations of adjusted text.
for (size_t i = 0, max = escaped_text.size(); i < max; ++i) {
if (static_cast<unsigned char>(escaped_text[i]) >= 128) {
// Non ASCII character, append as is.
@@ -144,17 +145,9 @@ STR UnescapeURLImpl(const STR& escaped_text,
// Additionally allow control characters if requested.
(value < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) {
// Use the unescaped version of the character.
- size_t length_before_append = result.length();
+ adjustments.push_back(i);
result.push_back(value);
i += 2;
-
- // Adjust offset to match length change.
- if (*offset_for_adjustment != std::string::npos) {
- if (*offset_for_adjustment > (length_before_append + 2))
- *offset_for_adjustment -= 2;
- else if (*offset_for_adjustment > length_before_append)
- *offset_for_adjustment = std::string::npos;
- }
} else {
// Keep escaped. Append a percent and we'll get the following two
// digits on the next loops through.
@@ -174,6 +167,26 @@ STR UnescapeURLImpl(const STR& escaped_text,
}
}
+ // Make offset adjustment.
+ if (offsets_for_adjustment && !adjustments.empty()) {
+ std::for_each(offsets_for_adjustment->begin(),
+ offsets_for_adjustment->end(),
+ AdjustEncodingOffset(adjustments));
+ }
+
+ return result;
+}
+
+template<typename STR>
+STR UnescapeURLImpl(const STR& escaped_text,
+ UnescapeRule::Type rules,
+ size_t* offset_for_adjustment) {
+ std::vector<size_t> offsets;
+ if (offset_for_adjustment)
+ offsets.push_back(*offset_for_adjustment);
+ STR result = UnescapeURLWithOffsetsImpl(escaped_text, rules, &offsets);
+ if (offset_for_adjustment)
+ *offset_for_adjustment = offsets[0];
return result;
}
@@ -234,33 +247,49 @@ std::string EscapeExternalHandlerValue(const std::string& text) {
return Escape(text, kExternalHandlerCharmap, false);
}
-string16 UnescapeAndDecodeUTF8URLComponent(const std::string& text,
- UnescapeRule::Type rules,
- size_t* offset_for_adjustment) {
+string16 UnescapeAndDecodeUTF8URLComponentWithOffsets(
+ const std::string& text,
+ UnescapeRule::Type rules,
+ std::vector<size_t>* offsets_for_adjustment) {
std::wstring result;
- size_t original_offset = offset_for_adjustment ? *offset_for_adjustment : 0;
+ std::vector<size_t> original_offsets;
+ if (offsets_for_adjustment)
+ original_offsets = *offsets_for_adjustment;
std::string unescaped_url(
- UnescapeURLImpl(text, rules, offset_for_adjustment));
- if (UTF8ToWideAndAdjustOffset(unescaped_url.data(), unescaped_url.length(),
- &result, offset_for_adjustment))
+ UnescapeURLWithOffsetsImpl(text, rules, offsets_for_adjustment));
+ if (UTF8ToWideAndAdjustOffsets(unescaped_url.data(), unescaped_url.length(),
+ &result, offsets_for_adjustment))
return WideToUTF16Hack(result); // Character set looks like it's valid.
// Not valid. Return the escaped version. Undo our changes to
// |offsets_for_adjustment| since we haven't changed the string after all.
+ if (offsets_for_adjustment)
+ *offsets_for_adjustment = original_offsets;
+ return WideToUTF16Hack(UTF8ToWideAndAdjustOffsets(
+ text, offsets_for_adjustment));
+}
+
+string16 UnescapeAndDecodeUTF8URLComponent(const std::string& text,
+ UnescapeRule::Type rules,
+ size_t* offset_for_adjustment) {
+ std::vector<size_t> offsets;
+ if (offset_for_adjustment)
+ offsets.push_back(*offset_for_adjustment);
+ string16 result =
+ UnescapeAndDecodeUTF8URLComponentWithOffsets(text, rules, &offsets);
if (offset_for_adjustment)
- *offset_for_adjustment = original_offset;
- return WideToUTF16Hack(UTF8ToWideAndAdjustOffset(text,
- offset_for_adjustment));
+ *offset_for_adjustment = offsets[0];
+ return result;
}
std::string UnescapeURLComponent(const std::string& escaped_text,
UnescapeRule::Type rules) {
- return UnescapeURLImpl(escaped_text, rules, NULL);
+ return UnescapeURLWithOffsetsImpl<std::string>(escaped_text, rules, NULL);
}
string16 UnescapeURLComponent(const string16& escaped_text,
UnescapeRule::Type rules) {
- return UnescapeURLImpl(escaped_text, rules, NULL);
+ return UnescapeURLWithOffsetsImpl<string16>(escaped_text, rules, NULL);
}
@@ -350,3 +379,27 @@ string16 UnescapeForHTML(const string16& input) {
}
return text;
}
+
+AdjustEncodingOffset::AdjustEncodingOffset(const Adjustments& adjustments)
+ : adjustments(adjustments) {}
+
+void AdjustEncodingOffset::operator()(size_t& offset) {
+ // Subtract 2 for each unescaped sequence that ends before |offset|; an offset landing on the two characters after a sequence's start becomes npos.
+ if (offset == string16::npos)
+ return;
+ size_t adjusted_offset = offset;
+ for (Adjustments::const_iterator i = adjustments.begin();
+ i != adjustments.end(); ++i) {
+ size_t location = *i;
+ if (offset <= location) {
+ offset = adjusted_offset;
+ return;
+ }
+ if (offset <= (location + 2)) {
+ offset = string16::npos;
+ return;
+ }
+ adjusted_offset -= 2;
+ }
+ offset = adjusted_offset;
+}