summaryrefslogtreecommitdiffstats
path: root/base/string_util.cc
diff options
context:
space:
mode:
authornick@chromium.org <nick@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-05-28 20:18:20 +0000
committernick@chromium.org <nick@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-05-28 20:18:20 +0000
commitd06e3e078e734adbb98909f871e510eddcdd54a5 (patch)
tree8fc0b814307a9dec0e157d902a4bb6d442a02fb3 /base/string_util.cc
parentd87f845659b2e54622850acfc780b7fc07cfed6c (diff)
downloadchromium_src-d06e3e078e734adbb98909f871e510eddcdd54a5.zip
chromium_src-d06e3e078e734adbb98909f871e510eddcdd54a5.tar.gz
chromium_src-d06e3e078e734adbb98909f871e510eddcdd54a5.tar.bz2
Add a method that truncates strings to the end point of a valid UTF8 character, leaving the string's size to be less than or equal to a specified byte size.
BUG=43675 TEST=base/string_util_unittest.cc Patch contributed by Jerrica Jones (jerrica@chromium.org). Review URL: http://codereview.chromium.org/2239007 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@48518 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base/string_util.cc')
-rw-r--r--base/string_util.cc35
1 files changed, 35 insertions, 0 deletions
diff --git a/base/string_util.cc b/base/string_util.cc
index 0269ba1..e36ae51 100644
--- a/base/string_util.cc
+++ b/base/string_util.cc
@@ -483,6 +483,41 @@ bool TrimString(const std::string& input,
return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
}
+// Copies |input| into |output|, shortened so that the result is no longer
+// than |byte_size| bytes and does not end in the middle of a UTF-8 character.
+//
+//  - If |byte_size| is greater than input.length(), |input| is copied
+//    unchanged; no validation is performed on that path.
+//  - Otherwise the function scans backwards from the byte at
+//    |byte_size| - 1 until it finds a position from which a complete, valid
+//    character can be decoded without reading past |byte_size|, and
+//    truncates right after that character. Bytes before that character are
+//    not validated.
+//  - If no such character exists (including |byte_size| == 0), |output| is
+//    cleared.
+//
+// |output| must not be NULL.
+void TruncateUTF8ToByteSize(const std::string& input,
+                            const size_t byte_size,
+                            std::string* output) {
+  // Check the out-parameter before any path (including the early return
+  // below) writes through it.
+  DCHECK(output != NULL);
+
+  if (byte_size > input.length()) {
+    *output = input;
+    return;
+  }
+
+  // The indices handed to CBU8_NEXT below are int32, so |byte_size| has to
+  // survive the narrowing conversion unchanged.
+  int32 truncation_length = static_cast<int32>(byte_size);
+  DCHECK(static_cast<size_t>(truncation_length) == byte_size);
+  int32 char_index = truncation_length - 1;
+  const char* cstr = input.c_str();
+
+  // Using CBU8, we will move backwards from the truncation point
+  // to the beginning of the string looking for a valid UTF8
+  // character. Once a full UTF8 character is found, we will
+  // truncate the string to the end of that character.
+  while (char_index >= 0) {
+    // CBU8_NEXT updates |char_index|; remember where this attempt started
+    // so a failed decode can step back by exactly one byte.
+    int32 prev = char_index;
+    uint32 code_point = 0;
+    CBU8_NEXT(cstr, char_index, truncation_length, code_point);
+    if (!base::IsValidCharacter(code_point) ||
+        !base::IsValidCodepoint(code_point)) {
+      char_index = prev - 1;
+    } else {
+      // |char_index| is now the offset used as the truncation point below.
+      break;
+    }
+  }
+
+  if (char_index >= 0)
+    *output = input.substr(0, char_index);
+  else
+    output->clear();
+}
+
TrimPositions TrimWhitespace(const std::wstring& input,
TrimPositions positions,
std::wstring* output) {