summaryrefslogtreecommitdiffstats
path: root/base/utf_string_conversion_utils.h
diff options
context:
space:
mode:
authorjschuh@chromium.org <jschuh@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-01-16 16:40:38 +0000
committerjschuh@chromium.org <jschuh@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-01-16 16:40:38 +0000
commit548a6c0f30c3dcf374cc06be48f02a06da5c1d19 (patch)
tree57a3406e6d51b44774b9fa37cde1810a1d76a47c /base/utf_string_conversion_utils.h
parentb78e6049b83a6a221b6c7b47c229b337e3abce43 (diff)
downloadchromium_src-548a6c0f30c3dcf374cc06be48f02a06da5c1d19.zip
chromium_src-548a6c0f30c3dcf374cc06be48f02a06da5c1d19.tar.gz
chromium_src-548a6c0f30c3dcf374cc06be48f02a06da5c1d19.tar.bz2
Changes are:
* base::IsValidCodepoint() now returns false on non-character code points.
* base::IsStringUTF8() now uses ICU library (removed old Mozilla implementation).
* Removed base::IsStringWideUTF8() (was unused and confusing)
* file_util::ReplaceIllegalCharactersInPath() now treats Unicode replacement character (U+FFFD) as invalid.
* Associated unit tests updated.

BUG=2759
BUG=30662
TEST=base_unittests --gtest_filter=StringUtilTest.IsStringUTF8
TEST=base_unittests --gtest_filter=UTFStringConversionsTest.*
TEST=base_unittests --gtest_filter=FileUtilICUTestReplaceIllegalCharactersInPathTest

Review URL: http://codereview.chromium.org/548017

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@36459 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base/utf_string_conversion_utils.h')
-rw-r--r--base/utf_string_conversion_utils.h11
1 files changed, 6 insertions, 5 deletions
diff --git a/base/utf_string_conversion_utils.h b/base/utf_string_conversion_utils.h
index a8a76c5..3fcb689 100644
--- a/base/utf_string_conversion_utils.h
+++ b/base/utf_string_conversion_utils.h
@@ -12,11 +12,12 @@
namespace base {
inline bool IsValidCodepoint(uint32 code_point) {
- // Excludes the surrogate code points ([0xD800, 0xDFFF]) and
- // codepoints larger than 0x10FFFF (the highest codepoint allowed).
- // Non-characters and unassigned codepoints are allowed.
- return code_point < 0xD800u ||
- (code_point >= 0xE000u && code_point <= 0x10FFFFu);
+ // Excludes non-characters (U+FDD0..U+FDEF, and all codepoints ending in
+ // 0xFFFE or 0xFFFF), surrogate code points (U+D800..U+DFFF), and codepoints
+ // larger than U+10FFFF (the highest codepoint allowed).
+ return code_point < 0xD800u || (code_point >= 0xE000u &&
+ code_point < 0xFDD0u) || (code_point > 0xFDEFu &&
+ code_point <= 0x10FFFFu && (code_point & 0xFFFEu) != 0xFFFEu);
}
// ReadUnicodeCharacter --------------------------------------------------------