diff options
author | brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-10-08 17:38:30 +0000 |
---|---|---|
committer | brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-10-08 17:38:30 +0000 |
commit | d0767cb54b2b5ee4d9cf00b3ee0fa585826b4036 (patch) | |
tree | 1c2ee733bf62a44c31dc11f76dad53243a84439f /base | |
parent | e91d532339c854ff0a082c6562a519647524fa66 (diff) | |
download | chromium_src-d0767cb54b2b5ee4d9cf00b3ee0fa585826b4036.zip chromium_src-d0767cb54b2b5ee4d9cf00b3ee0fa585826b4036.tar.gz chromium_src-d0767cb54b2b5ee4d9cf00b3ee0fa585826b4036.tar.bz2 |
Separate out some more ICU from base and into base/i18n.
This moves string_util_icu. I moved the number formatting function into
base/i18n/number_formatting and just removed the other function in
string_util_icu, which was TrimWhitespaceUTF8. It is only used in a few places
and isn't actually helpful (callers are better off seeing explicitly that the
trimming round-trips through UTF-16).
This takes out the sorting from the FileEnumerator. The comment says the
sorting is not guaranteed. I moved it into file_util_icu as a standalone
function for callers of FileEnumerator to call manually if they need sorted
results. I modified the directory lister to use this sorting instead, and filed
a bug on doing more optimal JS-based sorting.
TEST=none
BUG=none
Review URL: http://codereview.chromium.org/267001
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@28405 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base')
-rw-r--r-- | base/base.gyp | 7 | ||||
-rw-r--r-- | base/file_util.h | 19 | ||||
-rw-r--r-- | base/file_util_posix.cc | 70 | ||||
-rw-r--r-- | base/file_util_unittest.cc | 89 | ||||
-rw-r--r-- | base/i18n/file_util_icu.cc (renamed from base/file_util_icu.cc) | 80 | ||||
-rw-r--r-- | base/i18n/file_util_icu.h | 33 | ||||
-rw-r--r-- | base/i18n/file_util_icu_unittest.cc | 71 | ||||
-rw-r--r-- | base/i18n/number_formatting.cc | 48 | ||||
-rw-r--r-- | base/i18n/number_formatting.h | 19 | ||||
-rw-r--r-- | base/string_util.h | 5 | ||||
-rw-r--r-- | base/string_util_icu.cc | 80 | ||||
-rw-r--r-- | base/string_util_unittest.cc | 46 |
12 files changed, 250 insertions, 317 deletions
diff --git a/base/base.gyp b/base/base.gyp index 41108dd..570e9f0 100644 --- a/base/base.gyp +++ b/base/base.gyp @@ -121,7 +121,6 @@ 'file_path.h', 'file_util.cc', 'file_util.h', - 'file_util_icu.cc', 'file_util_linux.cc', 'file_util_mac.mm', 'file_util_posix.cc', @@ -141,8 +140,12 @@ 'hmac_mac.cc', 'hmac_nss.cc', 'hmac_win.cc', + 'i18n/file_util_icu.cc', + 'i18n/file_util_icu.h', 'i18n/icu_string_conversions.cc', 'i18n/icu_string_conversions.h', + 'i18n/number_formatting.cc', + 'i18n/number_formatting.h', 'iat_patch.cc', 'iat_patch.h', 'icu_util.cc', @@ -284,7 +287,6 @@ 'string_tokenizer.h', 'string_util.cc', 'string_util.h', - 'string_util_icu.cc', 'string_util_win.h', 'sys_info.h', 'sys_info_chromeos.cc', @@ -582,6 +584,7 @@ 'histogram_unittest.cc', 'hmac_unittest.cc', 'id_map_unittest.cc', + 'i18n/file_util_icu_unittest.cc', 'json_reader_unittest.cc', 'json_writer_unittest.cc', 'lazy_instance_unittest.cc', diff --git a/base/file_util.h b/base/file_util.h index 6474f048..37634b9 100644 --- a/base/file_util.h +++ b/base/file_util.h @@ -103,19 +103,6 @@ void InsertBeforeExtension(FilePath* path, const FilePath::StringType& suffix); void ReplaceExtension(FilePath* file_name, const FilePath::StringType& extension); -// Replaces characters in 'file_name' that are illegal for file names with -// 'replace_char'. 'file_name' must not be a full or relative path, but just the -// file name component. Any leading or trailing whitespace in 'file_name' is -// removed. -// Example: -// file_name == "bad:file*name?.txt", changed to: "bad-file-name-.txt" when -// 'replace_char' is '-'. -void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char); - -// Returns true if file_name does not have any illegal character. The input -// param has the same restriction as that for ReplaceIllegalCharacters. 
-bool IsFilenameLegal(const string16& file_name); - //----------------------------------------------------------------------------- // Functions that involve filesystem access or modification: @@ -490,10 +477,6 @@ class FileEnumerator { static bool ReadDirectory(std::vector<DirectoryEntryInfo>* entries, const FilePath& source, bool show_links); - // Comparison function to neatly sort directory entries - static bool CompareFiles(const DirectoryEntryInfo& a, - const DirectoryEntryInfo& b); - // The files in the current directory std::vector<DirectoryEntryInfo> directory_entries_; @@ -501,7 +484,7 @@ class FileEnumerator { size_t current_directory_entry_; #endif - DISALLOW_EVIL_CONSTRUCTORS(FileEnumerator); + DISALLOW_COPY_AND_ASSIGN(FileEnumerator); }; class MemoryMappedFile { diff --git a/base/file_util_posix.cc b/base/file_util_posix.cc index 27adbfa..4621bb3 100644 --- a/base/file_util_posix.cc +++ b/base/file_util_posix.cc @@ -35,56 +35,6 @@ #include "base/sys_string_conversions.h" #include "base/time.h" #include "base/utf_string_conversions.h" -#include "unicode/coll.h" - - -namespace { - -class LocaleAwareComparator { - public: - LocaleAwareComparator() { - UErrorCode error_code = U_ZERO_ERROR; - // Use the default collator. The default locale should have been properly - // set by the time this constructor is called. - collator_.reset(icu::Collator::createInstance(error_code)); - DCHECK(U_SUCCESS(error_code)); - // Make it case-sensitive. - collator_->setStrength(icu::Collator::TERTIARY); - // Note: We do not set UCOL_NORMALIZATION_MODE attribute. In other words, we - // do not pay performance penalty to guarantee sort order correctness for - // non-FCD (http://unicode.org/notes/tn5/#FCD) file names. This should be a - // reasonable tradeoff because such file names should be rare and the sort - // order doesn't change much anyway. - } - - // Note: A similar function is available in l10n_util. - // We cannot use it because base should not depend on l10n_util. 
- // TODO(yuzo): Move some of l10n_util to base. - int Compare(const string16& a, const string16& b) { - // We are not sure if Collator::compare is thread-safe. - // Use an AutoLock just in case. - AutoLock auto_lock(lock_); - - UErrorCode error_code = U_ZERO_ERROR; - UCollationResult result = collator_->compare( - static_cast<const UChar*>(a.c_str()), - static_cast<int>(a.length()), - static_cast<const UChar*>(b.c_str()), - static_cast<int>(b.length()), - error_code); - DCHECK(U_SUCCESS(error_code)); - return result; - } - - private: - scoped_ptr<icu::Collator> collator_; - Lock lock_; - friend struct DefaultSingletonTraits<LocaleAwareComparator>; - - DISALLOW_COPY_AND_ASSIGN(LocaleAwareComparator); -}; - -} // namespace namespace file_util { @@ -623,9 +573,6 @@ FilePath FileEnumerator::Next() { if (!ReadDirectory(&entries, root_path_, file_type_ & SHOW_SYM_LINKS)) continue; - // The API says that order is not guaranteed, but order affects UX - std::sort(entries.begin(), entries.end(), CompareFiles); - directory_entries_.clear(); current_directory_entry_ = 0; for (std::vector<DirectoryEntryInfo>::const_iterator @@ -691,23 +638,6 @@ bool FileEnumerator::ReadDirectory(std::vector<DirectoryEntryInfo>* entries, return true; } -bool FileEnumerator::CompareFiles(const DirectoryEntryInfo& a, - const DirectoryEntryInfo& b) { - // Order lexicographically with directories before other files. - if (S_ISDIR(a.stat.st_mode) != S_ISDIR(b.stat.st_mode)) - return S_ISDIR(a.stat.st_mode); - - // On linux, the file system encoding is not defined. We assume - // SysNativeMBToWide takes care of it. - // - // ICU's collator can take strings in OS native encoding. But we convert the - // strings to UTF-16 ourselves to ensure conversion consistency. - // TODO(yuzo): Perhaps we should define SysNativeMBToUTF16? 
- return Singleton<LocaleAwareComparator>()->Compare( - WideToUTF16(base::SysNativeMBToWide(a.filename.value().c_str())), - WideToUTF16(base::SysNativeMBToWide(b.filename.value().c_str()))) < 0; -} - /////////////////////////////////////////////// // MemoryMappedFile diff --git a/base/file_util_unittest.cc b/base/file_util_unittest.cc index 5b606c9..b1f9fed 100644 --- a/base/file_util_unittest.cc +++ b/base/file_util_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Copyright (c) 2009 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -876,41 +876,6 @@ TEST_F(FileUtilTest, DetectDirectoryTest) { EXPECT_TRUE(file_util::Delete(test_root, true)); } -static const struct goodbad_pair { - std::wstring bad_name; - std::wstring good_name; -} kIllegalCharacterCases[] = { - {L"bad*file:name?.jpg", L"bad-file-name-.jpg"}, - {L"**********::::.txt", L"--------------.txt"}, - // We can't use UCNs (universal character names) for C0/C1 characters and - // U+007F, but \x escape is interpreted by MSVC and gcc as we intend. - {L"bad\x0003\x0091 file\u200E\u200Fname.png", L"bad-- file--name.png"}, -#if defined(OS_WIN) - {L"bad*file\\name.jpg", L"bad-file-name.jpg"}, - {L"\t bad*file\\name/.jpg ", L"bad-file-name-.jpg"}, -#elif defined(OS_POSIX) - {L"bad*file?name.jpg", L"bad-file-name.jpg"}, - {L"\t bad*file?name/.jpg ", L"bad-file-name-.jpg"}, -#endif - {L"this_file_name is okay!.mp3", L"this_file_name is okay!.mp3"}, - {L"\u4E00\uAC00.mp3", L"\u4E00\uAC00.mp3"}, - {L"\u0635\u200C\u0644.mp3", L"\u0635\u200C\u0644.mp3"}, - {L"\U00010330\U00010331.mp3", L"\U00010330\U00010331.mp3"}, - // Unassigned codepoints are ok. - {L"\u0378\U00040001.mp3", L"\u0378\U00040001.mp3"}, - // Non-characters are not allowed. 
- {L"bad\uFFFFfile\U0010FFFEname.jpg ", L"bad-file-name.jpg"}, - {L"bad\uFDD0file\uFDEFname.jpg ", L"bad-file-name.jpg"}, -}; - -TEST_F(FileUtilTest, ReplaceIllegalCharactersTest) { - for (unsigned int i = 0; i < arraysize(kIllegalCharacterCases); ++i) { - std::wstring bad_name(kIllegalCharacterCases[i].bad_name); - file_util::ReplaceIllegalCharacters(&bad_name, L'-'); - EXPECT_EQ(kIllegalCharacterCases[i].good_name, bad_name); - } -} - static const struct ReplaceExtensionCase { std::wstring file_name; FilePath::StringType extension; @@ -1069,58 +1034,6 @@ TEST_F(FileUtilTest, FileEnumeratorTest) { // (we don't care what). } -TEST_F(FileUtilTest, FileEnumeratorOrderTest) { - FilePath fileA = test_dir_.Append(FILE_PATH_LITERAL("a")); - FilePath fileB = test_dir_.Append(FILE_PATH_LITERAL("B")); - FilePath dirC = test_dir_.Append(FILE_PATH_LITERAL("C")); - FilePath dirD = test_dir_.Append(FILE_PATH_LITERAL("d")); - FilePath dirE = test_dir_.Append(FILE_PATH_LITERAL("e")); - FilePath fileF = test_dir_.Append(FILE_PATH_LITERAL("f")); - - // Create files/directories in near random order. - CreateTextFile(fileF, L""); - CreateTextFile(fileA, L""); - CreateTextFile(fileB, L""); - EXPECT_TRUE(file_util::CreateDirectory(dirE)); - EXPECT_TRUE(file_util::CreateDirectory(dirC)); - EXPECT_TRUE(file_util::CreateDirectory(dirD)); - - // On Windows, files and directories are enumerated in the lexicographical - // order, ignoring case and whether they are files or directories. On posix, - // we order directories before files. 
- file_util::FileEnumerator enumerator(test_dir_, false, FILES_AND_DIRECTORIES); - FilePath cur_file = enumerator.Next(); -#if defined(OS_WIN) - EXPECT_EQ(fileA.value(), cur_file.value()); - cur_file = enumerator.Next(); - EXPECT_EQ(fileB.value(), cur_file.value()); - cur_file = enumerator.Next(); - EXPECT_EQ(dirC.value(), cur_file.value()); - cur_file = enumerator.Next(); - EXPECT_EQ(dirD.value(), cur_file.value()); - cur_file = enumerator.Next(); - EXPECT_EQ(dirE.value(), cur_file.value()); - cur_file = enumerator.Next(); - EXPECT_EQ(fileF.value(), cur_file.value()); - cur_file = enumerator.Next(); -#elif defined(OS_POSIX) - EXPECT_EQ(dirC.value(), cur_file.value()); - cur_file = enumerator.Next(); - EXPECT_EQ(dirD.value(), cur_file.value()); - cur_file = enumerator.Next(); - EXPECT_EQ(dirE.value(), cur_file.value()); - cur_file = enumerator.Next(); - EXPECT_EQ(fileA.value(), cur_file.value()); - cur_file = enumerator.Next(); - EXPECT_EQ(fileB.value(), cur_file.value()); - cur_file = enumerator.Next(); - EXPECT_EQ(fileF.value(), cur_file.value()); - cur_file = enumerator.Next(); -#endif - - EXPECT_EQ(FILE_PATH_LITERAL(""), cur_file.value()); -} - TEST_F(FileUtilTest, Contains) { FilePath data_dir = test_dir_.Append(FILE_PATH_LITERAL("FilePathTest")); diff --git a/base/file_util_icu.cc b/base/i18n/file_util_icu.cc index eeffa92..0bc9db6 100644 --- a/base/file_util_icu.cc +++ b/base/i18n/file_util_icu.cc @@ -2,21 +2,21 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -// File utilities that use the ICU library go in this file. Functions using ICU -// are separated from the other functions to prevent ICU being pulled in by the -// linker if there is a false dependency. -// -// (The VS2005 linker finds such a false dependency and adds ~300K of ICU to -// chrome.exe if this code lives in file_util.cc, even though none of this code -// is called.) +// File utilities that use the ICU library go in this file. 
-#include "base/file_util.h" +#include "base/i18n/file_util_icu.h" +#include "base/file_path.h" +#include "base/scoped_ptr.h" #include "base/singleton.h" #include "base/string_util.h" +#include "base/sys_string_conversions.h" +#include "build/build_config.h" +#include "unicode/coll.h" #include "unicode/uniset.h" namespace { + class IllegalCharacters { public: bool contains(UChar32 ucs4) { @@ -72,6 +72,50 @@ IllegalCharacters::IllegalCharacters() { set->freeze(); } +class LocaleAwareComparator { + public: + LocaleAwareComparator() { + UErrorCode error_code = U_ZERO_ERROR; + // Use the default collator. The default locale should have been properly + // set by the time this constructor is called. + collator_.reset(icu::Collator::createInstance(error_code)); + DCHECK(U_SUCCESS(error_code)); + // Make it case-sensitive. + collator_->setStrength(icu::Collator::TERTIARY); + // Note: We do not set UCOL_NORMALIZATION_MODE attribute. In other words, we + // do not pay performance penalty to guarantee sort order correctness for + // non-FCD (http://unicode.org/notes/tn5/#FCD) file names. This should be a + // reasonable tradeoff because such file names should be rare and the sort + // order doesn't change much anyway. + } + + // Note: A similar function is available in l10n_util. + // We cannot use it because base should not depend on l10n_util. + // TODO(yuzo): Move some of l10n_util to base. + int Compare(const string16& a, const string16& b) { + // We are not sure if Collator::compare is thread-safe. + // Use an AutoLock just in case. 
+ AutoLock auto_lock(lock_); + + UErrorCode error_code = U_ZERO_ERROR; + UCollationResult result = collator_->compare( + static_cast<const UChar*>(a.c_str()), + static_cast<int>(a.length()), + static_cast<const UChar*>(b.c_str()), + static_cast<int>(b.length()), + error_code); + DCHECK(U_SUCCESS(error_code)); + return result; + } + + private: + scoped_ptr<icu::Collator> collator_; + Lock lock_; + friend struct DefaultSingletonTraits<LocaleAwareComparator>; + + DISALLOW_COPY_AND_ASSIGN(LocaleAwareComparator); +}; + } // namespace namespace file_util { @@ -126,4 +170,24 @@ void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char) { #endif } +bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) { +#if defined(OS_WIN) + return Singleton<LocaleAwareComparator>()->Compare(a.value().c_str(), + b.value().c_str()) < 0; + +#elif defined(OS_POSIX) + // On linux, the file system encoding is not defined. We assume + // SysNativeMBToWide takes care of it. + // + // ICU's collator can take strings in OS native encoding. But we convert the + // strings to UTF-16 ourselves to ensure conversion consistency. + // TODO(yuzo): Perhaps we should define SysNativeMBToUTF16? + return Singleton<LocaleAwareComparator>()->Compare( + WideToUTF16(base::SysNativeMBToWide(a.value().c_str())), + WideToUTF16(base::SysNativeMBToWide(b.value().c_str()))) < 0; +#else + #error Not implemented on your system +#endif +} + } // namespace diff --git a/base/i18n/file_util_icu.h b/base/i18n/file_util_icu.h new file mode 100644 index 0000000..c309a9e --- /dev/null +++ b/base/i18n/file_util_icu.h @@ -0,0 +1,33 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// File utilities that use the ICU library go in this file. 
+ +#include <string> + +#include "base/string16.h" + +class FilePath; + +namespace file_util { + +// Returns true if file_name does not have any illegal character. The input +// param has the same restriction as that for ReplaceIllegalCharacters. +bool IsFilenameLegal(const string16& file_name); + +// Replaces characters in 'file_name' that are illegal for file names with +// 'replace_char'. 'file_name' must not be a full or relative path, but just the +// file name component. Any leading or trailing whitespace in 'file_name' is +// removed. +// Example: +// file_name == "bad:file*name?.txt", changed to: "bad-file-name-.txt" when +// 'replace_char' is '-'. +void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char); + +// Compares two filenames using the current locale information. This can be +// used to sort directory listings. It behaves like "operator<" for use in +// std::sort. +bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b); + +} // namespace file_util diff --git a/base/i18n/file_util_icu_unittest.cc b/base/i18n/file_util_icu_unittest.cc new file mode 100644 index 0000000..aebcd0df --- /dev/null +++ b/base/i18n/file_util_icu_unittest.cc @@ -0,0 +1,71 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/i18n/file_util_icu.h" + +#include "base/file_util.h" +#include "base/path_service.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "testing/platform_test.h" + +// file_util winds up using autoreleased objects on the Mac, so this needs +// to be a PlatformTest +class FileUtilICUTest : public PlatformTest { + protected: + virtual void SetUp() { + PlatformTest::SetUp(); + // Name a subdirectory of the temp directory. 
+ ASSERT_TRUE(PathService::Get(base::DIR_TEMP, &test_dir_)); + test_dir_ = test_dir_.Append(FILE_PATH_LITERAL("FileUtilTest")); + + // Create a fresh, empty copy of this directory. + file_util::Delete(test_dir_, true); + file_util::CreateDirectory(test_dir_); + } + virtual void TearDown() { + PlatformTest::TearDown(); + // Clean up test directory + ASSERT_TRUE(file_util::Delete(test_dir_, true)); + ASSERT_FALSE(file_util::PathExists(test_dir_)); + } + + // the path to temporary directory used to contain the test operations + FilePath test_dir_; +}; + +static const struct goodbad_pair { + std::wstring bad_name; + std::wstring good_name; +} kIllegalCharacterCases[] = { + {L"bad*file:name?.jpg", L"bad-file-name-.jpg"}, + {L"**********::::.txt", L"--------------.txt"}, + // We can't use UCNs (universal character names) for C0/C1 characters and + // U+007F, but \x escape is interpreted by MSVC and gcc as we intend. + {L"bad\x0003\x0091 file\u200E\u200Fname.png", L"bad-- file--name.png"}, +#if defined(OS_WIN) + {L"bad*file\\name.jpg", L"bad-file-name.jpg"}, + {L"\t bad*file\\name/.jpg ", L"bad-file-name-.jpg"}, +#elif defined(OS_POSIX) + {L"bad*file?name.jpg", L"bad-file-name.jpg"}, + {L"\t bad*file?name/.jpg ", L"bad-file-name-.jpg"}, +#endif + {L"this_file_name is okay!.mp3", L"this_file_name is okay!.mp3"}, + {L"\u4E00\uAC00.mp3", L"\u4E00\uAC00.mp3"}, + {L"\u0635\u200C\u0644.mp3", L"\u0635\u200C\u0644.mp3"}, + {L"\U00010330\U00010331.mp3", L"\U00010330\U00010331.mp3"}, + // Unassigned codepoints are ok. + {L"\u0378\U00040001.mp3", L"\u0378\U00040001.mp3"}, + // Non-characters are not allowed. 
+ {L"bad\uFFFFfile\U0010FFFEname.jpg ", L"bad-file-name.jpg"}, + {L"bad\uFDD0file\uFDEFname.jpg ", L"bad-file-name.jpg"}, +}; + +TEST_F(FileUtilICUTest, ReplaceIllegalCharactersTest) { + for (unsigned int i = 0; i < arraysize(kIllegalCharacterCases); ++i) { + std::wstring bad_name(kIllegalCharacterCases[i].bad_name); + file_util::ReplaceIllegalCharacters(&bad_name, L'-'); + EXPECT_EQ(kIllegalCharacterCases[i].good_name, bad_name); + } +} + diff --git a/base/i18n/number_formatting.cc b/base/i18n/number_formatting.cc new file mode 100644 index 0000000..fef1b7d --- /dev/null +++ b/base/i18n/number_formatting.cc @@ -0,0 +1,48 @@ +// Copyright (c) 2008 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/i18n/number_formatting.h" + +#include "base/logging.h" +#include "base/singleton.h" +#include "base/string_util.h" +#include "base/utf_string_conversions.h" +#include "unicode/numfmt.h" +#include "unicode/ustring.h" + +namespace base { + +namespace { + +struct NumberFormatSingletonTraits + : public DefaultSingletonTraits<icu::NumberFormat> { + static icu::NumberFormat* New() { + UErrorCode status = U_ZERO_ERROR; + icu::NumberFormat* formatter = icu::NumberFormat::createInstance(status); + DCHECK(U_SUCCESS(status)); + return formatter; + } + // There's no ICU call to destroy a NumberFormat object other than + // operator delete, so use the default Delete, which calls operator delete. + // This can cause problems if a different allocator is used by this file than + // by ICU. +}; + +} // namespace + +string16 FormatNumber(int64 number) { + icu::NumberFormat* number_format = + Singleton<icu::NumberFormat, NumberFormatSingletonTraits>::get(); + + if (!number_format) { + // As a fallback, just return the raw number in a string. 
+ return UTF8ToUTF16(StringPrintf("%lld", number)); + } + icu::UnicodeString ustr; + number_format->format(number, ustr); + + return string16(ustr.getBuffer(), static_cast<size_t>(ustr.length())); +} + +} // namespace base diff --git a/base/i18n/number_formatting.h b/base/i18n/number_formatting.h new file mode 100644 index 0000000..9fa2b18 --- /dev/null +++ b/base/i18n/number_formatting.h @@ -0,0 +1,19 @@ +// Copyright (c) 2008 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_I18N_NUMBER_FORMATTING_H_ +#define BASE_I18N_NUMBER_FORMATTING_H_ + +#include <string> + +#include "base/basictypes.h" +#include "base/string16.h" + +namespace base { + +string16 FormatNumber(int64 number); + +} // namespace base + +#endif // BASE_I18N_NUMBER_FORMATTING_H_ diff --git a/base/string_util.h b/base/string_util.h index 254e18f..c6b9fb1 100644 --- a/base/string_util.h +++ b/base/string_util.h @@ -140,8 +140,6 @@ bool TrimString(const std::string& input, // The non-wide version has two functions: // * TrimWhitespaceASCII() // This function is for ASCII strings and only looks for ASCII whitespace; -// * TrimWhitespaceUTF8() -// This function is for UTF-8 strings and looks for Unicode whitespace. // Please choose the best one according to your usage. // NOTE: Safe to use the same variable for both input and output. enum TrimPositions { @@ -156,9 +154,6 @@ TrimPositions TrimWhitespace(const std::wstring& input, TrimPositions TrimWhitespaceASCII(const std::string& input, TrimPositions positions, std::string* output); -TrimPositions TrimWhitespaceUTF8(const std::string& input, - TrimPositions positions, - std::string* output); // Deprecated. This function is only for backward compatibility and calls // TrimWhitespaceASCII(). 
diff --git a/base/string_util_icu.cc b/base/string_util_icu.cc deleted file mode 100644 index 68fbd10..0000000 --- a/base/string_util_icu.cc +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "base/string_util.h" - -#include <string.h> -#include <vector> - -#include "base/basictypes.h" -#include "base/logging.h" -#include "base/singleton.h" -#include "unicode/numfmt.h" -#include "unicode/ustring.h" - -// Number formatting ----------------------------------------------------------- - -namespace { - -struct NumberFormatSingletonTraits - : public DefaultSingletonTraits<icu::NumberFormat> { - static icu::NumberFormat* New() { - UErrorCode status = U_ZERO_ERROR; - icu::NumberFormat* formatter = icu::NumberFormat::createInstance(status); - DCHECK(U_SUCCESS(status)); - return formatter; - } - // There's no ICU call to destroy a NumberFormat object other than - // operator delete, so use the default Delete, which calls operator delete. - // This can cause problems if a different allocator is used by this file than - // by ICU. -}; - -} // namespace - -std::wstring FormatNumber(int64 number) { - icu::NumberFormat* number_format = - Singleton<icu::NumberFormat, NumberFormatSingletonTraits>::get(); - - if (!number_format) { - // As a fallback, just return the raw number in a string. - return StringPrintf(L"%lld", number); - } - icu::UnicodeString ustr; - number_format->format(number, ustr); - -#if defined(WCHAR_T_IS_UTF16) - return std::wstring(ustr.getBuffer(), - static_cast<std::wstring::size_type>(ustr.length())); -#elif defined(WCHAR_T_IS_UTF32) - wchar_t buffer[64]; // A int64 is less than 20 chars long, so 64 chars - // leaves plenty of room for formating stuff. 
- int length = 0; - UErrorCode error = U_ZERO_ERROR; - u_strToWCS(buffer, 64, &length, ustr.getBuffer(), ustr.length() , &error); - if (U_FAILURE(error)) { - NOTREACHED(); - // As a fallback, just return the raw number in a string. - return StringPrintf(L"%lld", number); - } - return std::wstring(buffer, static_cast<std::wstring::size_type>(length)); -#endif // defined(WCHAR_T_IS_UTF32) -} - -// Although this function isn't specific to ICU, we implemented it here so -// that chrome.exe won't pull it in. Moving this function to string_util.cc -// causes chrome.exe to grow by 400k because of more ICU being pulled in. -TrimPositions TrimWhitespaceUTF8(const std::string& input, - TrimPositions positions, - std::string* output) { - // This implementation is not so fast since it converts the text encoding - // twice. Please feel free to file a bug if this function hurts the - // performance of Chrome. - DCHECK(IsStringUTF8(input)); - std::wstring input_wide = UTF8ToWide(input); - std::wstring output_wide; - TrimPositions result = TrimWhitespace(input_wide, positions, &output_wide); - *output = WideToUTF8(output_wide); - return result; -} diff --git a/base/string_util_unittest.cc b/base/string_util_unittest.cc index 2723541..a70b03e 100644 --- a/base/string_util_unittest.cc +++ b/base/string_util_unittest.cc @@ -100,52 +100,6 @@ TEST(StringUtilTest, TrimWhitespace) { } } -static const struct trim_case_utf8 { - const char* input; - const TrimPositions positions; - const char* output; - const TrimPositions return_value; -} trim_cases_utf8[] = { - // UTF-8 strings that start (and end) with Unicode space characters - // (including zero-width spaces). 
- {"\xE2\x80\x80Test String\xE2\x80\x81", TRIM_ALL, "Test String", TRIM_ALL}, - {"\xE2\x80\x82Test String\xE2\x80\x83", TRIM_ALL, "Test String", TRIM_ALL}, - {"\xE2\x80\x84Test String\xE2\x80\x85", TRIM_ALL, "Test String", TRIM_ALL}, - {"\xE2\x80\x86Test String\xE2\x80\x87", TRIM_ALL, "Test String", TRIM_ALL}, - {"\xE2\x80\x88Test String\xE2\x80\x8A", TRIM_ALL, "Test String", TRIM_ALL}, - {"\xE3\x80\x80Test String\xE3\x80\x80", TRIM_ALL, "Test String", TRIM_ALL}, - // UTF-8 strings that end with 0x85 (NEL in ISO-8859). - {"\xD0\x85", TRIM_TRAILING, "\xD0\x85", TRIM_NONE}, - {"\xD9\x85", TRIM_TRAILING, "\xD9\x85", TRIM_NONE}, - {"\xEC\x97\x85", TRIM_TRAILING, "\xEC\x97\x85", TRIM_NONE}, - {"\xF0\x90\x80\x85", TRIM_TRAILING, "\xF0\x90\x80\x85", TRIM_NONE}, - // UTF-8 strings that end with 0xA0 (non-break space in ISO-8859-1). - {"\xD0\xA0", TRIM_TRAILING, "\xD0\xA0", TRIM_NONE}, - {"\xD9\xA0", TRIM_TRAILING, "\xD9\xA0", TRIM_NONE}, - {"\xEC\x97\xA0", TRIM_TRAILING, "\xEC\x97\xA0", TRIM_NONE}, - {"\xF0\x90\x80\xA0", TRIM_TRAILING, "\xF0\x90\x80\xA0", TRIM_NONE}, -}; - -TEST(StringUtilTest, TrimWhitespaceUTF8) { - std::string output_ascii; - for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) { - const trim_case_ascii& value = trim_cases_ascii[i]; - EXPECT_EQ(value.return_value, - TrimWhitespaceASCII(value.input, value.positions, &output_ascii)); - EXPECT_EQ(value.output, output_ascii); - } - - // Test that TrimWhiteSpaceUTF8() can remove Unicode space characters and - // prevent from removing UTF-8 characters that end with an ISO-8859 NEL. - std::string output_utf8; - for (size_t i = 0; i < arraysize(trim_cases_utf8); ++i) { - const trim_case_utf8& value = trim_cases_utf8[i]; - EXPECT_EQ(value.return_value, - TrimWhitespaceUTF8(value.input, value.positions, &output_utf8)); - EXPECT_EQ(value.output, output_utf8); - } -} - static const struct collapse_case { const wchar_t* input; const bool trim; |