summary | refs | log | tree | commit | diff | stats
path: root/base
diff options
context:
space:
mode:
author: brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-10-08 17:38:30 +0000
committer: brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-10-08 17:38:30 +0000
commit: d0767cb54b2b5ee4d9cf00b3ee0fa585826b4036 (patch)
tree: 1c2ee733bf62a44c31dc11f76dad53243a84439f /base
parent: e91d532339c854ff0a082c6562a519647524fa66 (diff)
download: chromium_src-d0767cb54b2b5ee4d9cf00b3ee0fa585826b4036.zip
download: chromium_src-d0767cb54b2b5ee4d9cf00b3ee0fa585826b4036.tar.gz
download: chromium_src-d0767cb54b2b5ee4d9cf00b3ee0fa585826b4036.tar.bz2
Separate out some more ICU from base and into base/i18n.

This moves string_util_icu. I moved the number formatting function into base/i18n/number_formatting and just removed the other function in string_util_icu which was TrimWhitespaceUTF8. It is only used in a few places and isn't actually helpful (and the fact that it round-trips through UTF-16 is better for the caller to see).

This takes out the sorting from the FileEnumerator. The comment says the sorting is not guaranteed. I moved it into file_util_icu as a standalone function for callers of FileEnumerator to call manually if they need sorted results. I modified the directory lister to use this sorting instead, and filed a bug on doing more optimal JS-based sorting.

TEST=none
BUG=none
Review URL: http://codereview.chromium.org/267001

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@28405 0039d316-1c4b-4281-b951-d872f2087c98

Diffstat (limited to 'base')
-rw-r--r-- base/base.gyp 7
-rw-r--r-- base/file_util.h 19
-rw-r--r-- base/file_util_posix.cc 70
-rw-r--r-- base/file_util_unittest.cc 89
-rw-r--r-- base/i18n/file_util_icu.cc (renamed from base/file_util_icu.cc) 80
-rw-r--r-- base/i18n/file_util_icu.h 33
-rw-r--r-- base/i18n/file_util_icu_unittest.cc 71
-rw-r--r-- base/i18n/number_formatting.cc 48
-rw-r--r-- base/i18n/number_formatting.h 19
-rw-r--r-- base/string_util.h 5
-rw-r--r-- base/string_util_icu.cc 80
-rw-r--r-- base/string_util_unittest.cc 46
12 files changed, 250 insertions, 317 deletions
diff --git a/base/base.gyp b/base/base.gyp
index 41108dd..570e9f0 100644
--- a/base/base.gyp
+++ b/base/base.gyp
@@ -121,7 +121,6 @@
'file_path.h',
'file_util.cc',
'file_util.h',
- 'file_util_icu.cc',
'file_util_linux.cc',
'file_util_mac.mm',
'file_util_posix.cc',
@@ -141,8 +140,12 @@
'hmac_mac.cc',
'hmac_nss.cc',
'hmac_win.cc',
+ 'i18n/file_util_icu.cc',
+ 'i18n/file_util_icu.h',
'i18n/icu_string_conversions.cc',
'i18n/icu_string_conversions.h',
+ 'i18n/number_formatting.cc',
+ 'i18n/number_formatting.h',
'iat_patch.cc',
'iat_patch.h',
'icu_util.cc',
@@ -284,7 +287,6 @@
'string_tokenizer.h',
'string_util.cc',
'string_util.h',
- 'string_util_icu.cc',
'string_util_win.h',
'sys_info.h',
'sys_info_chromeos.cc',
@@ -582,6 +584,7 @@
'histogram_unittest.cc',
'hmac_unittest.cc',
'id_map_unittest.cc',
+ 'i18n/file_util_icu_unittest.cc',
'json_reader_unittest.cc',
'json_writer_unittest.cc',
'lazy_instance_unittest.cc',
diff --git a/base/file_util.h b/base/file_util.h
index 6474f048..37634b9 100644
--- a/base/file_util.h
+++ b/base/file_util.h
@@ -103,19 +103,6 @@ void InsertBeforeExtension(FilePath* path, const FilePath::StringType& suffix);
void ReplaceExtension(FilePath* file_name,
const FilePath::StringType& extension);
-// Replaces characters in 'file_name' that are illegal for file names with
-// 'replace_char'. 'file_name' must not be a full or relative path, but just the
-// file name component. Any leading or trailing whitespace in 'file_name' is
-// removed.
-// Example:
-// file_name == "bad:file*name?.txt", changed to: "bad-file-name-.txt" when
-// 'replace_char' is '-'.
-void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char);
-
-// Returns true if file_name does not have any illegal character. The input
-// param has the same restriction as that for ReplaceIllegalCharacters.
-bool IsFilenameLegal(const string16& file_name);
-
//-----------------------------------------------------------------------------
// Functions that involve filesystem access or modification:
@@ -490,10 +477,6 @@ class FileEnumerator {
static bool ReadDirectory(std::vector<DirectoryEntryInfo>* entries,
const FilePath& source, bool show_links);
- // Comparison function to neatly sort directory entries
- static bool CompareFiles(const DirectoryEntryInfo& a,
- const DirectoryEntryInfo& b);
-
// The files in the current directory
std::vector<DirectoryEntryInfo> directory_entries_;
@@ -501,7 +484,7 @@ class FileEnumerator {
size_t current_directory_entry_;
#endif
- DISALLOW_EVIL_CONSTRUCTORS(FileEnumerator);
+ DISALLOW_COPY_AND_ASSIGN(FileEnumerator);
};
class MemoryMappedFile {
diff --git a/base/file_util_posix.cc b/base/file_util_posix.cc
index 27adbfa..4621bb3 100644
--- a/base/file_util_posix.cc
+++ b/base/file_util_posix.cc
@@ -35,56 +35,6 @@
#include "base/sys_string_conversions.h"
#include "base/time.h"
#include "base/utf_string_conversions.h"
-#include "unicode/coll.h"
-
-
-namespace {
-
-class LocaleAwareComparator {
- public:
- LocaleAwareComparator() {
- UErrorCode error_code = U_ZERO_ERROR;
- // Use the default collator. The default locale should have been properly
- // set by the time this constructor is called.
- collator_.reset(icu::Collator::createInstance(error_code));
- DCHECK(U_SUCCESS(error_code));
- // Make it case-sensitive.
- collator_->setStrength(icu::Collator::TERTIARY);
- // Note: We do not set UCOL_NORMALIZATION_MODE attribute. In other words, we
- // do not pay performance penalty to guarantee sort order correctness for
- // non-FCD (http://unicode.org/notes/tn5/#FCD) file names. This should be a
- // reasonable tradeoff because such file names should be rare and the sort
- // order doesn't change much anyway.
- }
-
- // Note: A similar function is available in l10n_util.
- // We cannot use it because base should not depend on l10n_util.
- // TODO(yuzo): Move some of l10n_util to base.
- int Compare(const string16& a, const string16& b) {
- // We are not sure if Collator::compare is thread-safe.
- // Use an AutoLock just in case.
- AutoLock auto_lock(lock_);
-
- UErrorCode error_code = U_ZERO_ERROR;
- UCollationResult result = collator_->compare(
- static_cast<const UChar*>(a.c_str()),
- static_cast<int>(a.length()),
- static_cast<const UChar*>(b.c_str()),
- static_cast<int>(b.length()),
- error_code);
- DCHECK(U_SUCCESS(error_code));
- return result;
- }
-
- private:
- scoped_ptr<icu::Collator> collator_;
- Lock lock_;
- friend struct DefaultSingletonTraits<LocaleAwareComparator>;
-
- DISALLOW_COPY_AND_ASSIGN(LocaleAwareComparator);
-};
-
-} // namespace
namespace file_util {
@@ -623,9 +573,6 @@ FilePath FileEnumerator::Next() {
if (!ReadDirectory(&entries, root_path_, file_type_ & SHOW_SYM_LINKS))
continue;
- // The API says that order is not guaranteed, but order affects UX
- std::sort(entries.begin(), entries.end(), CompareFiles);
-
directory_entries_.clear();
current_directory_entry_ = 0;
for (std::vector<DirectoryEntryInfo>::const_iterator
@@ -691,23 +638,6 @@ bool FileEnumerator::ReadDirectory(std::vector<DirectoryEntryInfo>* entries,
return true;
}
-bool FileEnumerator::CompareFiles(const DirectoryEntryInfo& a,
- const DirectoryEntryInfo& b) {
- // Order lexicographically with directories before other files.
- if (S_ISDIR(a.stat.st_mode) != S_ISDIR(b.stat.st_mode))
- return S_ISDIR(a.stat.st_mode);
-
- // On linux, the file system encoding is not defined. We assume
- // SysNativeMBToWide takes care of it.
- //
- // ICU's collator can take strings in OS native encoding. But we convert the
- // strings to UTF-16 ourselves to ensure conversion consistency.
- // TODO(yuzo): Perhaps we should define SysNativeMBToUTF16?
- return Singleton<LocaleAwareComparator>()->Compare(
- WideToUTF16(base::SysNativeMBToWide(a.filename.value().c_str())),
- WideToUTF16(base::SysNativeMBToWide(b.filename.value().c_str()))) < 0;
-}
-
///////////////////////////////////////////////
// MemoryMappedFile
diff --git a/base/file_util_unittest.cc b/base/file_util_unittest.cc
index 5b606c9..b1f9fed 100644
--- a/base/file_util_unittest.cc
+++ b/base/file_util_unittest.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -876,41 +876,6 @@ TEST_F(FileUtilTest, DetectDirectoryTest) {
EXPECT_TRUE(file_util::Delete(test_root, true));
}
-static const struct goodbad_pair {
- std::wstring bad_name;
- std::wstring good_name;
-} kIllegalCharacterCases[] = {
- {L"bad*file:name?.jpg", L"bad-file-name-.jpg"},
- {L"**********::::.txt", L"--------------.txt"},
- // We can't use UCNs (universal character names) for C0/C1 characters and
- // U+007F, but \x escape is interpreted by MSVC and gcc as we intend.
- {L"bad\x0003\x0091 file\u200E\u200Fname.png", L"bad-- file--name.png"},
-#if defined(OS_WIN)
- {L"bad*file\\name.jpg", L"bad-file-name.jpg"},
- {L"\t bad*file\\name/.jpg ", L"bad-file-name-.jpg"},
-#elif defined(OS_POSIX)
- {L"bad*file?name.jpg", L"bad-file-name.jpg"},
- {L"\t bad*file?name/.jpg ", L"bad-file-name-.jpg"},
-#endif
- {L"this_file_name is okay!.mp3", L"this_file_name is okay!.mp3"},
- {L"\u4E00\uAC00.mp3", L"\u4E00\uAC00.mp3"},
- {L"\u0635\u200C\u0644.mp3", L"\u0635\u200C\u0644.mp3"},
- {L"\U00010330\U00010331.mp3", L"\U00010330\U00010331.mp3"},
- // Unassigned codepoints are ok.
- {L"\u0378\U00040001.mp3", L"\u0378\U00040001.mp3"},
- // Non-characters are not allowed.
- {L"bad\uFFFFfile\U0010FFFEname.jpg ", L"bad-file-name.jpg"},
- {L"bad\uFDD0file\uFDEFname.jpg ", L"bad-file-name.jpg"},
-};
-
-TEST_F(FileUtilTest, ReplaceIllegalCharactersTest) {
- for (unsigned int i = 0; i < arraysize(kIllegalCharacterCases); ++i) {
- std::wstring bad_name(kIllegalCharacterCases[i].bad_name);
- file_util::ReplaceIllegalCharacters(&bad_name, L'-');
- EXPECT_EQ(kIllegalCharacterCases[i].good_name, bad_name);
- }
-}
-
static const struct ReplaceExtensionCase {
std::wstring file_name;
FilePath::StringType extension;
@@ -1069,58 +1034,6 @@ TEST_F(FileUtilTest, FileEnumeratorTest) {
// (we don't care what).
}
-TEST_F(FileUtilTest, FileEnumeratorOrderTest) {
- FilePath fileA = test_dir_.Append(FILE_PATH_LITERAL("a"));
- FilePath fileB = test_dir_.Append(FILE_PATH_LITERAL("B"));
- FilePath dirC = test_dir_.Append(FILE_PATH_LITERAL("C"));
- FilePath dirD = test_dir_.Append(FILE_PATH_LITERAL("d"));
- FilePath dirE = test_dir_.Append(FILE_PATH_LITERAL("e"));
- FilePath fileF = test_dir_.Append(FILE_PATH_LITERAL("f"));
-
- // Create files/directories in near random order.
- CreateTextFile(fileF, L"");
- CreateTextFile(fileA, L"");
- CreateTextFile(fileB, L"");
- EXPECT_TRUE(file_util::CreateDirectory(dirE));
- EXPECT_TRUE(file_util::CreateDirectory(dirC));
- EXPECT_TRUE(file_util::CreateDirectory(dirD));
-
- // On Windows, files and directories are enumerated in the lexicographical
- // order, ignoring case and whether they are files or directories. On posix,
- // we order directories before files.
- file_util::FileEnumerator enumerator(test_dir_, false, FILES_AND_DIRECTORIES);
- FilePath cur_file = enumerator.Next();
-#if defined(OS_WIN)
- EXPECT_EQ(fileA.value(), cur_file.value());
- cur_file = enumerator.Next();
- EXPECT_EQ(fileB.value(), cur_file.value());
- cur_file = enumerator.Next();
- EXPECT_EQ(dirC.value(), cur_file.value());
- cur_file = enumerator.Next();
- EXPECT_EQ(dirD.value(), cur_file.value());
- cur_file = enumerator.Next();
- EXPECT_EQ(dirE.value(), cur_file.value());
- cur_file = enumerator.Next();
- EXPECT_EQ(fileF.value(), cur_file.value());
- cur_file = enumerator.Next();
-#elif defined(OS_POSIX)
- EXPECT_EQ(dirC.value(), cur_file.value());
- cur_file = enumerator.Next();
- EXPECT_EQ(dirD.value(), cur_file.value());
- cur_file = enumerator.Next();
- EXPECT_EQ(dirE.value(), cur_file.value());
- cur_file = enumerator.Next();
- EXPECT_EQ(fileA.value(), cur_file.value());
- cur_file = enumerator.Next();
- EXPECT_EQ(fileB.value(), cur_file.value());
- cur_file = enumerator.Next();
- EXPECT_EQ(fileF.value(), cur_file.value());
- cur_file = enumerator.Next();
-#endif
-
- EXPECT_EQ(FILE_PATH_LITERAL(""), cur_file.value());
-}
-
TEST_F(FileUtilTest, Contains) {
FilePath data_dir = test_dir_.Append(FILE_PATH_LITERAL("FilePathTest"));
diff --git a/base/file_util_icu.cc b/base/i18n/file_util_icu.cc
index eeffa92..0bc9db6 100644
--- a/base/file_util_icu.cc
+++ b/base/i18n/file_util_icu.cc
@@ -2,21 +2,21 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-// File utilities that use the ICU library go in this file. Functions using ICU
-// are separated from the other functions to prevent ICU being pulled in by the
-// linker if there is a false dependency.
-//
-// (The VS2005 linker finds such a false dependency and adds ~300K of ICU to
-// chrome.exe if this code lives in file_util.cc, even though none of this code
-// is called.)
+// File utilities that use the ICU library go in this file.
-#include "base/file_util.h"
+#include "base/i18n/file_util_icu.h"
+#include "base/file_path.h"
+#include "base/scoped_ptr.h"
#include "base/singleton.h"
#include "base/string_util.h"
+#include "base/sys_string_conversions.h"
+#include "build/build_config.h"
+#include "unicode/coll.h"
#include "unicode/uniset.h"
namespace {
+
class IllegalCharacters {
public:
bool contains(UChar32 ucs4) {
@@ -72,6 +72,50 @@ IllegalCharacters::IllegalCharacters() {
set->freeze();
}
+class LocaleAwareComparator {
+ public:
+ LocaleAwareComparator() {
+ UErrorCode error_code = U_ZERO_ERROR;
+ // Use the default collator. The default locale should have been properly
+ // set by the time this constructor is called.
+ collator_.reset(icu::Collator::createInstance(error_code));
+ DCHECK(U_SUCCESS(error_code));
+ // Make it case-sensitive.
+ collator_->setStrength(icu::Collator::TERTIARY);
+ // Note: We do not set UCOL_NORMALIZATION_MODE attribute. In other words, we
+ // do not pay performance penalty to guarantee sort order correctness for
+ // non-FCD (http://unicode.org/notes/tn5/#FCD) file names. This should be a
+ // reasonable tradeoff because such file names should be rare and the sort
+ // order doesn't change much anyway.
+ }
+
+ // Note: A similar function is available in l10n_util.
+ // We cannot use it because base should not depend on l10n_util.
+ // TODO(yuzo): Move some of l10n_util to base.
+ int Compare(const string16& a, const string16& b) {
+ // We are not sure if Collator::compare is thread-safe.
+ // Use an AutoLock just in case.
+ AutoLock auto_lock(lock_);
+
+ UErrorCode error_code = U_ZERO_ERROR;
+ UCollationResult result = collator_->compare(
+ static_cast<const UChar*>(a.c_str()),
+ static_cast<int>(a.length()),
+ static_cast<const UChar*>(b.c_str()),
+ static_cast<int>(b.length()),
+ error_code);
+ DCHECK(U_SUCCESS(error_code));
+ return result;
+ }
+
+ private:
+ scoped_ptr<icu::Collator> collator_;
+ Lock lock_;
+ friend struct DefaultSingletonTraits<LocaleAwareComparator>;
+
+ DISALLOW_COPY_AND_ASSIGN(LocaleAwareComparator);
+};
+
} // namespace
namespace file_util {
@@ -126,4 +170,24 @@ void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char) {
#endif
}
+bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) {
+#if defined(OS_WIN)
+ return Singleton<LocaleAwareComparator>()->Compare(a.value().c_str(),
+ b.value().c_str()) < 0;
+
+#elif defined(OS_POSIX)
+ // On linux, the file system encoding is not defined. We assume
+ // SysNativeMBToWide takes care of it.
+ //
+ // ICU's collator can take strings in OS native encoding. But we convert the
+ // strings to UTF-16 ourselves to ensure conversion consistency.
+ // TODO(yuzo): Perhaps we should define SysNativeMBToUTF16?
+ return Singleton<LocaleAwareComparator>()->Compare(
+ WideToUTF16(base::SysNativeMBToWide(a.value().c_str())),
+ WideToUTF16(base::SysNativeMBToWide(b.value().c_str()))) < 0;
+#else
+ #error Not implemented on your system
+#endif
+}
+
} // namespace
diff --git a/base/i18n/file_util_icu.h b/base/i18n/file_util_icu.h
new file mode 100644
index 0000000..c309a9e
--- /dev/null
+++ b/base/i18n/file_util_icu.h
@@ -0,0 +1,33 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// File utilities that use the ICU library go in this file.
+
+#include <string>
+
+#include "base/string16.h"
+
+class FilePath;
+
+namespace file_util {
+
+// Returns true if file_name does not have any illegal character. The input
+// param has the same restriction as that for ReplaceIllegalCharacters.
+bool IsFilenameLegal(const string16& file_name);
+
+// Replaces characters in 'file_name' that are illegal for file names with
+// 'replace_char'. 'file_name' must not be a full or relative path, but just the
+// file name component. Any leading or trailing whitespace in 'file_name' is
+// removed.
+// Example:
+// file_name == "bad:file*name?.txt", changed to: "bad-file-name-.txt" when
+// 'replace_char' is '-'.
+void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char);
+
+// Compares two filenames using the current locale information. This can be
+// used to sort directory listings. It behaves like "operator<" for use in
+// std::sort.
+bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b);
+
+} // namespace file_util
diff --git a/base/i18n/file_util_icu_unittest.cc b/base/i18n/file_util_icu_unittest.cc
new file mode 100644
index 0000000..aebcd0df
--- /dev/null
+++ b/base/i18n/file_util_icu_unittest.cc
@@ -0,0 +1,71 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/i18n/file_util_icu.h"
+
+#include "base/file_util.h"
+#include "base/path_service.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "testing/platform_test.h"
+
+// file_util winds up using autoreleased objects on the Mac, so this needs
+// to be a PlatformTest
+class FileUtilICUTest : public PlatformTest {
+ protected:
+ virtual void SetUp() {
+ PlatformTest::SetUp();
+ // Name a subdirectory of the temp directory.
+ ASSERT_TRUE(PathService::Get(base::DIR_TEMP, &test_dir_));
+ test_dir_ = test_dir_.Append(FILE_PATH_LITERAL("FileUtilTest"));
+
+ // Create a fresh, empty copy of this directory.
+ file_util::Delete(test_dir_, true);
+ file_util::CreateDirectory(test_dir_);
+ }
+ virtual void TearDown() {
+ PlatformTest::TearDown();
+ // Clean up test directory
+ ASSERT_TRUE(file_util::Delete(test_dir_, true));
+ ASSERT_FALSE(file_util::PathExists(test_dir_));
+ }
+
+ // the path to temporary directory used to contain the test operations
+ FilePath test_dir_;
+};
+
+static const struct goodbad_pair {
+ std::wstring bad_name;
+ std::wstring good_name;
+} kIllegalCharacterCases[] = {
+ {L"bad*file:name?.jpg", L"bad-file-name-.jpg"},
+ {L"**********::::.txt", L"--------------.txt"},
+ // We can't use UCNs (universal character names) for C0/C1 characters and
+ // U+007F, but \x escape is interpreted by MSVC and gcc as we intend.
+ {L"bad\x0003\x0091 file\u200E\u200Fname.png", L"bad-- file--name.png"},
+#if defined(OS_WIN)
+ {L"bad*file\\name.jpg", L"bad-file-name.jpg"},
+ {L"\t bad*file\\name/.jpg ", L"bad-file-name-.jpg"},
+#elif defined(OS_POSIX)
+ {L"bad*file?name.jpg", L"bad-file-name.jpg"},
+ {L"\t bad*file?name/.jpg ", L"bad-file-name-.jpg"},
+#endif
+ {L"this_file_name is okay!.mp3", L"this_file_name is okay!.mp3"},
+ {L"\u4E00\uAC00.mp3", L"\u4E00\uAC00.mp3"},
+ {L"\u0635\u200C\u0644.mp3", L"\u0635\u200C\u0644.mp3"},
+ {L"\U00010330\U00010331.mp3", L"\U00010330\U00010331.mp3"},
+ // Unassigned codepoints are ok.
+ {L"\u0378\U00040001.mp3", L"\u0378\U00040001.mp3"},
+ // Non-characters are not allowed.
+ {L"bad\uFFFFfile\U0010FFFEname.jpg ", L"bad-file-name.jpg"},
+ {L"bad\uFDD0file\uFDEFname.jpg ", L"bad-file-name.jpg"},
+};
+
+TEST_F(FileUtilICUTest, ReplaceIllegalCharactersTest) {
+ for (unsigned int i = 0; i < arraysize(kIllegalCharacterCases); ++i) {
+ std::wstring bad_name(kIllegalCharacterCases[i].bad_name);
+ file_util::ReplaceIllegalCharacters(&bad_name, L'-');
+ EXPECT_EQ(kIllegalCharacterCases[i].good_name, bad_name);
+ }
+}
+
diff --git a/base/i18n/number_formatting.cc b/base/i18n/number_formatting.cc
new file mode 100644
index 0000000..fef1b7d
--- /dev/null
+++ b/base/i18n/number_formatting.cc
@@ -0,0 +1,48 @@
+// Copyright (c) 2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/i18n/number_formatting.h"
+
+#include "base/logging.h"
+#include "base/singleton.h"
+#include "base/string_util.h"
+#include "base/utf_string_conversions.h"
+#include "unicode/numfmt.h"
+#include "unicode/ustring.h"
+
+namespace base {
+
+namespace {
+
+struct NumberFormatSingletonTraits
+ : public DefaultSingletonTraits<icu::NumberFormat> {
+ static icu::NumberFormat* New() {
+ UErrorCode status = U_ZERO_ERROR;
+ icu::NumberFormat* formatter = icu::NumberFormat::createInstance(status);
+ DCHECK(U_SUCCESS(status));
+ return formatter;
+ }
+ // There's no ICU call to destroy a NumberFormat object other than
+ // operator delete, so use the default Delete, which calls operator delete.
+ // This can cause problems if a different allocator is used by this file than
+ // by ICU.
+};
+
+} // namespace
+
+string16 FormatNumber(int64 number) {
+ icu::NumberFormat* number_format =
+ Singleton<icu::NumberFormat, NumberFormatSingletonTraits>::get();
+
+ if (!number_format) {
+ // As a fallback, just return the raw number in a string.
+ return UTF8ToUTF16(StringPrintf("%lld", number));
+ }
+ icu::UnicodeString ustr;
+ number_format->format(number, ustr);
+
+ return string16(ustr.getBuffer(), static_cast<size_t>(ustr.length()));
+}
+
+} // namespace base
diff --git a/base/i18n/number_formatting.h b/base/i18n/number_formatting.h
new file mode 100644
index 0000000..9fa2b18
--- /dev/null
+++ b/base/i18n/number_formatting.h
@@ -0,0 +1,19 @@
+// Copyright (c) 2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_I18N_NUMBER_FORMATTING_H_
+#define BASE_I18N_NUMBER_FORMATTING_H_
+
+#include <string>
+
+#include "base/basictypes.h"
+#include "base/string16.h"
+
+namespace base {
+
+string16 FormatNumber(int64 number);
+
+} // namespace base
+
+#endif // BASE_I18N_NUMBER_FORMATTING_H_
diff --git a/base/string_util.h b/base/string_util.h
index 254e18f..c6b9fb1 100644
--- a/base/string_util.h
+++ b/base/string_util.h
@@ -140,8 +140,6 @@ bool TrimString(const std::string& input,
// The non-wide version has two functions:
// * TrimWhitespaceASCII()
// This function is for ASCII strings and only looks for ASCII whitespace;
-// * TrimWhitespaceUTF8()
-// This function is for UTF-8 strings and looks for Unicode whitespace.
// Please choose the best one according to your usage.
// NOTE: Safe to use the same variable for both input and output.
enum TrimPositions {
@@ -156,9 +154,6 @@ TrimPositions TrimWhitespace(const std::wstring& input,
TrimPositions TrimWhitespaceASCII(const std::string& input,
TrimPositions positions,
std::string* output);
-TrimPositions TrimWhitespaceUTF8(const std::string& input,
- TrimPositions positions,
- std::string* output);
// Deprecated. This function is only for backward compatibility and calls
// TrimWhitespaceASCII().
diff --git a/base/string_util_icu.cc b/base/string_util_icu.cc
deleted file mode 100644
index 68fbd10..0000000
--- a/base/string_util_icu.cc
+++ /dev/null
@@ -1,80 +0,0 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "base/string_util.h"
-
-#include <string.h>
-#include <vector>
-
-#include "base/basictypes.h"
-#include "base/logging.h"
-#include "base/singleton.h"
-#include "unicode/numfmt.h"
-#include "unicode/ustring.h"
-
-// Number formatting -----------------------------------------------------------
-
-namespace {
-
-struct NumberFormatSingletonTraits
- : public DefaultSingletonTraits<icu::NumberFormat> {
- static icu::NumberFormat* New() {
- UErrorCode status = U_ZERO_ERROR;
- icu::NumberFormat* formatter = icu::NumberFormat::createInstance(status);
- DCHECK(U_SUCCESS(status));
- return formatter;
- }
- // There's no ICU call to destroy a NumberFormat object other than
- // operator delete, so use the default Delete, which calls operator delete.
- // This can cause problems if a different allocator is used by this file than
- // by ICU.
-};
-
-} // namespace
-
-std::wstring FormatNumber(int64 number) {
- icu::NumberFormat* number_format =
- Singleton<icu::NumberFormat, NumberFormatSingletonTraits>::get();
-
- if (!number_format) {
- // As a fallback, just return the raw number in a string.
- return StringPrintf(L"%lld", number);
- }
- icu::UnicodeString ustr;
- number_format->format(number, ustr);
-
-#if defined(WCHAR_T_IS_UTF16)
- return std::wstring(ustr.getBuffer(),
- static_cast<std::wstring::size_type>(ustr.length()));
-#elif defined(WCHAR_T_IS_UTF32)
- wchar_t buffer[64]; // A int64 is less than 20 chars long, so 64 chars
- // leaves plenty of room for formating stuff.
- int length = 0;
- UErrorCode error = U_ZERO_ERROR;
- u_strToWCS(buffer, 64, &length, ustr.getBuffer(), ustr.length() , &error);
- if (U_FAILURE(error)) {
- NOTREACHED();
- // As a fallback, just return the raw number in a string.
- return StringPrintf(L"%lld", number);
- }
- return std::wstring(buffer, static_cast<std::wstring::size_type>(length));
-#endif // defined(WCHAR_T_IS_UTF32)
-}
-
-// Although this function isn't specific to ICU, we implemented it here so
-// that chrome.exe won't pull it in. Moving this function to string_util.cc
-// causes chrome.exe to grow by 400k because of more ICU being pulled in.
-TrimPositions TrimWhitespaceUTF8(const std::string& input,
- TrimPositions positions,
- std::string* output) {
- // This implementation is not so fast since it converts the text encoding
- // twice. Please feel free to file a bug if this function hurts the
- // performance of Chrome.
- DCHECK(IsStringUTF8(input));
- std::wstring input_wide = UTF8ToWide(input);
- std::wstring output_wide;
- TrimPositions result = TrimWhitespace(input_wide, positions, &output_wide);
- *output = WideToUTF8(output_wide);
- return result;
-}
diff --git a/base/string_util_unittest.cc b/base/string_util_unittest.cc
index 2723541..a70b03e 100644
--- a/base/string_util_unittest.cc
+++ b/base/string_util_unittest.cc
@@ -100,52 +100,6 @@ TEST(StringUtilTest, TrimWhitespace) {
}
}
-static const struct trim_case_utf8 {
- const char* input;
- const TrimPositions positions;
- const char* output;
- const TrimPositions return_value;
-} trim_cases_utf8[] = {
- // UTF-8 strings that start (and end) with Unicode space characters
- // (including zero-width spaces).
- {"\xE2\x80\x80Test String\xE2\x80\x81", TRIM_ALL, "Test String", TRIM_ALL},
- {"\xE2\x80\x82Test String\xE2\x80\x83", TRIM_ALL, "Test String", TRIM_ALL},
- {"\xE2\x80\x84Test String\xE2\x80\x85", TRIM_ALL, "Test String", TRIM_ALL},
- {"\xE2\x80\x86Test String\xE2\x80\x87", TRIM_ALL, "Test String", TRIM_ALL},
- {"\xE2\x80\x88Test String\xE2\x80\x8A", TRIM_ALL, "Test String", TRIM_ALL},
- {"\xE3\x80\x80Test String\xE3\x80\x80", TRIM_ALL, "Test String", TRIM_ALL},
- // UTF-8 strings that end with 0x85 (NEL in ISO-8859).
- {"\xD0\x85", TRIM_TRAILING, "\xD0\x85", TRIM_NONE},
- {"\xD9\x85", TRIM_TRAILING, "\xD9\x85", TRIM_NONE},
- {"\xEC\x97\x85", TRIM_TRAILING, "\xEC\x97\x85", TRIM_NONE},
- {"\xF0\x90\x80\x85", TRIM_TRAILING, "\xF0\x90\x80\x85", TRIM_NONE},
- // UTF-8 strings that end with 0xA0 (non-break space in ISO-8859-1).
- {"\xD0\xA0", TRIM_TRAILING, "\xD0\xA0", TRIM_NONE},
- {"\xD9\xA0", TRIM_TRAILING, "\xD9\xA0", TRIM_NONE},
- {"\xEC\x97\xA0", TRIM_TRAILING, "\xEC\x97\xA0", TRIM_NONE},
- {"\xF0\x90\x80\xA0", TRIM_TRAILING, "\xF0\x90\x80\xA0", TRIM_NONE},
-};
-
-TEST(StringUtilTest, TrimWhitespaceUTF8) {
- std::string output_ascii;
- for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
- const trim_case_ascii& value = trim_cases_ascii[i];
- EXPECT_EQ(value.return_value,
- TrimWhitespaceASCII(value.input, value.positions, &output_ascii));
- EXPECT_EQ(value.output, output_ascii);
- }
-
- // Test that TrimWhiteSpaceUTF8() can remove Unicode space characters and
- // prevent from removing UTF-8 characters that end with an ISO-8859 NEL.
- std::string output_utf8;
- for (size_t i = 0; i < arraysize(trim_cases_utf8); ++i) {
- const trim_case_utf8& value = trim_cases_utf8[i];
- EXPECT_EQ(value.return_value,
- TrimWhitespaceUTF8(value.input, value.positions, &output_utf8));
- EXPECT_EQ(value.output, output_utf8);
- }
-}
-
static const struct collapse_case {
const wchar_t* input;
const bool trim;